# Essential Introduction to pandas 

### Video Tutorial: https://alphabench.com/data/essential-pandas-introduction.html

## 1. First we will set up our environment in Jupyter
### If you don't have pandas_datareader, at the command line enter
### `conda install -c anaconda pandas-datareader`, or `pip install pandas-datareader` if you don't have Anaconda


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as pdr

%matplotlib inline


## 2. Get data (5 years, GOOG AMZN)

In [None]:
# Fetch Yahoo data for both tickers from the same start date.
# The date is written once, in ISO format, so the two requests stay in sync.
start_date = '2014-11-01'
amzn = pdr.get_data_yahoo('AMZN', start_date)
goog = pdr.get_data_yahoo('GOOG', start_date)


## 3. View head, tail, info

In [None]:
# Show the first five rows of the AMZN frame.
amzn.head(n=5)

In [None]:
# Show the last five rows of the AMZN frame.
amzn.tail(n=5)

In [None]:
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() would add a stray "None None" line.
amzn.info()
goog.info()

## 4. Filtering using .loc, .iloc, and the index

In [None]:
# Label-based lookup: the 'Open' value for one date.
# The label must exist in the index; the download starts at 2014-11-01
# (a Saturday), so the lookup uses Monday 2014-11-03 rather than a 2012 date
# that would raise KeyError.
amzn.loc['2014-11-03', 'Open']

In [None]:
# Position-based lookup: the value in the first row, first column.
amzn.iloc[0, 0]

In [None]:
# Keep only the rows whose index value is later than 2017-11-01.
amzn.loc[amzn.index > '2017-11-01']

## 5. Filtering with criteria

In [None]:
# Report the mean and standard deviation of the Volume column.
print(
    'Mean: %.3f  Std: %.3f'
    % (amzn.Volume.mean(), amzn.Volume.std())
)

In [None]:
# Threshold for "high" volume: two standard deviations above the mean.
high = amzn.Volume.mean() + 2 * amzn.Volume.std()

In [None]:
# Fraction of rows whose Volume exceeds the `high` threshold:
# count the True entries of the mask and divide by the total row count.
int((amzn['Volume'] > high).sum()) / len(amzn)

## 6. Adding Columns

In [None]:
# Add a column holding the row-over-row percentage change of Close.
amzn['pct_chng'] = amzn['Close'].pct_change(periods=1)

In [None]:
# Show the first five rows again to confirm the new column is present.
amzn.head(n=5)

## 7. Appending data

In [None]:
# Select the November 2017 rows from each frame using a partial date string.
goog_11, amzn_11 = (frame.loc['2017-11'] for frame in (goog, amzn))

In [None]:
# Stack the GOOG rows on top of the AMZN rows and show the last five.
# pd.concat is used because DataFrame.append was removed in pandas 2.0;
# with default arguments it yields the same row-wise result.
pd.concat([goog_11, amzn_11]).tail()

## 8. Merging Columns from Different DataFrames

In [None]:
# Build a two-column frame of closing prices, one column per ticker.
# The column key is spelled 'AMZN' to match the ticker (it was mistyped
# as 'AZMN').
close = pd.DataFrame({'AMZN': amzn.Close, 'GOOG': goog.Close})
close.head()

## 9. Grouping and sorting data

In [None]:
# Plot the monthly mean of GOOG Volume for rows dated after the start of 2017.
# Row filter and column selection are done in a single .loc call.
# NOTE(review): recent pandas releases spell the month-end alias 'ME' -
# confirm against the installed version before changing 'M'.
goog.loc[goog.index > '2017', 'Volume'].resample('M').mean().plot()

## 10. Cumulative Sum and Rolling Calculations

In [None]:
# Log change of Close for each ticker: log(value / previous row's value).
# The first entry of each result is NaN because it has no previous row.
amzn_change, goog_change = (
    np.log(frame['Close'] / frame['Close'].shift(periods=1))
    for frame in (amzn, goog)
)

In [None]:
# Plot the running total of each log-change series on the same axes.
# Each line gets an explicit label and the legend is switched on, so the
# two tickers can be told apart.
amzn_change.cumsum().plot(label='AMZN', legend=True)
goog_change.cumsum().plot(label='GOOG', legend=True)

## 11. Create a new DataFrame and save to CSV

In [None]:
# Combine both change series into one frame, skipping the first entry of
# each, then write the frame to stocks.csv.
new = pd.DataFrame(
    {
        'AMZN': amzn_change.iloc[1:],
        'GOOG': goog_change.iloc[1:],
    }
)
new.to_csv('stocks.csv')

In [None]:
# Read the saved file back and show its first five rows.
pd.read_csv('stocks.csv').head(n=5)

## 12. Change the index and remove columns
 

In [None]:
# Load the CSV and use its Date column as the index.
# drop=False keeps Date as a regular column as well.
new_stocks = pd.read_csv('stocks.csv').set_index('Date', drop=False)
new_stocks.head()

In [None]:
# Remove the Date column and rebind the name to the resulting frame.
new_stocks = new_stocks.drop(columns=['Date'])
new_stocks.head()


## 13. Rename Columns

In [None]:
# Rename the AMZN column to AMZN_chng and rebind the name to the result.
new_stocks = new_stocks.rename(columns={'AMZN': 'AMZN_chng'})
new_stocks.head()