Source of this notebook: [Udemy: Python for Data Science and Machine Learning Bootcamp](https://www.udemy.com/python-for-data-science-and-machine-learning-bootcamp) by Jose Portilla

In [1]:
from pandas_datareader import data, wb
import pandas as pd
import numpy as np
import datetime
import seaborn as sns
%matplotlib inline

## Data

We need to get data using pandas [datareader](https://pandas-datareader.readthedocs.io/en/latest/remote_data.html). We will get stock information for the following banks:
* Bank of America
* CitiGroup
* Goldman Sachs
* JPMorgan Chase
* Morgan Stanley
* Wells Fargo

## Read the data from remote server

In [2]:
# Date range requested from the remote data source (both are midnight timestamps)
start = datetime.datetime(year=2013, month=12, day=31)
end = datetime.datetime(year=2019, month=1, day=1)

In [4]:
# Needed due to an issue with a firewall: a custom HTTP session is passed to
# every DataReader call so that certificate handling can be controlled here.
import os
import requests

session = requests.Session()
# Prefer verifying against a CA bundle when one is configured through the
# REQUESTS_CA_BUNDLE environment variable (path to a certificate file/directory).
# Only when no bundle is configured is SSL verification turned OFF; requests
# will then give you an InsecureRequestWarning and the download is not
# protected against tampering, so do not reuse this session for anything sensitive.
session.verify = os.environ.get('REQUESTS_CA_BUNDLE') or False

In [6]:
# Get stock information from the remote server, one dataframe per bank.
# NOTE(review): newer pandas-datareader releases may require an API key for the
# 'iex' source — confirm before re-running this cell.
def fetch_stock(ticker):
    """Download the price history for `ticker` from 'iex' between `start` and `end`."""
    return data.DataReader(ticker, 'iex', start, end, session=session)

BAC = fetch_stock('BAC')  # Bank of America
C = fetch_stock('C')      # CitiGroup
GS = fetch_stock('GS')    # Goldman Sachs
JPM = fetch_stock('JPM')  # JPMorgan Chase
MS = fetch_stock('MS')    # Morgan Stanley
WFC = fetch_stock('WFC')  # Wells Fargo



In [7]:
# Ticker symbols (as strings); sorting makes the alphabetical order explicit.
tickers = sorted(['WFC', 'MS', 'JPM', 'GS', 'C', 'BAC'])

## Consolidate data into one dataframe

In [8]:
# Place the per-bank dataframes side by side; the ticker symbols become the
# outer level of the resulting column MultiIndex.
# The order of this list must match the order of `tickers`.
bank_frames = [BAC, C, GS, JPM, MS, WFC]
bank_stocks = pd.concat(bank_frames, axis=1, keys=tickers)
bank_stocks.head()

Unnamed: 0_level_0,BAC,BAC,BAC,BAC,BAC,C,C,C,C,C,...,MS,MS,MS,MS,MS,WFC,WFC,WFC,WFC,WFC
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-08,15.5611,15.5797,15.4202,15.4771,101227932,51.6789,52.3547,51.5932,52.1739,26002326,...,28.7932,29.0441,28.6199,28.7932,8930184,39.3699,39.8469,39.3266,39.8209,20907314
2014-01-09,15.5611,15.8004,15.5097,15.7104,101275343,52.3547,52.6213,52.1263,52.5451,21812007,...,28.8844,28.9574,28.6107,28.7841,7322294,39.8642,40.0637,39.7168,40.029,14389488
2014-01-10,15.6358,15.6731,15.5051,15.6544,87682477,52.3833,52.4372,51.6884,52.0882,22496888,...,28.8114,28.8571,28.3735,28.556,10571296,39.9423,39.9423,39.5694,39.8382,15532776
2014-01-13,15.6731,15.6824,15.309,15.337,90827849,51.8407,52.0311,50.8698,51.1363,21116568,...,28.556,28.7567,28.0542,28.1637,9605058,39.8555,39.9813,39.4133,39.5087,21059377
2014-01-14,15.4397,15.6544,15.4304,15.6544,98517898,51.5266,51.7836,51.0506,51.3552,17790818,...,28.0633,28.483,27.9174,28.3735,13721984,39.4393,39.7515,38.9537,39.5347,28526935


In [9]:
# Name the two column levels: outer level = bank ticker, inner level = price/volume field
bank_stocks.columns = bank_stocks.columns.set_names(['Bank Ticker', 'Stock Info'])

In [10]:
# Preview the first five rows of the consolidated dataframe (now with named column levels)
bank_stocks.head(n=5)

Bank Ticker,BAC,BAC,BAC,BAC,BAC,C,C,C,C,C,...,MS,MS,MS,MS,MS,WFC,WFC,WFC,WFC,WFC
Stock Info,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2014-01-08,15.5611,15.5797,15.4202,15.4771,101227932,51.6789,52.3547,51.5932,52.1739,26002326,...,28.7932,29.0441,28.6199,28.7932,8930184,39.3699,39.8469,39.3266,39.8209,20907314
2014-01-09,15.5611,15.8004,15.5097,15.7104,101275343,52.3547,52.6213,52.1263,52.5451,21812007,...,28.8844,28.9574,28.6107,28.7841,7322294,39.8642,40.0637,39.7168,40.029,14389488
2014-01-10,15.6358,15.6731,15.5051,15.6544,87682477,52.3833,52.4372,51.6884,52.0882,22496888,...,28.8114,28.8571,28.3735,28.556,10571296,39.9423,39.9423,39.5694,39.8382,15532776
2014-01-13,15.6731,15.6824,15.309,15.337,90827849,51.8407,52.0311,50.8698,51.1363,21116568,...,28.556,28.7567,28.0542,28.1637,9605058,39.8555,39.9813,39.4133,39.5087,21059377
2014-01-14,15.4397,15.6544,15.4304,15.6544,98517898,51.5266,51.7836,51.0506,51.3552,17790818,...,28.0633,28.483,27.9174,28.3735,13721984,39.4393,39.7515,38.9537,39.5347,28526935


## Explore the data

The following steps make use of [Multi-Level Indexing](http://pandas.pydata.org/pandas-docs/stable/advanced.html).
The pandas dataframe [.xs()](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.xs.html) function is also used - this function can be used to get a cross-section of the data in a dataframe.

### Find the max close price for each bank

In [11]:
# Method 1: walk over the tickers and select each bank's 'close' column explicitly
for symbol in tickers:
    highest_close = bank_stocks[symbol]['close'].max()
    print(symbol, highest_close)

BAC 32.3681
C 78.3443
GS 270.4223
JPM 116.856
MS 57.8855
WFC 64.0585


In [12]:
# Method 2: take a cross-section of the 'close' field across all banks
# (preview of the first two rows)
close_preview = bank_stocks.xs(key='close', axis=1, level='Stock Info')
close_preview.head(2)

Bank Ticker,BAC,C,GS,JPM,MS,WFC
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-08,15.4771,52.1739,166.7442,51.7386,28.7932,39.8209
2014-01-09,15.7104,52.5451,165.7724,51.6419,28.7841,40.029


In [13]:
# Column-wise maximum of the 'close' cross-section: one max close price per bank
close_by_bank = bank_stocks.xs(key='close', axis=1, level='Stock Info')
close_by_bank.max()

Bank Ticker
BAC     32.3681
C       78.3443
GS     270.4223
JPM    116.8560
MS      57.8855
WFC     64.0585
dtype: float64

### Calculate percent returns for each bank
Returns are typically defined by:

$$r_t = \frac{p_t - p_{t-1}}{p_{t-1}} = \frac{p_t}{p_{t-1}} - 1$$

In [None]:
# Start from an empty dataframe; one returns column per bank is added to it later
returns = pd.DataFrame(data=None)

We can use the pandas [pct_change()](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.pct_change.html) method on each bank's `close` column to create a column representing this return value.

In [None]:
# Percent change of each bank's closing price from one row to the next,
# stored in a column named '<ticker> Returns'
for symbol in tickers:
    close_prices = bank_stocks[symbol]['close']
    returns[f'{symbol} Returns'] = close_prices.pct_change()
returns.head()

### Create a [seaborn](https://seaborn.pydata.org/) [pairplot](https://seaborn.pydata.org/generated/seaborn.pairplot.html) using the returns dataframe.

In [None]:
# Rows in which every return is NaN (the initial row) are dropped before plotting
returns_no_empty_rows = returns.dropna(how='all')
sns.pairplot(returns_no_empty_rows)

### Determine the dates each bank stock had the best and worst single day returns

In [None]:
# Empty summary frame; the best/worst day columns are added in the cells below
df_returns = pd.DataFrame(data=None)

In [None]:
# Date of the worst return: index label of the minimum in each returns column
worst_day_by_bank = returns.idxmin()
df_returns['Worst Day'] = worst_day_by_bank

In [None]:
# Value of the worst return: minimum of each returns column
worst_value_by_bank = returns.min()
df_returns['Worst Day Value'] = worst_value_by_bank

In [None]:
# Date of the best return: index label of the maximum in each returns column
best_day_by_bank = returns.idxmax()
df_returns['Best Day'] = best_day_by_bank

In [None]:
# Value of the best return: maximum of each returns column
best_value_by_bank = returns.max()
df_returns['Best Day Value'] = best_value_by_bank

In [None]:
# Display the summary: one row per returns column with its worst/best day and the corresponding values
df_returns

### Examine standard deviation of the returns

In [None]:
# Standard deviation of each returns column, computed down all rows
returns.std(axis=0)

In [None]:
# Standard deviation of each returns column, restricted to the rows labelled within 2015
returns_2015 = returns.loc['2015-01-01':'2015-12-31']
returns_2015.std()

### Create a distplot using seaborn of the 2015 returns for Morgan Stanley

In [None]:
from matplotlib import pyplot

# Set the style BEFORE creating the figure: seaborn styles are applied through
# matplotlib rcParams, which are read when the Axes is created, so calling
# set_style after subplots() would leave this plot unstyled.
sns.set_style("whitegrid")
fig, ax = pyplot.subplots(figsize=(12,6))

# Select the 2015 rows and the Morgan Stanley column in a single .loc call
# rather than chaining two indexing operations.
ms_returns_2015 = returns.loc['2015-01-01':'2015-12-31', 'MS Returns']
sns.distplot(ms_returns_2015, color='green', bins=50, ax=ax)

Note: the line in the plot above is a "kde" ([kernel density estimation](https://seaborn.pydata.org/tutorial/distributions.html#kernel-density-estimation)) plot.

____
## More Visualization

In [None]:
# Plotting setup for the visualization cells that follow.
import matplotlib.pyplot as plt
import seaborn as sns  # seaborn is already imported in the first cell; repeated here
sns.set_style('whitegrid')
%matplotlib inline

# Optional Plotly Method Imports
import plotly
import cufflinks as cf  # provides the .iplot() / .ta_plot() dataframe methods used below
cf.go_offline()  # presumably switches cufflinks/plotly to offline rendering — confirm against the cufflinks docs

### Create a line plot showing Close price for each bank for the entire index of time

In [None]:
# Draw each bank's closing price with the pandas plotting method;
# every series is labelled with its ticker for the legend.
for symbol in tickers:
    close_series = bank_stocks[symbol]['close']
    close_series.plot(label=symbol, figsize=(12,4))
# Anchor the legend just outside the right edge of the axes, vertically centred
plt.legend(bbox_to_anchor=(1,.5), loc='center left')

### Create an interactive plot using plotly

In [None]:
# Interactive line plot of the closing prices of all banks
close_by_bank = bank_stocks.xs(key='close', axis=1, level='Stock Info')
close_by_bank.iplot()

### Moving Averages

Let's analyze the moving averages for these stocks in the year 2015. 

In [None]:
# 30-observation moving average of the Bank of America closing price for 2015.
# The rolling mean is computed over the FULL price history and only then
# restricted to the plotted period: rolling over the already-sliced data would
# leave the first 29 rows of the period as NaN even though earlier prices exist.
bac_close = BAC['close']
bac_ma30 = bac_close.rolling(window=30).mean()

plt.figure(figsize=(12,4))
bac_ma30.loc['2015-01-01':'2016-01-01'].plot(label='30 day moving average')
bac_close.loc['2015-01-01':'2016-01-01'].plot(label='BAC Close')
plt.legend()

### Create a heatmap of the correlation between the stocks Close Price
Uses the pandas dataframe [.corr()](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.corr.html) method to calculate pairwise correlation of the columns

In [None]:
# Pairwise correlation between the banks' closing prices
close_by_bank = bank_stocks.xs(key='close', axis=1, level='Stock Info')
close_by_bank.corr()

In [None]:
# Heatmap of the closing-price correlation matrix, with the coefficients written in the cells
close_correlations = bank_stocks.xs(key='close', axis=1, level='Stock Info').corr()
sns.heatmap(close_correlations, annot=True)

### Create a clustermap of the correlation between the stocks Close Price

In [None]:
# Clustered heatmap of the same closing-price correlation matrix
close_correlations = bank_stocks.xs(key='close', axis=1, level='Stock Info').corr()
sns.clustermap(close_correlations, annot=True)

# Create Technical Analysis Plots

In [None]:
# Keep the closing-price correlation matrix under a name for later use
close_by_bank = bank_stocks.xs(key='close', axis=1, level='Stock Info')
close_corr = close_by_bank.corr()

In [None]:
# Display the correlation matrix of the banks' closing prices
close_corr

### Create a candle stick plot

In [None]:
# Candlestick plot of Bank of America for 2015.
# The columns are needed in this specific order: open, high, low, close.
ohlc_columns = ['open','high','low','close']
bac15 = BAC.loc['2015-01-01':'2016-01-01', ohlc_columns]
bac15.iplot(kind='candle')

### Create a [moving average plot](https://plot.ly/pandas/moving-average/#moving-averages-in-cufflinks)

In [None]:
# Plotly technical analysis plot: simple moving averages (13, 21 and 55 periods)
# of the Morgan Stanley closing price over the selected date range
ms_close_2015 = MS.loc['2015-01-01':'2016-01-01', 'close']
ms_close_2015.ta_plot(study='sma', periods=[13,21,55])

### Create a [Bollinger Band Plot](https://plot.ly/pandas/bollinger-bands/)

In [None]:
# Bollinger Band plot of the Bank of America closing price over the selected date range
bac_close_2015 = BAC.loc['2015-01-01':'2016-01-01', 'close']
bac_close_2015.ta_plot(study='boll')