# Applying Regression on Stock Prices  
In this notebook we will be applying a number of regression algorithms present in the Scikit Learn library. 


In [24]:
%config IPCompleter.greedy=True

In [3]:
# Lets import a few important libraries first
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import pandas_profiling 


In [4]:
# yFinance will help us fetch the data for our dataset
import yfinance as yf

## Fetch Data from Yahoo Finance
Time to fetch some data from Yahoo!. We are going to pull 5 years price data of  
FAANG - Facebook Apple Amazon Netflix Google  


In [29]:
# Facebook
data_FB = yf.download("FB", start="2014-01-01", end="2019-09-10")
# Apple
data_AAPL = yf.download("AAPL", start="2014-01-01", end="2019-09-10")
# Amazon
data_AMZN = yf.download("AMZN", start="2014-01-01", end="2019-09-10")
# Netflix
data_NFLX = yf.download("NFLX", start="2014-01-01", end="2019-09-10")
# Google/Alphabet
data_GOOGL = yf.download("GOOGL", start="2014-01-01", end="2019-09-10")

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded


In [38]:
# Validate the Data
# Shape should be same
display(type(data_FB))
display(data_FB.head())
display(data_FB.shape)
display(data_AAPL.shape)
display(data_AMZN.shape)
display(data_NFLX.shape)
display(data_GOOGL.shape)

pandas.core.frame.DataFrame

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2013-12-31,54.65
2014-01-02,54.71
2014-01-03,54.56
2014-01-06,57.2
2014-01-07,57.92


(1432, 1)

(1432, 1)

(1432, 1)

(1432, 1)

(1432, 1)

## Combine all the Dataframes  
Let's create a single dataframe that contains date and the _Adj Close_ price for each stock.

In [44]:
# Drop all columns except for the Adj Close price 
data_FB = data_FB[['Adj Close']]
data_AAPL = data_AAPL[['Adj Close']]
data_AMZN = data_AMZN[['Adj Close']]
data_NFLX = data_NFLX[['Adj Close']]
data_GOOGL = data_GOOGL[['Adj Close']]

# Merge the Dataframes
stock_data = data_GOOGL.merge(data_AAPL, on='Date', how='left')\
            .merge(data_AMZN, on='Date', how='left')\
            .merge(data_FB, on='Date', how='left')\
            .merge(data_NFLX, on='Date', how='left')
# Fix the column names 
stock_data.columns = ['GOOGL', 'AAPL', 'AMZN', 'FB', 'NFLX']
display(stock_data)

Unnamed: 0_level_0,GOOGL,AAPL,AMZN,FB,NFLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-12-31,560.92,72.34,398.79,54.65,52.60
2014-01-02,557.12,71.32,397.97,54.71,51.83
2014-01-03,553.05,69.75,396.44,54.56,51.87
2014-01-06,559.22,70.13,393.63,57.20,51.37
2014-01-07,570.00,69.63,398.03,57.92,48.50
...,...,...,...,...,...
2019-09-03,1169.55,205.70,1789.84,182.39,289.29
2019-09-04,1182.27,209.19,1800.62,187.14,291.52
2019-09-05,1212.19,213.28,1840.72,190.90,293.25
2019-09-06,1206.32,213.26,1833.51,187.49,290.17


## Calculate Daily Returns 
Instead of directly working with the stock prices, we are going to calculate the _log of Daily Returns_.  
We are going to try and predict the log returns instead of closing prices. 

In [52]:
# Stock Returns 
stock_returns = stock_data/stock_data.shift(1) - 1
# Drop the first row (need two dates to calculate returns, so NAN returns for the first day)
stock_returns.dropna(axis='index', how='any', inplace=True)
display(stock_returns.shape)
display(stock_returns)

(1431, 5)

Unnamed: 0_level_0,GOOGL,AAPL,AMZN,FB,NFLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,-0.006775,-0.014100,-0.002056,0.001098,-0.014639
2014-01-03,-0.007305,-0.022013,-0.003845,-0.002742,0.000772
2014-01-06,0.011156,0.005448,-0.007088,0.048387,-0.009639
2014-01-07,0.019277,-0.007130,0.011178,0.012587,-0.055869
2014-01-08,0.002088,0.006319,0.009773,0.005352,0.004330
...,...,...,...,...,...
2019-09-03,-0.017622,-0.014564,0.007628,-0.017666,-0.015183
2019-09-04,0.010876,0.016966,0.006023,0.026043,0.007709
2019-09-05,0.025307,0.019552,0.022270,0.020092,0.005934
2019-09-06,-0.004842,-0.000094,-0.003917,-0.017863,-0.010503
