# Linear Regression Practice by Using Stock Data

This notebook demonstrates how to fit and use a linear regression model, using online stock data from the Yahoo! Finance website. From it, you can learn how to fetch stock data from Yahoo! Finance and how to run a linear regression on that data.

Edited by Chan, Chun-Hsiang @ 20171017 Taipei

In [1]:
# import essential packages
import pandas as pd
import datetime
from pandas_datareader import data, wb
import pandas_datareader.data as web
import pandas_datareader as pdr
from __future__ import print_function
import numpy as np
import statsmodels.api as sm
%matplotlib inline
import matplotlib.pyplot as plt
from statsmodels.sandbox.regression.predstd import wls_prediction_std

In [2]:
# Sampling window for the price history: 2016-01-01 through 2017-10-12.
start = datetime.datetime(year=2016, month=1, day=1)
end = datetime.datetime(year=2017, month=10, day=12)

In [3]:
# Download daily quotes for both instruments over [start, end] from the
# 'yahoo' data source.
# Per the original notes, each frame is indexed by Date and carries the
# columns Open, High, Low, Close, Adj Close, Volume.

# Apple (AAPL) share prices
AAPL_stock = web.DataReader(name="AAPL", data_source='yahoo', start=start, end=end)

# NASDAQ Composite index (^IXIC)
NASDAQ_stock = web.DataReader(name="^IXIC", data_source='yahoo', start=start, end=end)

In [4]:
# Extract Adjusted Close for both instruments, aligned on the shared Date index.
# Taking `.values` from each frame independently pairs observations purely by
# position; if either download is missing a trading day (or contains NaN),
# the regression would fail on a length mismatch or, worse, silently pair
# prices from different dates.  The inner join keeps only the dates present in
# both frames, and dropna() removes rows where either price is missing.
adj_close = pd.concat(
    [AAPL_stock['Adj Close'], NASDAQ_stock['Adj Close']],
    axis=1,
    join='inner',
    keys=['AAPL', 'NASDAQ'],
).dropna()

# dependent variable: AAPL adjusted close; regressor: NASDAQ adjusted close
y_AAPL   = adj_close['AAPL'].values
X_NASDAQ = adj_close['NASDAQ'].values

# add constant (intercept column) to the regressor matrix
X1 = sm.add_constant(X_NASDAQ)

In [5]:
# Model 1: ordinary least squares of y_AAPL on X1 (constant + X_NASDAQ).
model_1 = sm.OLS(endog=y_AAPL, exog=X1)
# estimate the coefficients
results_1 = model_1.fit()
# show the full regression report
print(results_1.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.939
Model:                            OLS   Adj. R-squared:                  0.939
Method:                 Least Squares   F-statistic:                     6912.
Date:                Sat, 14 Oct 2017   Prob (F-statistic):          1.97e-274
Time:                        20:18:05   Log-Likelihood:                -1425.7
No. Observations:                 450   AIC:                             2855.
Df Residuals:                     448   BIC:                             2864.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -78.9300      2.419    -32.635      0.0

In [6]:
# Build the inputs for the lagged model: AAPL(t) ~ NASDAQ(t) + NASDAQ(t-1).
# The first observation has no t-1 value, so every series loses one element.
X_NASDAQt   = X_NASDAQ[1:]     # NASDAQ at time t   (everything except the first element)
X_NASDAQt_1 = X_NASDAQ[:-1]    # NASDAQ at time t-1 (everything except the last element)
# place the t and t-1 series side by side as columns, then add the constant column
X2 = sm.add_constant(np.column_stack((X_NASDAQt, X_NASDAQt_1)))
# AAPL(t), trimmed the same way so it lines up row-for-row with X2
y_AAPL2 = y_AAPL[1:]

In [7]:
# Model 2: ordinary least squares of y_AAPL2 on X2 (the lagged design matrix).
model_2 = sm.OLS(endog=y_AAPL2, exog=X2)
# estimate the coefficients
results_2 = model_2.fit()
# show the full regression report
print(results_2.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.939
Model:                            OLS   Adj. R-squared:                  0.939
Method:                 Least Squares   F-statistic:                     3455.
Date:                Sat, 14 Oct 2017   Prob (F-statistic):          3.44e-272
Time:                        20:18:05   Log-Likelihood:                -1421.8
No. Observations:                 449   AIC:                             2850.
Df Residuals:                     446   BIC:                             2862.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -79.1435      2.425    -32.634      0.0