# Simple Linear Regression for stock using scikit-learn


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
yf.pdr_override()

In [2]:
stock = 'AAPL'
start = '2016-01-01' 
end = '2018-01-01'
data = yf.download(stock, start, end)
data.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,25.6525,26.342501,25.5,26.3375,24.286827,270597600
2016-01-05,26.4375,26.4625,25.602501,25.6775,23.678221,223164000
2016-01-06,25.139999,25.592501,24.967501,25.174999,23.214844,273829600
2016-01-07,24.67,25.032499,24.1075,24.112499,22.235073,324377600
2016-01-08,24.637501,24.7775,24.190001,24.24,22.352646,283192000


In [3]:
df = data.reset_index()
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-04,25.6525,26.342501,25.5,26.3375,24.286827,270597600
1,2016-01-05,26.4375,26.4625,25.602501,25.6775,23.678221,223164000
2,2016-01-06,25.139999,25.592501,24.967501,25.174999,23.214844,273829600
3,2016-01-07,24.67,25.032499,24.1075,24.112499,22.235073,324377600
4,2016-01-08,24.637501,24.7775,24.190001,24.24,22.352646,283192000


In [4]:
X = df.drop(['Date','Close'], axis=1)
y = df['Adj Close']

In [5]:
from sklearn.model_selection import train_test_split

# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                   test_size=0.25, random_state=0)

In [6]:
from sklearn.linear_model import LinearRegression

regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [7]:
intercept = regression_model.intercept_

print("The intercept for our model is {}".format(intercept))

The intercept for our model is -1.659117287999834e-12


In [8]:
regression_model.score(X_test, y_test)

1.0

In [9]:
from sklearn.metrics import mean_squared_error

y_predict = regression_model.predict(X_test)

regression_model_mse = mean_squared_error(y_predict, y_test)

regression_model_mse

2.57258510115635e-25

In [10]:
math.sqrt(regression_model_mse)

5.07206575386829e-13

In [11]:
# input the latest Open, High, Low, Close, Volume
# predicts the next day price
regression_model.predict([[167.81, 171.75, 165.19, 166.48, 37232900]])

array([166.48])