# Google Stock Predictor

### Google Stock data from 2004-2017

In [1]:
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
df = pd.read_csv('googl.us.txt', sep=',')

In [3]:
print(df.head())

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt
0,2004-08-19,50.0,52.03,47.98,50.17,44703800,0
1,2004-08-20,50.505,54.54,50.25,54.155,22857200,0
2,2004-08-23,55.375,56.74,54.525,54.7,18274400,0
3,2004-08-24,55.62,55.8,51.785,52.435,15262600,0
4,2004-08-25,52.48,54.0,51.94,53.0,9197800,0


In [4]:
df.drop(['OpenInt'], axis=1, inplace=True)

In [5]:
print(df)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2004-08-19,50.000,52.030,47.980,50.170,44703800
1,2004-08-20,50.505,54.540,50.250,54.155,22857200
2,2004-08-23,55.375,56.740,54.525,54.700,18274400
3,2004-08-24,55.620,55.800,51.785,52.435,15262600
4,2004-08-25,52.480,54.000,51.940,53.000,9197800
5,2004-08-26,52.475,53.975,52.330,53.955,7102000
6,2004-08-27,54.050,54.310,52.845,53.075,6218000
7,2004-08-30,52.640,52.745,51.005,51.005,5202000
8,2004-08-31,51.150,51.855,51.080,51.185,4922800
9,2004-09-01,51.350,51.485,49.835,50.125,9147400


#### Shift Close values 30 rows down so model is trained to predict future close prices

In [6]:
# shift closing values 30 days into future
df.Close = df.Close.shift(30)

In [13]:
# removing 'NaN' values from dataframe
df = df.dropna()

In [14]:
print(df.head())

Unnamed: 0,Date,Open,High,Low,Close,Volume
30,2004-10-01,65.4,67.12,64.45,50.17,15140000
31,2004-10-04,67.625,68.435,67.015,54.155,13035800
32,2004-10-05,67.33,69.265,66.12,54.7,14988200
33,2004-10-06,68.775,69.225,68.0,52.435,13394800
34,2004-10-07,68.46,69.94,68.275,53.0,14129200


In [8]:
#split data into features and labels

X = df[['Open', 'High', 'Low', 'Volume']]
#remember the closing refers to the closing price 10 days in the future
y = df['Close']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.4, test_size=.4)
reg = linear_model.BayesianRidge()
reg.fit(X_train, y_train)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [10]:
prediction = reg.predict(X_test)

In [11]:
accuracy = r2_score(y_test, prediction)

In [12]:
print(accuracy)

0.9802543046793678
