## Different Models Stocks - Predicting Close Value from Open

In [222]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [223]:
df = pd.read_csv('NFLX_MI.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-10-24,164.5,164.5,164.5,164.5,164.5,1
1,2017-10-25,164.5,164.5,164.5,164.5,164.5,0
2,2017-10-26,164.5,164.5,164.5,164.5,164.5,0
3,2017-10-27,164.5,164.5,164.5,164.5,164.5,0
4,2017-10-30,164.5,164.5,164.5,164.5,164.5,0


## Sorting according to date

In [224]:
df['Date'] = pd.to_datetime(df['Date'])
df['Date'][:5]

0   2017-10-24
1   2017-10-25
2   2017-10-26
3   2017-10-27
4   2017-10-30
Name: Date, dtype: datetime64[ns]

In [225]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error

### 1. Linear Regression

In [226]:
X = np.asanyarray(df['Open'])
Y = np.asanyarray(df['Close'])
X = X.reshape(-1, 1)
Y = Y.reshape(-1,1)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=4)
print("Train Size: ",len(Y_train))
print("Train Size: ",len(Y_test))

Train Size:  799
Train Size:  200


In [227]:
from sklearn.linear_model import LinearRegression

In [228]:
lre = LinearRegression()
lre.get_params().keys()
parameters = {'fit_intercept':[True,False], 'normalize':[True,False], 'copy_X':[True, False]}
lre_grid = GridSearchCV(lre,parameters,cv=None)
lre_grid.fit(X_train,Y_train)
print("Done")

Done


In [229]:
lre_grid.score(X_test,Y_test)

0.9919711464534757

In [230]:
price = 592.50
price = np.asanyarray(price)
print(lre_grid.predict(price.reshape(-1,1)))

[[589.95685513]]


### 2. Random Forest Regression

In [231]:
from sklearn.ensemble import RandomForestRegressor

In [232]:
rfr = RandomForestRegressor()
rfr.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [233]:
rfr.fit(X_train,Y_train.ravel())
rfr.score(X_test,Y_test)

0.9869425272964318

### 3. Ridge Regression

In [234]:
from sklearn.linear_model import Ridge

In [235]:
ridge = Ridge()
ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [236]:
params = {'alpha':[1.0,2.0],'copy_X':[True,False],'normalize':[True,False]}
ridge_grid = GridSearchCV(ridge,params,cv=None)
ridge_grid.fit(X_train,Y_train)
print("Ridge Best Params: ",ridge_grid.best_params_)
print("Score: ",ridge_grid.score(X_test,Y_test))

Ridge Best Params:  {'alpha': 1.0, 'copy_X': True, 'normalize': False}
Score:  0.9919711428712045


### 4. Lasso Regression

In [237]:
from sklearn.linear_model import Lasso

In [238]:
lasso = Lasso()
lasso.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [239]:
params = {'alpha':[1.0,2.0,3.0]}
lasso_grid = GridSearchCV(lasso,params,cv=None)
lasso_grid.fit(X_train,Y_train)
print("Lasso Best Params",lasso_grid.best_params_)
print("Lasso Score",lasso_grid.score(X_test,Y_test))

Lasso Best Params {'alpha': 1.0}
Lasso Score 0.9919682464696862


### Evaluation of Models
Using Accuracy, F1-Score, Classification Matrix

### 1. Linear Regression

In [240]:
Y_preds_lre = lre_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_lre))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_lre))

R^2 Score:  0.9919711464534757
Mean Squared Error:  51.30942281604448


### 2. Random Forest Regression

In [241]:
Y_preds_rf = rfr.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_rf))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_rf))

R^2 Score:  0.9869425272964318
Mean Squared Error:  83.44546129457926


### 3. Ridge Regression

In [242]:
Y_preds_ridge = ridge_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_ridge))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_ridge))

R^2 Score:  0.9919711428712045
Mean Squared Error:  51.309445709009786


### 4. Lasso Regression

In [243]:
Y_preds_lasso = lasso_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_lasso))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_lasso))

R^2 Score:  0.9919682464696862
Mean Squared Error:  51.32795553600902


## Saving the models

In [244]:
import pickle

In [245]:
lre_name = "linear_regression.pickle"
rfr_name = "random_forest.pickle"
ridge_name = "ridge_regression.pickle"
lasso_name = "lasso_regression.pickle"

In [246]:
pickle.dump(lre_grid, open(lre_name,mode='wb'))
pickle.dump(rfr, open(rfr_name,mode='wb'))
pickle.dump(ridge_grid, open(ridge_name,mode='wb'))
pickle.dump(lasso_grid, open(lasso_name,mode='wb'))

#### Testing if loaded properly

In [247]:
model = pickle.load(open(lasso_name,mode='rb'))
print("Score: ",model.score(X_test,Y_test))

Score:  0.9919682464696862


## Decided on creating a linear regression model

In [248]:
lre = LinearRegression()

In [249]:
lre.fit(X_train,Y_train)
y_preds = lre.predict(X_test)

In [250]:
lre.score(X_test,Y_test)

0.9919711464534757

In [251]:
lre.get_params()

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': False,
 'positive': False}

In [252]:
params = {'copy_X':[True,False],'fit_intercept':[True,False],'normalize':[True,False],'positive':[True,False]}
lre_grid = GridSearchCV(lre,params,cv=5)
lre_grid.fit(X_train,Y_train)
lre_grid.best_params_

{'copy_X': True, 'fit_intercept': True, 'normalize': True, 'positive': True}

In [253]:
lre_grid.best_score_

0.9937339655931282

In [254]:
lre_grid.score(X_train,Y_train)

0.9937668155999513

In [255]:
lre_grid.fit(X,Y)
lre_grid.score(X_test,Y_test)

0.9921004305925755