## Different Models Stocks - Predicting Close Value from Open

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('NFLX.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-09-25,474.390015,484.869995,468.029999,482.880005,482.880005,3769400
1,2020-09-28,489.109985,492.0,477.880005,490.649994,490.649994,4773500
2,2020-09-29,489.5,496.290009,486.529999,493.480011,493.480011,3541500
3,2020-09-30,492.570007,504.630005,489.269989,500.029999,500.029999,4634100
4,2020-10-01,506.029999,529.549988,503.600006,527.51001,527.51001,8153700


## Sorting according to date

In [3]:
df['Date'] = pd.to_datetime(df['Date'])
df['Date'][:5]

0   2020-09-25
1   2020-09-28
2   2020-09-29
3   2020-09-30
4   2020-10-01
Name: Date, dtype: datetime64[ns]

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error

### 1. Linear Regression

In [5]:
X = np.asanyarray(df['Open'])
Y = np.asanyarray(df['Close'])
X = X.reshape(-1, 1)
Y = Y.reshape(-1,1)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=4)
print("Train Size: ",len(Y_train))
print("Train Size: ",len(Y_test))

Train Size:  201
Train Size:  51


In [6]:
from sklearn.linear_model import LinearRegression

In [7]:
lre = LinearRegression()
lre.get_params().keys()
parameters = {'fit_intercept':[True,False], 'normalize':[True,False], 'copy_X':[True, False]}
lre_grid = GridSearchCV(lre,parameters,cv=None)
lre_grid.fit(X_train,Y_train)
print("Done")

Done


In [8]:
lre_grid.score(X_test,Y_test)

0.9281459668666899

In [9]:
price = 592.50
price = np.asanyarray(price)
print(lre_grid.predict(price.reshape(-1,1)))

[[590.85159867]]


### 2. Random Forest Regression

In [10]:
from sklearn.ensemble import RandomForestRegressor

In [11]:
rfr = RandomForestRegressor()
rfr.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [12]:
rfr.fit(X_train,Y_train.ravel())
rfr.score(X_test,Y_test)

0.9033714467782668

### 3. Ridge Regression

In [13]:
from sklearn.linear_model import Ridge

In [14]:
ridge = Ridge()
ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [15]:
params = {'alpha':[1.0,2.0],'copy_X':[True,False],'normalize':[True,False]}
ridge_grid = GridSearchCV(ridge,params,cv=None)
ridge_grid.fit(X_train,Y_train)
print("Ridge Best Params: ",ridge_grid.best_params_)
print("Score: ",ridge_grid.score(X_test,Y_test))

Ridge Best Params:  {'alpha': 2.0, 'copy_X': True, 'normalize': False}
Score:  0.9281464223779379


### 4. Lasso Regression

In [16]:
from sklearn.linear_model import Lasso

In [17]:
lasso = Lasso()
lasso.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [18]:
params = {'alpha':[1.0,2.0,3.0]}
lasso_grid = GridSearchCV(lasso,params,cv=None)
lasso_grid.fit(X_train,Y_train)
print("Lasso Best Params",lasso_grid.best_params_)
print("Lasso Score",lasso_grid.score(X_test,Y_test))

Lasso Best Params {'alpha': 1.0}
Lasso Score 0.9281914896582076


### Evaluation of Models
Using Accuracy, F1-Score, Classification Matrix

### 1. Linear Regression

In [19]:
Y_preds_lre = lre_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_lre))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_lre))

R^2 Score:  0.9281459668666899
Mean Squared Error:  75.36409899827412


### 2. Random Forest Regression

In [20]:
Y_preds_rf = rfr.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_rf))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_rf))

R^2 Score:  0.9033714467782668
Mean Squared Error:  101.34885313329427


### 3. Ridge Regression

In [21]:
Y_preds_ridge = ridge_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_ridge))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_ridge))

R^2 Score:  0.9281464223779379
Mean Squared Error:  75.36362123532483


### 4. Lasso Regression

In [22]:
Y_preds_lasso = lasso_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_lasso))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_lasso))

R^2 Score:  0.9281914896582076
Mean Squared Error:  75.31635242070548


## Saving the models

In [23]:
import pickle

In [24]:
lre_name = "linear_regression.sav"
rfr_name = "random_forest.sav"
ridge_name = "ridge_regression.sav"
lasso_name = "lasso_regression.sav"

In [25]:
pickle.dump(lre_grid, open(lre_name,mode='wb'))
pickle.dump(rfr, open(rfr_name,mode='wb'))
pickle.dump(ridge_grid, open(ridge_name,mode='wb'))
pickle.dump(lasso_grid, open(lasso_name,mode='wb'))

#### Testing if loaded properly

In [27]:
model = pickle.load(open(lasso_name,mode='rb'))
print("Score: ",model.score(X_test,Y_test))

Score:  0.9281914896582076
