## Different Models Stocks - Predicting Close Value from Open

In [35]:
import pandas as pd
import numpy as np

In [36]:
df = pd.read_csv('AAPL.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-09-25,108.43,112.440002,107.669998,112.279999,111.562439,149981400
1,2020-09-28,115.010002,115.32,112.779999,114.959999,114.225311,137672400
2,2020-09-29,114.550003,115.309998,113.57,114.089996,113.360878,99382200
3,2020-09-30,113.790001,117.260002,113.620003,115.809998,115.069893,142675200
4,2020-10-01,117.639999,117.720001,115.830002,116.790001,116.043625,116120400


## Sorting according to date

In [37]:
df['Date'] = pd.to_datetime(df['Date'])
df['Date'][:5]

0   2020-09-25
1   2020-09-28
2   2020-09-29
3   2020-09-30
4   2020-10-01
Name: Date, dtype: datetime64[ns]

In [38]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error

### 1. Linear Regression

In [39]:
X = np.asanyarray(df['Open'])
Y = np.asanyarray(df['Close'])
X = X.reshape(-1, 1)
Y = Y.reshape(-1,1)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=4)
print("Train Size: ",len(Y_train))
print("Train Size: ",len(Y_test))

Train Size:  201
Train Size:  51


In [40]:
from sklearn.linear_model import LinearRegression

In [41]:
lre = LinearRegression()
lre.get_params().keys()
parameters = {'fit_intercept':[True,False], 'normalize':[True,False], 'copy_X':[True, False]}
lre_grid = GridSearchCV(lre,parameters,cv=None)
lre_grid.fit(X_train,Y_train)
print("Done")

Done


In [42]:
lre_grid.score(X_test,Y_test)

0.9716134965067678

In [43]:
price = 592.50
price = np.asanyarray(price)
print(lre_grid.predict(price.reshape(-1,1)))

[[592.32812372]]


### 2. Random Forest Regression

In [44]:
from sklearn.ensemble import RandomForestRegressor

In [45]:
rfr = RandomForestRegressor()
rfr.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [46]:
rfr.fit(X_train,Y_train.ravel())
rfr.score(X_test,Y_test)

0.9622124272470758

### 3. Ridge Regression

In [47]:
from sklearn.linear_model import Ridge

In [48]:
ridge = Ridge()
ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [49]:
params = {'alpha':[1.0,2.0],'copy_X':[True,False],'normalize':[True,False]}
ridge_grid = GridSearchCV(ridge,params,cv=None)
ridge_grid.fit(X_train,Y_train)
print("Ridge Best Params: ",ridge_grid.best_params_)
print("Score: ",ridge_grid.score(X_test,Y_test))

Ridge Best Params:  {'alpha': 2.0, 'copy_X': True, 'normalize': False}
Score:  0.9718916783088138


### 4. Lasso Regression

In [50]:
from sklearn.linear_model import Lasso

In [51]:
lasso = Lasso()
lasso.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [52]:
params = {'alpha':[1.0,2.0,3.0]}
lasso_grid = GridSearchCV(lasso,params,cv=None)
lasso_grid.fit(X_train,Y_train)
print("Lasso Best Params",lasso_grid.best_params_)
print("Lasso Score",lasso_grid.score(X_test,Y_test))

Lasso Best Params {'alpha': 1.0}
Lasso Score 0.9725502296819046


### Evaluation of Models
Using Accuracy, F1-Score, Classification Matrix

### 1. Linear Regression

In [53]:
Y_preds_lre = lre_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_lre))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_lre))

R^2 Score:  0.9716134965067678
Mean Squared Error:  3.9604876498684924


### 2. Random Forest Regression

In [54]:
Y_preds_rf = rfr.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_rf))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_rf))

R^2 Score:  0.9622124272470758
Mean Squared Error:  5.272125721371205


### 3. Ridge Regression

In [55]:
Y_preds_ridge = ridge_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_ridge))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_ridge))

R^2 Score:  0.9718916783088138
Mean Squared Error:  3.921675698559144


### 4. Lasso Regression

In [56]:
Y_preds_lasso = lasso_grid.predict(X_test)
print("R^2 Score: ",r2_score(Y_test,Y_preds_lasso))
print("Mean Squared Error: ",mean_squared_error(Y_test,Y_preds_lasso))

R^2 Score:  0.9725502296819046
Mean Squared Error:  3.8297945487531493


## Saving the models

In [57]:
import pickle

In [58]:
lre_name = "linear_regression_aapl.pickle"
rfr_name = "random_forest_aapl.pickle"
ridge_name = "ridge_regression_aapl.pickle"
lasso_name = "lasso_regression_aapl.pickle"

In [59]:
pickle.dump(lre_grid, open(lre_name,mode='wb'))
pickle.dump(rfr, open(rfr_name,mode='wb'))
pickle.dump(ridge_grid, open(ridge_name,mode='wb'))
pickle.dump(lasso_grid, open(lasso_name,mode='wb'))

#### Testing if loaded properly

In [60]:
model = pickle.load(open(lasso_name,mode='rb'))
print("Score: ",model.score(X_test,Y_test))

Score:  0.9725502296819046


## Decided on creating a linear regression model

In [61]:
lre = LinearRegression()

In [62]:
lre.fit(X_train,Y_train)
y_preds = lre.predict(X_test)

In [63]:
lre.score(X_test,Y_test)

0.9718843793422073

In [64]:
lre.get_params()

{'copy_X': True,
 'fit_intercept': True,
 'n_jobs': None,
 'normalize': False,
 'positive': False}

In [65]:
params = {'copy_X':[True,False],'fit_intercept':[True,False],'normalize':[True,False],'positive':[True,False]}
lre_grid = GridSearchCV(lre,params,cv=5)
lre_grid.fit(X_train,Y_train)
lre_grid.best_params_

{'copy_X': True, 'fit_intercept': False, 'normalize': True, 'positive': False}

In [66]:
lre_grid.best_score_

0.9752638775528141

In [67]:
lre_grid.score(X_train,Y_train)

0.9762446226498078

In [68]:
lre_grid.fit(X,Y)
lre_grid.score(X_test,Y_test)

0.9717276063681367

In [69]:
print("Finished")

Finished
