In [150]:
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import numpy as np
import seaborn as sns
import os

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.feature_selection import RFECV, SelectFromModel, SelectKBest
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
%matplotlib inline

In [151]:
Stock = pd.read_csv('AAPL.csv', index_col = 0)
pd.set_option('display.max_rows', None)
Stock.head()
df_Stock = Stock
df_Stock = df_Stock.rename(columns = {'Close(t)':'Close'})
df_Stock.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3732 entries, 2005-10-17 to 2020-08-13
Data columns (total 63 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Open              3732 non-null   float64
 1   High              3732 non-null   float64
 2   Low               3732 non-null   float64
 3   Close             3732 non-null   float64
 4   Volume            3732 non-null   int64  
 5   SD20              3732 non-null   float64
 6   Upper_Band        3732 non-null   float64
 7   Lower_Band        3732 non-null   float64
 8   S_Close(t-1)      3732 non-null   float64
 9   S_Close(t-2)      3732 non-null   float64
 10  S_Close(t-3)      3732 non-null   float64
 11  S_Close(t-5)      3732 non-null   float64
 12  S_Open(t-1)       3732 non-null   float64
 13  MA5               3732 non-null   float64
 14  MA10              3732 non-null   float64
 15  MA20              3732 non-null   float64
 16  MA50              3732 non-null 

In [152]:
#PLotting Time Series
import plotly.express as px
fig = px.line(df_Stock, x = 'Date_col', y = 'Close', title = 'AAPL Stock Price vs Time')
fig.show()

In [153]:
df_Stock = df_Stock.drop(['Date_col'], axis = 1)
def scaledataset(df_Stock):
    features = df_Stock.drop(['Close_forcast'], axis = 1)
    target = df_Stock['Close']
    
    data_len = df_Stock.shape[0]
    print(f'Stock Data Length: {data_len}')
    
    train_split = int(data_len*0.8)
    print(f'Training Set Length: {train_split}')
    
    val_split = train_split + int(data_len*0.1)
    print(f'Validation Set Length: {val_split}')
    
    print(f'Test Set Length: {int(data_len*0.2)}')
    
    X_train, X_val, X_test = features[:train_split], features[train_split:val_split], features[val_split:]
    Y_train, Y_val, Y_test = target[:train_split], target[train_split:val_split], target[val_split:]
        #print shape of samples
    print(X_train.shape, X_val.shape, X_test.shape)
    print(Y_train.shape, Y_val.shape, Y_test.shape)
    
    return X_train, X_val, X_test, Y_train, Y_val, Y_test

In [154]:
X_train,X_val, X_test, y_train, y_val, y_test = scaledataset(df_Stock)

Stock Data Length: 3732
Training Set Length: 2985
Validation Set Length: 3358
Test Set Length: 746
(2985, 61) (373, 61) (374, 61)
(2985,) (373,) (374,)


In [156]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)

In [157]:
print('LR Coefficients: \n', lr.coef_)
print('LR Intercept: \n', lr.intercept_)

LR Coefficients: 
 [ 1.89468694e-08 -2.48338878e-08 -1.34573705e-08  1.00000002e+00
  8.67361738e-19 -6.84088619e-11  7.11900629e-10  3.68427039e-10
 -1.55414167e-08 -2.98403108e-09 -1.01243189e-09 -1.60534289e-09
  1.63351915e-10 -1.08884172e-08 -6.81641861e-09 -1.55370113e-09
  1.53083102e-09  5.56280161e-11  7.94270119e-09  7.94249957e-09
  7.94164329e-09  7.94012811e-09  7.94386049e-09 -7.73762240e-09
  1.18306137e-08 -1.70901321e-10 -7.17945599e-11 -6.67259754e-11
  5.55161389e-10 -4.06030314e-11  4.62839905e-11 -4.64528971e-11
  2.08068837e-08  0.00000000e+00 -2.77555756e-17  9.74352196e-12
 -4.27012037e-11 -9.34390759e-12 -2.42250664e-12  1.83739135e-12
  1.40389436e-12 -8.20797597e-12  1.45433856e-11 -2.81728720e-12
  1.27675648e-15  1.99230979e-11  2.39189224e-12  2.90733429e-10
  9.54458734e-11 -1.57264479e-11 -4.20483509e-11 -3.36247315e-11
  2.04708993e-11 -3.50434785e-11  1.51344423e-11 -4.63959079e-12
  2.19531338e-12  2.43569054e-12  2.62762034e-13 -1.62682368e-13
 -1.02

In [159]:
print("Performance (R^2): ", lr.score(X_train, y_train))

Performance (R^2):  1.0


In [160]:
def get_mape(y_true, y_pred): 
    """
    Compute mean absolute percentage error (MAPE)
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [161]:
y_train_pred = lr.predict(X_train)
y_val_pred = lr.predict(X_val)
y_test_pred = lr.predict(X_test)

In [163]:
print("Training R-squared: ",round(metrics.r2_score(y_train,y_train_pred),2))

print("Training Explained Variation: ",round(metrics.explained_variance_score(y_train,y_train_pred),2))

print('Training MAPE:', round(get_mape(y_train,y_train_pred), 2)) 

print('Training Mean Squared Error:', round(metrics.mean_squared_error(y_train,y_train_pred), 2)) 

print("Training RMSE: ",round(np.sqrt(metrics.mean_squared_error(y_train,y_train_pred)),2))

print("Training MAE: ",round(metrics.mean_absolute_error(y_train,y_train_pred),2))

print(' ')

print("Validation R-squared: ",round(metrics.r2_score(y_val,y_val_pred),2))
print("Validation Explained Variation: ",round(metrics.explained_variance_score(y_val,y_val_pred),2))
print('Validation MAPE:', round(get_mape(y_val,y_val_pred), 2)) 
print('Validation Mean Squared Error:', round(metrics.mean_squared_error(y_train,y_train_pred), 2)) 
print("Validation RMSE: ",round(np.sqrt(metrics.mean_squared_error(y_val,y_val_pred)),2))
print("Validation MAE: ",round(metrics.mean_absolute_error(y_val,y_val_pred),2))
print(' ')

print("Test R-squared: ",round(metrics.r2_score(y_test,y_test_pred),2))
print("Test Explained Variation: ",round(metrics.explained_variance_score(y_test,y_test_pred),2))
print('Test MAPE:', round(get_mape(y_test,y_test_pred), 2)) 
print('Test Mean Squared Error:', round(metrics.mean_squared_error(y_test,y_test_pred), 2)) 
print("Test RMSE: ",round(np.sqrt(metrics.mean_squared_error(y_test,y_test_pred)),2))
print("Test MAE: ",round(metrics.mean_absolute_error(y_test,y_test_pred),2))

Training R-squared:  1.0
Training Explained Variation:  1.0
Training MAPE: 0.0
Training Mean Squared Error: 0.0
Training RMSE:  0.0
Training MAE:  0.0
 
Validation R-squared:  1.0
Validation Explained Variation:  1.0
Validation MAPE: 0.0
Validation Mean Squared Error: 0.0
Validation RMSE:  0.0
Validation MAE:  0.0
 
Test R-squared:  1.0
Test Explained Variation:  1.0
Test MAPE: 0.0
Test Mean Squared Error: 0.0
Test RMSE:  0.0
Test MAE:  0.0


In [166]:
df_pred = pd.DataFrame(y_val.values, columns=['Actual'], index=y_val.index)
df_pred['Predicted'] = y_val_pred
df_pred = df_pred.reset_index()
df_pred.loc[:, 'Date'] = pd.to_datetime(df_pred['Date'],format='%Y-%m-%d')

In [171]:
import numpy as np
import pandas as pd

# plotly packages
import plotly
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)
cf.go_offline()
df_pred[["Actual", "Predicted"]].iplot()