In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# import chart_studio.plotly as py
import plotly.graph_objs as go

In [5]:
# Load the raw S&P 500 price history from the CSV export sitting next to the notebook.
SP500_CSV_PATH = '^SPX (2).csv'
sp500 = pd.read_csv(SP500_CSV_PATH)

In [6]:
# Parse the 'Date' column into pandas datetimes (the column is replaced in place on sp500).
sp500['Date'] = sp500['Date'].pipe(pd.to_datetime)

In [7]:
#Layout
# Shared figure layout for the price charts.
#
# Axis titles use the nested `title=dict(text=..., font=...)` form. The flat
# `titlefont` axis attribute is deprecated and rejected by newer Plotly
# releases, while the nested form is the same structure that
# `layout.xaxis.title.text` addresses elsewhere in this notebook.
AXIS_TITLE_FONT = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

layout = go.Layout(
    title=dict(text='Stock Prices of sp500'),
    xaxis=dict(
        title=dict(text='Date', font=AXIS_TITLE_FONT)
    ),
    yaxis=dict(
        title=dict(text='Price', font=AXIS_TITLE_FONT)
    )
)

In [8]:
#Building the regression model
from sklearn.model_selection import train_test_split

#preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [9]:
# Build the feature matrix and target, then hold out 30% of the rows for testing.
# The single feature is the DataFrame's row index, shaped as one column (n, 1);
# the target is the 'Close' column.
# NOTE(review): the split is randomised (seeded with random_state=101); for
# time-ordered price data, confirm that a random rather than chronological
# hold-out is intended.
X = np.array(sp500.index)[:, np.newaxis]
Y = sp500['Close']
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=101,
)

In [10]:
#Feature Scaling
# Fit a StandardScaler on the training feature only.
# NOTE(review): no visible cell calls scaler.transform, so the model and the
# plots below operate on the unscaled X_train -- confirm whether scaling was
# meant to be applied.
scaler = StandardScaler()
scaler = scaler.fit(X_train)

In [11]:
from sklearn.linear_model import LinearRegression

In [12]:
# Fit a linear regression of the close price on the row-index feature,
# using the training split only.
lm = LinearRegression()
lm.fit(X=X_train, y=Y_train)

In [13]:
# Train-set figure: observed closes as markers, model predictions as a line.
# X_train has a single column, so column 0 is the x value of every point.
trace0 = go.Scatter(x=X_train[:, 0], y=Y_train, mode='markers', name='Actual')
trace1 = go.Scatter(
    x=X_train[:, 0],
    y=np.transpose(lm.predict(X_train)),
    mode='lines',
    name='Predicted',
)

# NOTE: this relabels the x axis on the shared `layout` object itself, so any
# later figure built from `layout` also gets 'Day' as its x-axis title.
layout.xaxis.title.text = 'Day'
sp500_data = [trace0, trace1]
plot2 = go.Figure(data=sp500_data, layout=layout)

In [14]:
# Render the train-set figure (actual vs. predicted) built in the previous cell.
plot2.show()

In [18]:
#Scores
# Report R^2, MSE and RMSE for the train and test splits.
#
# Each split is predicted once and the result reused for every metric.
train_pred = lm.predict(X_train)
test_pred = lm.predict(X_test)

train_mse = mse(Y_train, train_pred)
test_mse = mse(Y_test, test_pred)

# RMSE is taken as sqrt(MSE) rather than mean_squared_error(..., squared=False):
# the `squared` argument is deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas the square root works on every version.
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)

scores = f'''
{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(Y_train, train_pred)}\t{r2_score(Y_test, test_pred)}
{'MSE'.ljust(10)}{train_mse}\t{test_mse}
{'RMSE'.ljust(10)}{train_rmse}\t{test_rmse}
'''

print(scores)


Metric           Train                Test        
r2_score  0.9304283399974951	0.9339804766284772
MSE       85601.03807810986	79197.54847994441
RMSE      292.57655080014507	281.42059000710026

