In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

# Offline plotting helpers from plotly; init_notebook_mode is called once here
# so that the later iplot(...) cells can render figures inside the notebook.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True) 

In [None]:
# Load the price history from disk. `csv_file_title` (the file name up to the
# first dot) is reused later as the instrument name in the plot title.
csv_file = 'GOOG.csv' #replace with the proper .csv file name
csv_file_title = csv_file.split(".")[0]
google = pd.read_csv(csv_file)

# A notebook cell only renders its *last* expression automatically, so a bare
# `google.head()` in the middle of the cell is silently discarded. Display the
# preview explicitly, then leave the row count as the cell's result.
display(google.head())
len(google)

18

In [59]:
# Summarise column names, non-null counts and dtypes of the loaded frame.
google.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   datetime  18 non-null     object 
 1   open      18 non-null     float64
 2   high      18 non-null     float64
 3   low       18 non-null     float64
 4   close     18 non-null     float64
 5   volume    18 non-null     int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 992.0+ bytes


In [60]:
# Convert the 'datetime' column to pandas datetimes so that min/max and
# date arithmetic in the following cells work on real timestamps.
parsed_datetimes = pd.to_datetime(google['datetime'])
google['datetime'] = parsed_datetimes

In [61]:
# Report the first and last timestamp in the data and the calendar-day span
# between them (each extreme is computed once and reused).
first_timestamp = google.datetime.min()
last_timestamp = google.datetime.max()
print(f'Dataframe contains stock prices between {first_timestamp} {last_timestamp}') 
print(f'Total days = {(last_timestamp - first_timestamp).days} days')

Dataframe contains stock prices between 2024-09-30 00:00:00 2024-10-23 00:00:00
Total days = 23 days


In [62]:
# Descriptive statistics (count, mean, std, quartiles, min/max) per column.
google.describe()

Unnamed: 0,open,high,low,close,volume
count,18.0,18.0,18.0,18.0,18.0
mean,0.851667,0.876111,0.826667,0.845556,94338.888889
std,0.035021,0.02852,0.03272,0.035016,75117.634327
min,0.8,0.83,0.77,0.77,11400.0
25%,0.83,0.8525,0.8025,0.8225,41800.0
50%,0.85,0.875,0.82,0.835,81900.0
75%,0.87,0.8975,0.86,0.87,111225.0
max,0.92,0.93,0.87,0.91,285200.0


In [63]:
# Setting the layout for our plot
# The date range in the title is derived from the data instead of being
# hard-coded, so it cannot drift out of sync with the loaded CSV.
# pd.to_datetime is a no-op if the column has already been converted.
title_dates = pd.to_datetime(google['datetime'])
title_start = title_dates.min().strftime('%Y.%m.%d')
title_end = title_dates.max().strftime('%Y.%m.%d')

# Shared font for both axis titles.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

# Axis titles use the nested `title=dict(text=..., font=...)` form rather than
# the deprecated `titlefont` attribute; this matches the `title.text` access
# used further down in the notebook.
layout = go.Layout(
    title=f'Stock Prices of {csv_file_title} from {title_start} - {title_end}',
    xaxis=dict(
        title=dict(text='Date', font=axis_title_font)
    ),
    yaxis=dict(
        title=dict(text='Price', font=axis_title_font)
    )
)

# Closing price over time as a single scatter trace.
google_data = [go.Scatter(x=google['datetime'], y=google['close'])]

# NOTE(review): `plot` rebinds the name imported from plotly.offline in the
# first cell. The name is kept because the next cell calls iplot(plot).
plot = go.Figure(data=google_data, layout=layout)

In [64]:
# Render the closing-price figure inline in the notebook.
# The author's alternative, plot(plot), is left disabled; note that `plot` now
# names the figure built in the previous cell, not the plotly.offline function.
iplot(plot)

In [65]:
# Building the regression model
from sklearn.model_selection import train_test_split

#For preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [66]:
# Build the regression inputs: the feature is the row index as a single
# column of shape (n, 1), the target is the closing price. 30% of the rows
# are held out for testing; the fixed random_state makes the split repeatable.
X = google.index.to_numpy().reshape(-1, 1)
Y = google['close']
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=101,
)

In [67]:
# Feature scaling: fit a standardiser on the training features only.
# NOTE(review): in the cells visible here the fitted scaler is never applied
# (the model below is trained on the unscaled X_train) - confirm intent.
scaler = StandardScaler()
scaler.fit(X_train)

In [68]:
from sklearn.linear_model import LinearRegression

In [69]:
# Fit an ordinary least-squares line of closing price against row index,
# using the training split only.
lm = LinearRegression()
lm.fit(X=X_train, y=Y_train)

In [70]:
#Plot actual and predicted values for train dataset
# X_train is a single-column (n, 1) array; take that column for the x axis.
train_days = X_train[:, 0]

# Observed closing prices as markers.
trace0 = go.Scatter(
    x=train_days,
    y=Y_train,
    mode='markers',
    name='Actual'
)
# Fitted regression line evaluated at the same x positions.
trace1 = go.Scatter(
    x=train_days,
    y=lm.predict(X_train),
    mode='lines',
    name='Predicted'
)
google_data = [trace0, trace1]
plot2 = go.Figure(data=google_data, layout=layout)

# Relabel the x axis on this figure's own layout instead of mutating the
# shared `layout` object, so that re-running this cell does not change what
# other figures built from `layout` would show.
plot2.layout.xaxis.title.text = 'Day'

In [71]:
# Render the actual-vs-predicted training figure inline in the notebook.
iplot(plot2)

In [72]:
# Evaluate the fitted line on both splits. Each prediction vector is
# computed once and reused for R^2 and mean squared error.
train_predictions = lm.predict(X_train)
test_predictions = lm.predict(X_test)

header_row = 'Metric'.ljust(10) + 'Train'.center(20) + 'Test'.center(20)
r2_row = (
    'r2_score'.ljust(10)
    + f'{r2_score(Y_train, train_predictions)}\t{r2_score(Y_test, test_predictions)}'
)
mse_row = (
    'MSE'.ljust(10)
    + f'{mse(Y_train, train_predictions)}\t{mse(Y_test, test_predictions)}'
)

# The report starts and ends with a newline, one metric per line.
scores = '\n' + '\n'.join([header_row, r2_row, mse_row]) + '\n'
print(scores)


Metric           Train                Test        
r2_score  0.4005293377531678	-0.8886615125371655
MSE       0.0008738117500389592	0.001033517549916173

