In [None]:
# Optional one-time setup: uncomment to install chart_studio, which the import cell below pulls in.
# !pip install chart_studio

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

# offline plotting
# NOTE(review): `plot` is already imported from plotly.offline just above, so it is imported twice.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Prepare the notebook for plotly's offline rendering before any iplot() call.
# connected=True presumably loads plotly.js from a CDN instead of embedding it — TODO confirm.
init_notebook_mode(connected=True)

In [None]:
# Read the Tesla price history from its CSV file and preview the first rows.
tesla_csv = '../dataset/stock_price_predict/tesla.csv'
tesla = pd.read_csv(tesla_csv)
tesla.head()


In [None]:
# Read the Google price history from its CSV file and preview the first rows.
# NOTE(review): `google` is not referenced again in the cells shown here.
google_csv = '../dataset/stock_price_predict/google.csv'
google = pd.read_csv(google_csv)
google.head()

In [None]:
# Summarise the frame as loaded, before the 'Date' column is converted in the next cell.
tesla.info()

In [None]:
# Parse the 'Date' column into datetimes and store the result back in the same column,
# so the min/max and day-difference computations in later cells work on real dates.
parsed_dates = pd.to_datetime(tesla['Date'])
tesla['Date'] = parsed_dates

In [None]:
# Summarise the frame again to check the 'Date' column after the datetime conversion.
tesla.info()

In [None]:
# Report the earliest and latest dates present in the 'Date' column.
print(f'DataFrame contains stock prices between {tesla.Date.min()} ~ {tesla.Date.max()}')

In [None]:
# Calendar span between the first and last date, in whole days
# (the difference of the two dates, not the number of rows in the frame).
print(f'Total days : {(tesla.Date.max() - tesla.Date.min()).days} days')

In [None]:
# Descriptive statistics for the frame's columns.
tesla.describe()

In [None]:
# Box plot comparing the distributions of the five price columns.
price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
tesla[price_columns].plot.box()

In [28]:
# Shared layout for the Tesla figures: a chart title plus styled axis titles.
# Axis titles use the nested `title=dict(text=..., font=...)` form instead of the
# deprecated `titlefont` attribute, and both axes share one font definition.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

layout = go.Layout(
    title='Stock price of Tesla',
    xaxis=dict(
        title=dict(text='Date', font=axis_title_font)
    ),
    # The y-axis carries prices (the figures plot the 'Close' column), not dates.
    yaxis=dict(
        title=dict(text='Price', font=axis_title_font)
    )
)

In [29]:
# One trace: closing price against date, given as a plain dict of x/y values.
close_trace = {'x': tesla['Date'], 'y': tesla['Close']}
tesla_data = [close_trace]

# NOTE(review): binding the figure to `plot` rebinds the `plot` function imported from
# plotly.offline; the name is kept because the following cells refer to it.
plot = go.Figure(data=tesla_data, layout=layout)

In [30]:
# Bare expression: the notebook displays the figure built in the previous cell.
plot

In [31]:
# plot(plot) # plotting offline
# NOTE(review): the commented-out call above would not work as written — `plot` was rebound
# to the go.Figure when the figure was created, so it no longer names plotly.offline.plot.
# Render the figure inline in the notebook.
iplot(plot)

In [32]:
# Regression
from sklearn.model_selection import train_test_split

# Preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Model Evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [33]:
# split train, test
# Feature: each row's index value, shaped as a single-column 2-D array.
# Target: the closing price.
X = np.array(tesla.index)[:, np.newaxis]
y = tesla['Close']

# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
split = train_test_split(X, y, test_size=0.3, random_state=101)
X_train, X_test, y_train, y_test = split

In [34]:
# Fit a StandardScaler on the training feature only.
# NOTE(review): `scaler` is not applied to any data in the cells shown here.
scaler = StandardScaler()
scaler = scaler.fit(X_train)  # fit() returns the estimator itself

In [36]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit of closing price on the row-index feature.
lm = LinearRegression().fit(X_train, y_train)
# Leave the fitted estimator as the cell's last expression so the notebook displays it.
lm


In [38]:
# Plot actual and predicted values for train dataset
trace0 = go.Scatter(
    x = X_train.T[0],
    y = y_train,
    mode = 'markers',
    name = 'Actual'
)
trace1 = go.Scatter(
    x = X_train.T[0],
    y = lm.predict(X_train).T,
    mode = 'lines',
    name = 'Predicted'
)

In [39]:
# Build the actual-vs-predicted figure from the shared layout.
tesla_data = [trace0, trace1]
plot2 = go.Figure(data=tesla_data, layout=layout)

# The x-axis here is the row index ("day"), not a date. Relabel it on this figure's own
# layout rather than on the shared `layout` object, so re-running the earlier price-figure
# cell still shows its 'Date' axis title.
plot2.layout.xaxis.title.text = 'Day'

In [40]:
# Render the actual-vs-predicted figure inline in the notebook.
iplot(plot2)

In [41]:
# Calculate scores for model evaluation

# Predict once per split and reuse the results for every metric.
train_pred = lm.predict(X_train)
test_pred = lm.predict(X_test)

# (metric name, train value, test value)
metric_rows = [
    ('r2_score', r2_score(y_train, train_pred), r2_score(y_test, test_pred)),
    ('MSE', mse(y_train, train_pred), mse(y_test, test_pred)),
]

# Header and value columns share the same widths (10 / 20 / 20) so the numbers
# line up under 'Train' and 'Test'.
header = f"{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}"
table_lines = [header] + [
    f"{metric.ljust(10)}{train_score:^20.4f}{test_score:^20.4f}"
    for metric, train_score, test_score in metric_rows
]

# Keep the leading and trailing newline the report has always been printed with.
scores = '\n' + '\n'.join(table_lines) + '\n'
print(scores)


Metric           Train                Test        
r2_score  0.44921483057462286	0.4319788328864985
MSE       35834.067709721145	32083.18991181633

