In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
# Load the price table from data/MSFT.csv and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data/MSFT.csv')
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1986-03-14,0.097222,0.102431,0.097222,0.100694,0.063158,308160000
1,1986-03-17,0.100694,0.103299,0.100694,0.102431,0.064247,133171200
2,1986-03-18,0.102431,0.103299,0.098958,0.099826,0.062613,67766400
3,1986-03-19,0.099826,0.100694,0.097222,0.09809,0.061524,47894400
4,1986-03-20,0.09809,0.09809,0.094618,0.095486,0.059891,58435200


In [12]:
# Double-bracket selection keeps each column 2-D, so both arrays are
# already (n_samples, 1) column vectors as scikit-learn expects.
X = df.loc[:, ['Open']].values
y = df.loc[:, ['High']].values

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=5, test_size=0.3
)

### Create Linear Regression Model

In [13]:
# Fit High as a linear function of Open, using the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [14]:
# Report the fitted slope and intercept, then the line equation rounded to 2 decimals.
# coef_ is indexed twice and intercept_ once because y was passed as a column vector.
print('Coefficient:', model.coef_[0][0])
print('Intercept:', model.intercept_[0])
print(
    'Equation: High = ', np.round(model.coef_[0][0], 2),
    '*Open + ', np.round(model.intercept_[0], 2),
    sep='',
)

Coefficient: 1.009894673042586
Intercept: 0.033740792417681575
Equation: High = 1.01*Open + 0.03


In [15]:
# Predict High for every Open value in the held-out test split.
y_pred = model.predict(X_test)

# Predict High for a single, previously unseen Open value.
# predict() expects a 2-D (n_samples, n_features) array, hence the nested list.
nopen = np.array([[5]])
High = model.predict(nopen)
# The input is an opening price, so label it as such (it was mislabelled '#Years:').
print('Open:', nopen[0, 0])
print('~High:', np.round(High[0, 0], 2))

#Years: 5
~High: 5.08


In [16]:
# Mean squared error on the test split: sum of squared residuals over the row count.
mse = np.sum((y_test - y_pred) ** 2) / y_test.shape[0]
print('Mean Squared Error (MSE)  : ', np.round(mse, 2))

Mean Squared Error (MSE)  :  0.61


In [17]:
# 100 evenly spaced Open values across the observed range, used to draw the fitted line.
x_range = np.linspace(start=X.min(), stop=X.max(), num=100)
# predict() needs a 2-D input, so add a trailing feature axis.
y_range = model.predict(x_range[:, np.newaxis])

In [18]:
# Overlay the training points, the test points and the fitted line in one figure.
# The (n, 1) arrays are flattened to 1-D because Scatter takes flat x/y sequences.
fig = go.Figure(
    data=[
        go.Scatter(name='train', mode='markers', x=X_train.ravel(), y=y_train.ravel()),
        go.Scatter(name='test', mode='markers', x=X_test.ravel(), y=y_test.ravel()),
        go.Scatter(name='Linear Regression Fit', x=x_range, y=y_range.ravel()),
    ]
)

fig.update_layout(
    title="Linear Regression for Prediction High based on Open ",
    xaxis_title="Open",
    yaxis_title="High",
    font={
        "family": "Courier New, monospace",
        "size": 10,
        "color": "RebeccaPurple",
    },
)

fig.show()

## Polynomial Regression 


In [19]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [28]:
# Read data/MSFT.csv into a DataFrame and show its first five rows.
df = pd.read_csv(filepath_or_buffer='data/MSFT.csv')
df.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1986-03-14,0.097222,0.102431,0.097222,0.100694,0.063158,308160000
1,1986-03-17,0.100694,0.103299,0.100694,0.102431,0.064247,133171200
2,1986-03-18,0.102431,0.103299,0.098958,0.099826,0.062613,67766400
3,1986-03-19,0.099826,0.100694,0.097222,0.09809,0.061524,47894400
4,1986-03-20,0.09809,0.09809,0.094618,0.095486,0.059891,58435200


In [29]:
# Selecting with a list of column names keeps the result 2-D, giving
# (n_samples, 1) arrays for both the feature (Open) and the target (High).
X = df.loc[:, ['Open']].values
y = df.loc[:, ['High']].values

# 70/30 train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=5, test_size=0.3
)

In [30]:
# Degree-3 polynomial expansion of the single input feature.
# include_bias=False omits the constant column from the expanded features.
poly = PolynomialFeatures(include_bias=False, degree=3)

In [31]:
# Fit the feature expansion on the training inputs, then apply it to them.
# X_train is already (n_samples, 1), so no reshape is needed.
X_train_poly = poly.fit(X_train).transform(X_train)

In [32]:
# Linear model with default settings; it is fitted on the polynomial-expanded features.
poly_reg_model = LinearRegression()

In [33]:
# Fit the regression on the expanded training features against the High targets.
poly_reg_model.fit(X=X_train_poly, y=y_train)

In [34]:
# Expand the test inputs with the transformer that was fitted on the training
# data. Use transform(), not fit_transform(): a preprocessing step must never
# be re-fitted on the held-out split.
X_test_poly = poly.transform(X_test)
# Predict High for the held-out rows from the expanded features.
y_pred = poly_reg_model.predict(X_test_poly)

In [35]:
# Test-split mean squared error of the polynomial model.
mse_poly = np.sum((y_test - y_pred) ** 2) / y_test.shape[0]
print('Mean Squared Error (MSE)  : ', np.round(mse_poly, 2))

Mean Squared Error (MSE)  :  0.61


In [36]:
# 100 evenly spaced Open values across the observed range, used to draw the fitted curve.
x_range = np.linspace(X.min(), X.max(), 100)
# Expand the grid with the already-fitted transformer (transform, not
# fit_transform, so the plotting grid never re-fits it), then predict.
y_range = poly_reg_model.predict(poly.transform(x_range.reshape(-1, 1)))

In [37]:
# Overlay the training points, the test points and the fitted polynomial curve.
# The trace name and title say "Polynomial" because this figure shows the
# degree-3 model; they previously carried the linear model's labels.
fig = go.Figure([
    go.Scatter(x=X_train[:, 0], y=y_train[:, 0], name='train', mode='markers'),
    go.Scatter(x=X_test[:, 0], y=y_test[:, 0], name='test', mode='markers'),
    go.Scatter(x=x_range.squeeze(), y=y_range.squeeze(), name='Polynomial Regression Fit'),
])

fig.update_layout(
    title="Polynomial Regression for Predicting High based on Open",
    xaxis_title="Open",
    yaxis_title="High",
    font=dict(
        family="Courier New, monospace",
        size=10,
        color="RebeccaPurple"
    )
)

fig.show()