# Stock predictions

I wanted to invest three S&P 500 Companies which are Apple, AMD and NVDA and i got their Historical Data of the recent 6 months. I will try to use linear Regression model to analyze how the their close price changes over time.

## Apple

In [52]:
import pandas as pd
Apple = pd.read_csv('Apple.csv')
Apple

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,5/18/21,124.85,63342930,126.56,126.99,124.78
1,5/19/21,124.69,92611990,123.16,124.92,122.86
2,5/20/21,127.31,76857120,125.23,127.72,125.10
3,5/21/21,125.43,79295440,127.82,128.00,125.21
4,5/24/21,127.10,63092950,126.01,127.94,125.94
...,...,...,...,...,...,...
124,11/11/21,147.87,40999950,148.96,149.43,147.68
125,11/12/21,149.99,63804010,148.43,150.40,147.48
126,11/15/21,150.00,59222800,150.37,151.88,149.43
127,11/16/21,151.00,59256210,149.94,151.49,149.34


In [80]:
Apple['Data'] = pd.to_datetime(Apple['Date'])

In [81]:
x = Apple['Date']
x

0       5/18/21
1       5/19/21
2       5/20/21
3       5/21/21
4       5/24/21
         ...   
124    11/11/21
125    11/12/21
126    11/15/21
127    11/16/21
128    11/17/21
Name: Date, Length: 129, dtype: object

In [54]:
y = Apple['Close/Last']
y

0      124.85
1      124.69
2      127.31
3      125.43
4      127.10
        ...  
124    147.87
125    149.99
126    150.00
127    151.00
128    153.49
Name: Close/Last, Length: 129, dtype: float64

In [87]:
from sklearn.linear_model import LinearRegression
Model = LinearRegression()
import numpy as np
x = np.array(Apple.index).reshape(-1,1)
Model.fit(x,y)

LinearRegression()

In [63]:
predictions = Model.predict([[11/18/21],[11/19/21]])
predictions

array([154.15127464, 154.15154562])

We can see here we use the model to predict the close prices of the next two days, both of the prices are incresing, so i can invest this company.

In [44]:
import chart_studio.plotly as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
%matplotlib inline
from plotly.offline import plot
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [82]:
layout = go.Layout(
    title = 'stock prices of Apple',
    xaxis = dict(
        title='Date',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f')
),
    yaxis=dict(
        title='price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f')
    )
)
apple_data = [{'x':Apple['Date'], 'y':Apple['Close/Last']}]
plot = go.Figure(data=apple_data, layout=layout)
plot.show()

In [83]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [89]:
x = np.array(Apple.index).reshape(-1,1)
y = Apple['Close/Last']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [90]:
scaler = StandardScaler().fit(x_train)

In [91]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x_train, y_train)

LinearRegression()

In [98]:
trace0 = go.Scatter(
    x = x_train.T[0],
    y = y_train,
    mode = 'markers',
    name = 'Acutal' )
trace1 = go.Scatter(
    x = x_train.T[0],
    y = model.predict(x_train).T,
    mode = 'lines',
    name = 'predicted')
Apple_data = [trace0, trace1]
layout.xaxis.title.text = 'Days'
plot2 = go.Figure(data=Apple_data, layout=layout)
plot2

In [105]:
scores = f'''
{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(y_train, model.predict(x_train))}\t{r2_score(y_test, model.predict(x_test))}
{'MSE'.ljust(10)}{mse(y_train, model.predict(x_train))}\t{mse(y_test, model.predict(x_test))}
'''

In [106]:
print(scores)


Metric           Train                Test        
r2_score  0.5789303794061145	0.5170179543764332
MSE       30.8593396505989	38.59048474012951



r2_score is a measure of how well a linear regression model fits the data, if the score is closer to 1, it indicates that the regression predictions perfectly fit the data.
And MSE hereis the average of the square of the errors. The larger the number the larger the error.

## AMD

In [166]:
AMD = pd.read_csv('AMD.csv')
AMD['Data'] = pd.to_datetime(AMD['Date'])
AMD

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low,Data
0,5/18/21,74.44,31982480,74.74,75.77,74.24,2021-05-18
1,5/19/21,76.23,73966160,73.16,76.97,72.76,2021-05-19
2,5/20/21,78.06,49135010,76.81,78.27,76.25,2021-05-20
3,5/21/21,77.17,40201090,78.55,78.81,77.04,2021-05-21
4,5/24/21,77.44,33194290,77.26,78.01,76.80,2021-05-24
...,...,...,...,...,...,...,...
124,11/11/21,146.01,67934850,142.96,146.47,140.84,2021-11-11
125,11/12/21,147.89,52162140,146.03,148.59,144.25,2021-11-12
126,11/15/21,146.49,52271270,148.00,148.98,142.86,2021-11-15
127,11/16/21,152.45,53100810,145.93,153.08,145.34,2021-11-16


In [158]:
layout = go.Layout(
    title = 'stock prices of AMD',
    xaxis = dict(
        title='Date',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f')
),
    yaxis=dict(
        title='price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f')
    )
)
AMD_data = [{'x':AMD['Date'], 'y':AMD['Close/Last']}]
plot3 = go.Figure(data=AMD_data, layout=layout)
plot3.show()

In [189]:
X = np.array(AMD.index).reshape(-1,1)
Y = AMD['Close/Last']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
scaler = StandardScaler().fit(X_train)

In [190]:
Model = LinearRegression()
Model.fit(X_train, Y_train)

LinearRegression()

In [199]:
trace3 = go.Scatter(
    x = X_train.T[0],
    y = Y_train,
    mode = 'markers',
    name = 'Acutal' )
trace4 = go.Scatter(
    x = X_train.T[0],
    y = Model.predict(X_train).T,
    mode = 'lines',
    name = 'predicted')
AMD_data = [trace3, trace4]
layout.xaxis.title.text = 'Days'
plot4 = go.Figure(data=AMD_data, layout=layout)
plot4

In [196]:
scores2 = f'''
{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(Y_train, model.predict(X_train))}\t{r2_score(Y_test, model.predict(X_test))}
{'MSE'.ljust(10)}{mse(Y_train, model.predict(X_train))}\t{mse(Y_test, model.predict(X_test))}
'''

In [198]:
print(scores2)


Metric           Train                Test        
r2_score  -35.31513505817088	-42.82737387714736
MSE       12690.978692719522	12263.485984731607



The scores here is similar to the Apple, i think it is not too bad and the line of perdition also has a positive slope which means the price will increse.

## NVDA

In [164]:
NVDA = pd.read_csv('NVDA.csv')
NVDA['Data'] = pd.to_datetime(NVDA['Date'])
NVDA

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low,Data
0,5/20/21,146.1250,32159192,143.03,146.84,142.71,2021-05-20
1,5/21/21,149.9175,67299200,151.64,152.22,148.69,2021-05-21
2,5/24/21,156.1200,55481640,152.13,157.45,151.76,2021-05-24
3,5/25/21,156.4775,43540680,157.66,158.19,154.83,2021-05-25
4,5/26/21,157.0000,37043864,157.34,157.94,155.86,2021-05-26
...,...,...,...,...,...,...,...
124,11/15/21,300.2500,38490900,305.52,306.44,292.47,2021-11-15
125,11/16/21,302.0300,26448370,297.59,303.90,297.06,2021-11-16
126,11/17/21,292.6100,42850800,304.18,305.09,288.00,2021-11-17
127,11/18/21,316.7500,78171080,323.67,327.60,313.21,2021-11-18


In [172]:
layout = go.Layout(
    title = 'stock prices o',
    xaxis = dict(
        title='Date',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f')
),
    yaxis=dict(
        title='price',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f')
    )
)
NVDA_data = [{'x':NVDA['Date'], 'y':NVDA['Close/Last']}]
plot6 = go.Figure(data=NVDA_data, layout=layout)
plot6.show()

In [183]:
x1 = np.array(NVDA.index).reshape(-1,1)
y1 = NVDA['Close/Last']
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, test_size=0.3)
scaler = StandardScaler().fit(x1_train)

In [185]:
model = LinearRegression()
model.fit(x1_train, y1_train)
trace5 = go.Scatter(
    x = x1_train.T[0],
    y = y1_train,
    mode = 'markers',
    name = 'Acutal' )
trace6 = go.Scatter(
    x = x1_train.T[0],
    y = model.predict(x1_train).T,
    mode = 'lines',
    name = 'predicted')
NVDA_data = [trace5, trace6]
layout.xaxis.title.text = 'Days'
plot6 = go.Figure(data=NVDA_data, layout=layout)
plot6

In [186]:
scores = f'''
{'Metric'.ljust(10)}{'Train'.center(20)}{'Test'.center(20)}
{'r2_score'.ljust(10)}{r2_score(y1_train, model.predict(x1_train))}\t{r2_score(y1_test, model.predict(x1_test))}
{'MSE'.ljust(10)}{mse(y1_train, model.predict(x1_train))}\t{mse(y1_test, model.predict(x1_test))}
'''
print(scores)


Metric           Train                Test        
r2_score  0.7119890422415761	0.7339993756305982
MSE       393.81799817744206	354.6855616990026



We can see the r2_score is closer to 1 which means regression prediction fit the data better, so it means the stock price will probably increse like the predicted line but the MSE is larger which means the risk is larger. This stock will be high risk and high profit since it has a better r2_score and bigger MSE.