# Prediction

Using the historical fluctuations of Amazon, the Dow Jones, and the NASDAQ 100, we want to predict Amazon's stock price for tomorrow.

This lesson assumes some understanding of stock market trading. If you don't know much about it, read/watch the following items:

- How does the stock market work? (YouTube): https://www.youtube.com/watch?v=p7HKvqRI_Bo
- What Are the S&P 500, Nasdaq, and the Dow?: https://www.thebalance.com/the-sandp-500-nasdaq-dow-jones-what-is-this-stuff-453745

In [None]:
import requests
import numpy as np
import pandas as pd
import datetime
import time
import matplotlib.pyplot as plt
import matplotlib

In [None]:
import yfinance as yf

# Download the Amazon price history that the training features are built from.
AMZN = yf.Ticker("AMZN")
# Resetting the index twice first moves the date index into a column and then
# adds a positional counter column in front of it.
amz_df = AMZN.history(period="357d").reset_index().reset_index()
# Short aliases for the nine columns. This assumes history() returned exactly
# seven data columns (presumably Open, High, Low, Close, Volume, Dividends,
# Stock Splits) -- TODO confirm against the installed yfinance version.
amz_df.columns = ['i', 't', 'o', 'h', 'l', 'c', 'v', 'd', 'ss']

# Keep only the calendar date of each timestamp.
amz_df['t'] = amz_df['t'].map(lambda stamp: stamp.date())

def yes_changeP_calc(row):
    """Return the percent change between the two closes preceding ``row``.

    ``row.i`` is used as the positional index into the module-level
    ``amz_df``: the close one row back is compared with the close two rows
    back. The first two rows have no such history and yield NaN.
    """
    pos = row.i
    if pos < 2:
        return np.nan
    prior_close = amz_df.iloc[pos - 1].c
    base_close = amz_df.iloc[pos - 2].c
    return (prior_close - base_close) / base_close * 100


amz_df['yes_changeP'] = amz_df.apply(yes_changeP_calc, axis=1)

def lastweek_changeP_calc(row):
    """Return the percent change from eight rows back to one row back.

    ``row.i`` is used as the positional index into the module-level
    ``amz_df``. Rows with fewer than eight predecessors yield NaN.
    """
    pos = row.i
    if pos < 8:
        return np.nan
    prior_close = amz_df.iloc[pos - 1].c
    week_ago_close = amz_df.iloc[pos - 8].c
    return (prior_close - week_ago_close) / week_ago_close * 100


amz_df['lastweek_changeP'] = amz_df.apply(lastweek_changeP_calc, axis=1)

def today_changeP_calc(row):
    """Return the percent change of this row's close versus the previous row.

    ``row.i`` is used as the positional index into the module-level
    ``amz_df``. The very first row has no predecessor and yields NaN.
    """
    pos = row.i
    if pos < 1:
        return np.nan
    current_close = amz_df.iloc[pos].c
    prior_close = amz_df.iloc[pos - 1].c
    return (current_close - prior_close) / prior_close * 100


# This column is the regression target.
amz_df['today_changeP'] = amz_df.apply(today_changeP_calc, axis=1)

# Index by date so the frame can be joined with other date-indexed frames.
amz_df.set_index('t', inplace=True)
#amz_df

# Download the NASDAQ 100 index history covering the same period.
Nasdaq = yf.Ticker("^NDX")

# Resetting the index twice first moves the date index into a column and then
# adds a positional counter column in front of it.
nasdaq_df = Nasdaq.history(period="357d").reset_index().reset_index()

# Short aliases for the nine columns. This assumes history() returned exactly
# seven data columns -- TODO confirm against the installed yfinance version.
nasdaq_df.columns = ['i', 't', 'o', 'h', 'l', 'c', 'v', 'd', 'ss']

# Keep only the calendar date of each timestamp.
nasdaq_df['t'] = nasdaq_df['t'].map(lambda stamp: stamp.date())

def yes_changeP_calc(row):
    """Return the percent change between the two closes preceding ``row``.

    ``row.i`` is used as the positional index into the module-level
    ``nasdaq_df``: the close one row back is compared with the close two rows
    back. The first two rows have no such history and yield NaN.
    """
    pos = row.i
    if pos < 2:
        return np.nan
    prior_close = nasdaq_df.iloc[pos - 1].c
    base_close = nasdaq_df.iloc[pos - 2].c
    return (prior_close - base_close) / base_close * 100

def lastweek_changeP_calc(row):
    """Return the percent change from eight rows back to one row back.

    ``row.i`` is used as the positional index into the module-level
    ``nasdaq_df``. Rows with fewer than eight predecessors yield NaN.
    """
    pos = row.i
    if pos < 8:
        return np.nan
    prior_close = nasdaq_df.iloc[pos - 1].c
    week_ago_close = nasdaq_df.iloc[pos - 8].c
    return (prior_close - week_ago_close) / week_ago_close * 100

# Compute both lagged-change features row by row; the calc functions read
# nasdaq_df by position, so this must run before the index is replaced.
nasdaq_df['yes_changeP'] = nasdaq_df.apply(yes_changeP_calc,axis=1)
nasdaq_df['lastweek_changeP'] = nasdaq_df.apply(lastweek_changeP_calc,axis=1)
# Index by date so the frame can be joined with other date-indexed frames.
nasdaq_df.set_index('t', inplace=True)
# Only the two derived feature columns are kept.

nasdaq_df.drop(columns=['i','o', 'h', 'l', 'c', 'v', 'd', 'ss'],inplace=True)
# Prefix the feature names so they do not collide with same-named columns of other frames.
nasdaq_df.columns = ['nasdaq_yes_changeP','nasdaq_lastweek_changeP']
#nasdaq_df

# Download the Dow Jones index history covering the same period.
Dow = yf.Ticker("^DJI")

# Resetting the index twice first moves the date index into a column and then
# adds a positional counter column in front of it.
dow_df = Dow.history(period="357d").reset_index().reset_index()
# Short aliases for the nine columns. This assumes history() returned exactly
# seven data columns -- TODO confirm against the installed yfinance version.
dow_df.columns = ['i', 't', 'o', 'h', 'l', 'c', 'v', 'd', 'ss']

# Keep only the calendar date of each timestamp.
dow_df['t'] = dow_df['t'].map(lambda stamp: stamp.date())

def yes_changeP_calc(row):
    """Return the percent change between the two closes preceding ``row``.

    ``row.i`` is used as the positional index into the module-level
    ``dow_df``: the close one row back is compared with the close two rows
    back. The first two rows have no such history and yield NaN.
    """
    pos = row.i
    if pos < 2:
        return np.nan
    prior_close = dow_df.iloc[pos - 1].c
    base_close = dow_df.iloc[pos - 2].c
    return (prior_close - base_close) / base_close * 100


def lastweek_changeP_calc(row):
    """Return the percent change from eight rows back to one row back.

    ``row.i`` is used as the positional index into the module-level
    ``dow_df``. Rows with fewer than eight predecessors yield NaN.
    """
    pos = row.i
    if pos < 8:
        return np.nan
    prior_close = dow_df.iloc[pos - 1].c
    week_ago_close = dow_df.iloc[pos - 8].c
    return (prior_close - week_ago_close) / week_ago_close * 100
    
# Compute both lagged-change features row by row; the calc functions read
# dow_df by position, so this must run before the index is replaced.
dow_df['yes_changeP'] = dow_df.apply(yes_changeP_calc,axis=1)
dow_df['lastweek_changeP'] = dow_df.apply(lastweek_changeP_calc,axis=1)
# Index by date so the frame can be joined with other date-indexed frames.
dow_df.set_index('t', inplace=True)
# Only the two derived feature columns are kept.

dow_df.drop(columns=['i','o', 'h', 'l', 'c', 'v', 'd', 'ss'],inplace=True)
# Prefix the feature names so they do not collide with same-named columns of other frames.
dow_df.columns = ['dow_yes_changeP','dow_lastweek_changeP']
#dow_df

# Join the three date-indexed frames, discard the raw price columns that came
# with the Amazon frame, and drop every row that has a missing value.
integrate_df = (
    amz_df.join(dow_df)
    .join(nasdaq_df)
    .drop(columns=['i', 'o', 'h', 'l', 'c', 'v', 'd', 'ss'])
    .dropna()
)
# Order the columns as six predictors followed by the target.
integrate_df = integrate_df[[
    'yes_changeP',
    'lastweek_changeP',
    'dow_yes_changeP',
    'dow_lastweek_changeP',
    'nasdaq_yes_changeP',
    'nasdaq_lastweek_changeP',
    'today_changeP',
]]
integrate_df

# Today
# Build the feature row for the day after the most recent close, for all three
# tickers, and append it (with an unknown target) to integrate_df.


def _latest_change_features(history_df):
    """Return ``(yes_changeP, lastweek_changeP)`` for the day after the last row.

    The definitions mirror the historical feature functions: for a predicted
    row at position ``i`` they compare close ``i-1`` with close ``i-2`` and
    close ``i-1`` with close ``i-8``. Here the predicted day sits one position
    past the end of ``history_df``, so those are rows ``-1``, ``-2`` and ``-8``.

    Args:
        history_df: frame with a ``Close`` column; assumed to be ordered
            oldest to newest and to hold at least eight rows.

    Returns:
        Tuple ``(day-over-day percent change, eight-row percent change)``.

    Raises:
        IndexError: if ``history_df`` has fewer than eight rows.
    """
    close = history_df['Close']
    latest = close.iloc[-1]
    previous = close.iloc[-2]
    # Eight rows before the predicted day, matching the ``row.i - 8`` lookup
    # used for the historical features (the earlier code looked one row too late).
    week_ago = close.iloc[-8]
    return (
        (latest - previous) / previous * 100,
        (latest - week_ago) / week_ago * 100,
    )


AMZN = yf.Ticker("AMZN")
# get historical market data
amz_df = AMZN.history(period="8d")
Yes_changeP, Lastweek_changeP = _latest_change_features(amz_df)

Dow = yf.Ticker("^DJI")
# get historical market data
dow_df = Dow.history(period="8d")
Dow_yes_changeP, Dow_lastweek_changeP = _latest_change_features(dow_df)

Nasdaq = yf.Ticker("^NDX")
# get historical market data
nasdaq_df = Nasdaq.history(period="8d")
Nasdaq_yes_changeP, Nasdaq_lastweek_changeP = _latest_change_features(nasdaq_df)

latest_features = {
    'yes_changeP': Yes_changeP,
    'lastweek_changeP': Lastweek_changeP,
    'dow_yes_changeP': Dow_yes_changeP,
    'dow_lastweek_changeP': Dow_lastweek_changeP,
    'nasdaq_yes_changeP': Nasdaq_yes_changeP,
    'nasdaq_lastweek_changeP': Nasdaq_lastweek_changeP,
}

# NOTE(review): the row label is a hard-coded date string, unlike the
# datetime.date labels of the existing rows -- confirm whether it should be
# derived from the downloaded data instead.
to_predict = pd.Series(latest_features, name='2021-03-06')

# Same features plus an unknown target, so the row can sit in integrate_df.
to_add = pd.Series({**latest_features, 'today_changeP': np.nan}, name='2021-03-06')

# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# (keeping the existing index name) gives the same result.
integrate_df = pd.concat(
    [integrate_df, to_add.to_frame().T.rename_axis(integrate_df.index.name)]
)

In [None]:
# Display the combined feature table, including the newly added row whose target is still NaN.
integrate_df

## What are the columns?

- `yes_changeP`: Yesterday Amazon's stock price change
\begin{equation}
yesChangeP=\frac{yesterdayPrice - theDayBeforeYesterdayPrice}{theDayBeforeYesterdayPrice}*100
\end{equation}



- `lastweek_changeP`: Last week Amazon's stock price change
\begin{equation}
lastweekChangeP=\frac{yesterdayPrice - lastWeekPrice}{lastWeekPrice}*100
\end{equation}



- `dow_yes_changeP`: Yesterday Dow Jones change
\begin{equation}
dowYesChangeP=\frac{dowYesterdayPrice - dowTheDayBeforeYesterdayPrice}{dowTheDayBeforeYesterdayPrice}*100
\end{equation}


- `dow_lastweek_changeP`: Last Week Dow Jones change
\begin{equation}
dowLastweekChangeP=\frac{dowYesterdayPrice - dowLastWeekPrice}{dowLastWeekPrice}*100
\end{equation}


- `nasdaq_yes_changeP`: Yesterday NASDAQ 100 change
\begin{equation}
nasdaqYesChangeP=\frac{nasdaqYesterdayPrice - nasdaqTheDayBeforeYesterdayPrice}{nasdaqTheDayBeforeYesterdayPrice}*100
\end{equation}


- `nasdaq_lastweek_changeP`: Last Week NASDAQ 100 change
\begin{equation}
nasdaqLastweekChangeP=\frac{nasdaqYesterdayPrice - nasdaqLastWeekPrice}{nasdaqLastWeekPrice}*100
\end{equation}


- `today_changeP`: Today Amazon's stock price change
\begin{equation}
todayChangeP=\frac{todayPrice - yesterdayPrice}{yesterdayPrice}*100
\end{equation}


## Prediction model in words

We are trying to find a relationship between the predictors (`yes_changeP`, `lastweek_changeP`, `dow_yes_changeP`, `dow_lastweek_changeP`, `nasdaq_yes_changeP`, and `nasdaq_lastweek_changeP`) and the target, `today_changeP`, so that the predictors can be used to predict the target.

## Prediction model visually

In [None]:
import matplotlib.image as mpimg

# Render the model diagram stored next to the notebook in a tall figure.
plt.figure(figsize=(10, 20))
plt.imshow(mpimg.imread('visual.jpg'))
plt.show()


## Prediction model expressed as a multivariate regression model
`today_changeP` = b0 + b1 * `yes_changeP` + b2*`lastweek_changeP` + b3* `dow_yes_changeP` + b4 * `dow_lastweek_changeP`+ b5 *`nasdaq_yes_changeP` + b6* `nasdaq_lastweek_changeP`

## Regression model using Python

In [None]:
# Target column and the six predictor columns of the regression.
Y = 'today_changeP'
X = [
    'yes_changeP',
    'lastweek_changeP',
    'dow_yes_changeP',
    'dow_lastweek_changeP',
    'nasdaq_yes_changeP',
    'nasdaq_lastweek_changeP',
]

# Rows with any missing value (such as a row whose target is unknown) are
# excluded from the training data.
_complete_rows = integrate_df.dropna()
Data_X = _complete_rows[X]
Data_Y = _complete_rows[Y]

In [None]:
from sklearn.linear_model import LinearRegression
from dmba import regressionSummary, exhaustive_search

# Fit an ordinary linear regression of the target on the selected predictors.
lm = LinearRegression()
lm.fit(Data_X, Data_Y)

In [None]:
# Report the fitted intercept, then one labelled coefficient per predictor.
print('intercept (b0) ', lm.intercept_)
coef_names = [f'b{k}' for k in range(1, 7)]
coef_table = pd.DataFrame({
    'Predictor': Data_X.columns,
    'coefficient Name': coef_names,
    'coefficient Value': lm.coef_,
})
print(coef_table)

# Performance measures, computed on the same rows the model was fitted on.
regressionSummary(Data_Y, lm.predict(Data_X))

In [None]:
from dmba import adjusted_r2_score, AIC_score, BIC_score

# Predictions for the same rows the model was fitted on.
pred_y = lm.predict(Data_X)

# Print each score with its label, in the order adjusted R2, AIC, BIC.
for label, scorer in (
    ('adjusted r2 : ', adjusted_r2_score),
    ('AIC : ', AIC_score),
    ('BIC : ', BIC_score),
):
    print(label, scorer(Data_Y, pred_y, lm))

In [None]:
# Show the feature Series as a one-row table, the shape passed to lm.predict.
pd.DataFrame(to_predict).transpose()

In [None]:
# Turn the feature Series into a one-row frame and take the single predicted
# percent change out of the returned array.
features_row = to_predict.to_frame().T
tomorrows_change = lm.predict(features_row)[0]
tomorrows_change

In [None]:
# Close of the last row of amz_df (at this point the short, re-downloaded
# Amazon history that still has its original ``Close`` column).
today_price = amz_df.iloc[-1].Close
today_price

In [None]:
# Apply the predicted percent change to the latest close to get a price estimate.
Tomorrow_price = today_price * (1+ tomorrows_change/100)
Tomorrow_price

# Method 1 Statistical

In [None]:
import statsmodels.api as sm

# add_constant prepends a constant column so the OLS fit includes an intercept.
Data_X2 = sm.add_constant(Data_X)
model = sm.OLS(Data_Y, Data_X2)
# Fit and display the full regression summary table.
model.fit().summary()

In [None]:
# Refit with a single predictor.
Y = 'today_changeP'
X = ['dow_yes_changeP']

# Rows with any missing value are excluded.
_complete_rows = integrate_df.dropna()
Data_X = _complete_rows[X]
Data_Y = _complete_rows[Y]

# Constant column added so the OLS fit includes an intercept.
Data_X2 = sm.add_constant(Data_X)
est = sm.OLS(Data_Y, Data_X2)
est2 = est.fit()
print(est2.summary())

# Method 2 | Correlation Analysis

In [None]:
# Pairwise correlation matrix of all predictor and target columns.
integrate_df.corr()

In [None]:
# Scatter plot of two predictors against each other (Amazon vs. NASDAQ previous-day change).
integrate_df.plot.scatter(x='yes_changeP',y='nasdaq_yes_changeP')

In [None]:
# Refit with a reduced set of three predictors.
Y = 'today_changeP'
X = ['yes_changeP', 'nasdaq_lastweek_changeP', 'dow_yes_changeP']

# Rows with any missing value are excluded.
_complete_rows = integrate_df.dropna()
Data_X = _complete_rows[X]
Data_Y = _complete_rows[Y]

# Constant column added so the OLS fit includes an intercept.
Data_X2 = sm.add_constant(Data_X)
est = sm.OLS(Data_Y, Data_X2)
est2 = est.fit()
print(est2.summary())

# Method 3 Decision Tree

In [None]:
from matplotlib import pyplot as plt
import pydotplus
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor

# Back to the full set of six predictors.
Y = 'today_changeP'
X = [
    'yes_changeP',
    'lastweek_changeP',
    'dow_yes_changeP',
    'dow_lastweek_changeP',
    'nasdaq_yes_changeP',
    'nasdaq_lastweek_changeP',
]

# Rows with any missing value are excluded.
_complete_rows = integrate_df.dropna()
Data_X = _complete_rows[X]
Data_Y = _complete_rows[Y]

# max_depth=1 limits the tree to a single split.
regr = DecisionTreeRegressor(max_depth=1)
regr.fit(Data_X, Data_Y)

# Draw the fitted tree on a large canvas.
fig = plt.figure(figsize=(25, 20))
_ = tree.plot_tree(regr, filled=True, feature_names=X)


# Method 4 - Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fixed random_state keeps the forest reproducible between runs.
rf = RandomForestRegressor(n_estimators=500, random_state=1)
rf.fit(Data_X, Data_Y)

# Forest-level importances plus their spread across the individual trees.
importances = rf.feature_importances_
per_tree = [member.feature_importances_ for member in rf.estimators_]
std = np.std(per_tree, axis=0)

df = pd.DataFrame(
    {'feature': X, 'importance': importances, 'std': std}
).sort_values('importance')
print(df)

# Horizontal bars with the per-tree standard deviation as error bars.
ax = df.plot(kind='barh', xerr='std', x='feature', legend=False)
ax.set_ylabel('')

plt.tight_layout()
plt.show()