## Naive Forecast

In [3]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import boxcox

from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score, mean_squared_error

import warnings

# NOTE(review): this suppresses every warning category for the rest of the
# session, including deprecation notices emitted by the libraries imported
# in this cell; consider narrowing it to specific categories or modules.
warnings.filterwarnings('ignore')

from IPython.display import display, Markdown

# Notebook-wide pandas display settings: show up to 999 rows/columns, allow
# 800-character-wide output, never truncate cell contents, and render floats
# with five decimal places.
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.options.display.width = 800
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', '{:.5f}'.format)

In [5]:
# Read the SPY price history; the 'Date' column becomes the index and is
# parsed into datetimes.
df = pd.read_csv(
    'SPY.csv',
    index_col='Date',
    parse_dates=True,
)

# Print the column dtypes, non-null counts and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6126 entries, 1993-01-29 to 2017-05-25
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       6126 non-null   float64
 1   High       6126 non-null   float64
 2   Low        6126 non-null   float64
 3   Close      6126 non-null   float64
 4   Adj Close  6126 non-null   float64
 5   Volume     6126 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 335.0 KB


In [6]:
# Preview the first five rows of the loaded prices.
df.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1993-01-29,43.9687,43.9687,43.75,43.9375,43.9375,1003200
1993-02-01,43.9687,44.25,43.9687,44.25,44.25,480500
1993-02-02,44.2187,44.375,44.125,44.3437,44.3437,201300
1993-02-03,44.4062,44.8437,44.375,44.8125,44.8125,529400
1993-02-04,44.9687,45.0937,44.4687,45.0,45.0,531500


In [7]:
# Naive forecast: each row's predicted close is the previous row's close,
# so the first row has no prediction (NaN).
df['ClosePrediction'] = df['Close'].shift(periods=1)

In [8]:
# Preview the first five rows, now including the ClosePrediction column.
df.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ClosePrediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1993-01-29,43.9687,43.9687,43.75,43.9375,43.9375,1003200,
1993-02-01,43.9687,44.25,43.9687,44.25,44.25,480500,43.9375
1993-02-02,44.2187,44.375,44.125,44.3437,44.3437,201300,44.25
1993-02-03,44.4062,44.8437,44.375,44.8125,44.8125,529400,44.3437
1993-02-04,44.9687,45.0937,44.4687,45.0,45.0,531500,44.8125


In [9]:
# Drop the first row before scoring: its shifted prediction is NaN.
y_true = df['Close'].iloc[1:]
y_pred = df['ClosePrediction'].iloc[1:]

In [10]:
#Metrics
# Scores the naive forecast (y_pred) against the actual close (y_true).
# Every metric is printed the same way: blank line, label, value.

# Residuals and MSE are computed once; SSE and RMSE are derived from them.
residuals = y_true - y_pred
mse = mean_squared_error(y_true, y_pred)

metrics = {
    # Sum of squared errors.
    'SSE': residuals.dot(residuals),
    'MSE': mse,
    # mean_squared_error(..., squared=False) was deprecated in scikit-learn 1.4
    # and removed in 1.6; taking the square root of the MSE gives the same
    # value on every scikit-learn version.
    'RMSE': np.sqrt(mse),
    'MAE': mean_absolute_error(y_true, y_pred),
    # Reported as a fraction, not multiplied by 100.
    'MAPE': mean_absolute_percentage_error(y_true, y_pred),
    'R2': r2_score(y_true, y_pred),
}

for metric_name, metric_value in metrics.items():
    print('')
    print(metric_name)
    print(metric_value)


SSE
11668.570293787614

MSE
1.9050727010265491

RMSE
1.3802437107360965

MAE
0.9364204628571428

MAPE
0.007879649735851743

R2
0.9991057444706286


In [11]:
#sMAPE

def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error (sMAPE), as a fraction.

    Each observation contributes
    ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)`` and the result is the
    mean of those terms. Multiply by 100 to express it as a percentage.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values of matching length: pandas Series,
        NumPy arrays, or plain sequences of numbers.

    Returns
    -------
    float
        Mean of the per-observation symmetric percentage errors.

    Notes
    -----
    When the actual and predicted values are both zero the term is 0/0; the
    forecast is exact there, so the term is counted as 0 instead of NaN.
    """
    # np.subtract / np.abs accept lists as well as Series and ndarrays, and
    # keep a Series a Series so pandas' NaN-skipping mean still applies.
    num = np.abs(np.subtract(y_true, y_pred))
    den = (np.abs(y_true) + np.abs(y_pred)) / 2
    # A zero denominator implies a zero numerator, so dividing by 1 instead
    # yields the intended 0 for that term and avoids 0/0.
    safe_den = np.where(den == 0, 1.0, den)
    ratio = num / safe_den
    return ratio.mean()

# Report the sMAPE of the naive forecast against the actual close.
print(smape(y_true=y_true, y_pred=y_pred))

0.007877261227400215
