# 07 Modelling - Multiple Linear Regression


---

# Contents

- [1.0 About Prophet Model](#1.0-About-Prophet-Model)
- [2.0 Load Data](#2.0-Load-Data)
    - [2.1 EUR/USD Data](#2.1-EUR/USD-Data)
    - [2.2 Pattern Data](#2.2-Pattern-Data)
- [3.0 Model](#3.0-Model)
    - [3.1 Train/Test Split](#3.1-Train/Test-Split)
    - [3.2 Fit Model](#3.2-Fit-Model)
    - [3.3 Calculate Results](#3.3-Calculate-Results)
    - [3.4 Run Model](#3.4-Run-Model)
- [4.0 Results](#4.0-Results)
- [5.0 Observations](#5.0-Observations)



In [2]:
import calendar
import datetime
import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import plotly.graph_objects as go

from fbprophet import Prophet
from sklearn.metrics import r2_score, mean_squared_error
from statsmodels.tools.eval_measures import rmse
from statsmodels.tsa.arima_model import ARIMA


In [3]:
pd.set_option('display.max_columns', None)

---

# 1.0 About Prophet Model

About Prophet

More About Prophet

---

# 2.0 Load Data

## 2.1 EUR/USD Data

In [8]:
# read in the daily data
daily = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/data/resampled/eur-usd2daily.csv', 
                    index_col='date', parse_dates=True)

In [10]:
daily.head()

Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,marubozu,marubozu+1,marubozu-1,marubozu-2,day-1_open,day-2_open,day-3_open,day-1_high,day-2_high,day-3_high,day-1_low,day-2_low,day-3_low,day-1_close,day-2_close,day-3_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,exit_price,select,target,date+5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2000-07-14,0.9353,0.9389,0.9318,0.9383,0.9368,0.94522,0.948145,0.00325,0.003387,-0.016431,-0.014102,-0.017514,0.003,0.0062,0.0086,0.0041,1.0,97.4,122.0,0,0.0,-1.0,-1.0,0.9416,0.9503,0.9545,0.9425,0.9517,0.9569,0.933,0.9396,0.9496,0.9354,0.9417,0.9504,0.9382,0.9402,0.9342,0.936,0.9368,0.9227,0.927,0.9193,0.9342,0.9204,0.9384,0.9319,0.9413,0,0.9402,2000-07-21
2000-07-17,0.9382,0.9402,0.9342,0.936,0.9371,0.94217,0.9471,0.004282,0.00344,-0.009408,-0.016013,-0.014875,0.0022,0.003,0.0062,0.0086,-1.0,98.1,123.3,0,-1.0,0.0,-1.0,0.9353,0.9416,0.9503,0.9389,0.9425,0.9517,0.9318,0.933,0.9396,0.9383,0.9354,0.9417,0.9361,0.9368,0.9227,0.9256,0.927,0.9193,0.9342,0.9204,0.9384,0.9319,0.9367,0.9313,0.9338,0,0.9193,2000-07-24
2000-07-18,0.9361,0.9368,0.9227,0.9256,0.93085,0.93785,0.945633,0.003582,0.003797,-0.008151,-0.022678,-0.020519,0.0105,0.0022,0.003,0.0062,-1.0,98.3,123.8,-1,0.0,0.0,0.0,0.9382,0.9353,0.9416,0.9402,0.9389,0.9425,0.9342,0.9318,0.933,0.936,0.9383,0.9354,0.9255,0.927,0.9193,0.9246,0.9342,0.9204,0.9384,0.9319,0.9367,0.9313,0.9433,0.9329,0.9151,0,0.9193,2000-07-25
2000-07-19,0.9255,0.927,0.9193,0.9246,0.92505,0.93366,0.944207,0.003915,0.003618,-0.012543,-0.022146,-0.029125,0.0009,0.0105,0.0022,0.003,-1.0,97.0,123.3,0,1.0,-1.0,0.0,0.9361,0.9382,0.9353,0.9368,0.9402,0.9389,0.9227,0.9342,0.9318,0.9256,0.936,0.9383,0.9245,0.9342,0.9204,0.9325,0.9384,0.9319,0.9367,0.9313,0.9433,0.9329,0.945,0.9391,0.9237,0,0.9204,2000-07-26
2000-07-20,0.9245,0.9342,0.9204,0.9325,0.9285,0.93166,0.943221,0.005881,0.004167,-0.009177,-0.010655,-0.024531,0.008,0.0009,0.0105,0.0022,1.0,97.1,123.3,1,0.0,0.0,-1.0,0.9255,0.9361,0.9382,0.927,0.9368,0.9402,0.9193,0.9227,0.9342,0.9246,0.9256,0.936,0.9324,0.9384,0.9319,0.9365,0.9367,0.9313,0.9433,0.9329,0.945,0.9391,0.9444,0.9314,0.9405,0,0.945,2000-07-27


In [37]:
# Add lagged copies of 'mid' and 'height' for 4 to 15 rows back
for lag in range(4, 16):
    prefix = 'day-' + str(lag)
    daily[prefix + '_mid'] = daily['mid'].shift(lag)
    daily[prefix + '_height'] = daily['height'].shift(lag)

In [46]:
daily.head(20)

Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,marubozu,marubozu+1,marubozu-1,marubozu-2,day-1_open,day-2_open,day-3_open,day-1_high,day-2_high,day-3_high,day-1_low,day-2_low,day-3_low,day-1_close,day-2_close,day-3_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,exit_price,select,target,date+5,day-4_mid,day-4_height,day-5_mid,day-5_height,day-6_mid,day-6_height,day-7_mid,day-7_height,day-8_mid,day-8_height,day-9_mid,day-9_height,day-10_mid,day-10_height,day-11_mid,day-11_height,day-12_mid,day-12_height,day-13_mid,day-13_height,day-14_mid,day-14_height,day-15_mid,day-15_height
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1
2000-08-04,0.9047,0.9103,0.9015,0.9088,0.90675,0.91649,0.933074,0.001076,0.004647,-0.0167,-0.022951,-0.029643,0.0041,0.0117,0.0001,0.0107,1.0,95.2,123.0,0,0.0,-1.0,0.0,0.9167,0.9169,0.9275,0.9174,0.9192,0.9293,0.8997,0.9117,0.9135,0.905,0.917,0.9168,0.909,0.9114,0.9055,0.9083,0.9084,0.8982,0.9028,0.8965,0.9098,0.902,0.9145,0.9014,0.9129,0,0.9145,2000-08-11,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036,0.93445,0.0041,0.9285,0.008,0.92505,0.0009,0.93085,0.0105,0.9371,0.0022,0.9368,0.003
2000-08-07,0.909,0.9114,0.9055,0.9083,0.90865,0.91307,0.931095,0.004558,0.004815,-0.009052,-0.018472,-0.027974,0.0007,0.0041,0.0117,0.0001,-1.0,94.9,122.7,0,-1.0,0.0,-1.0,0.9047,0.9167,0.9169,0.9103,0.9174,0.9192,0.9015,0.8997,0.9117,0.9088,0.905,0.917,0.9084,0.9084,0.8982,0.901,0.9028,0.8965,0.9098,0.902,0.9145,0.9014,0.9075,0.9007,0.9076,0,0.8965,2000-08-14,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036,0.93445,0.0041,0.9285,0.008,0.92505,0.0009,0.93085,0.0105,0.9371,0.0022
2000-08-08,0.9084,0.9084,0.8982,0.901,0.9047,0.90958,0.928826,0.003765,0.00446,-0.006752,-0.018923,-0.034523,0.0074,0.0007,0.0041,0.0117,-1.0,94.9,123.8,-1,0.0,0.0,0.0,0.909,0.9047,0.9167,0.9114,0.9103,0.9174,0.9055,0.9015,0.8997,0.9083,0.9088,0.905,0.9009,0.9028,0.8965,0.902,0.9098,0.902,0.9145,0.9014,0.9075,0.9007,0.9166,0.9066,0.8936,0,0.8965,2000-08-15,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036,0.93445,0.0041,0.9285,0.008,0.92505,0.0009,0.93085,0.0105
2000-08-09,0.9009,0.9028,0.8965,0.902,0.90145,0.90648,0.926398,0.003522,0.003123,-0.005845,-0.016904,-0.043351,0.0011,0.0074,0.0007,0.0041,1.0,94.5,123.6,0,1.0,-1.0,0.0,0.9084,0.909,0.9047,0.9084,0.9114,0.9103,0.8982,0.9055,0.9015,0.901,0.9083,0.9088,0.902,0.9098,0.902,0.9087,0.9145,0.9014,0.9075,0.9007,0.9166,0.9066,0.9177,0.9066,0.9031,0,0.9177,2000-08-16,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036,0.93445,0.0041,0.9285,0.008,0.92505,0.0009
2000-08-10,0.902,0.9098,0.902,0.9087,0.90535,0.90538,0.924462,0.004805,0.004155,-0.003632,-0.006038,-0.034448,0.0067,0.0011,0.0074,0.0007,1.0,94.6,122.2,1,-1.0,0.0,-1.0,0.9009,0.9084,0.909,0.9028,0.9084,0.9114,0.8965,0.8982,0.9055,0.902,0.901,0.9083,0.9089,0.9145,0.9014,0.9029,0.9075,0.9007,0.9166,0.9066,0.9177,0.9066,0.918,0.9119,0.9154,0,0.918,2000-08-17,0.90675,0.0041,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036,0.93445,0.0041,0.9285,0.008
2000-08-11,0.9089,0.9145,0.9014,0.9029,0.9059,0.90521,0.92291,0.003962,0.003573,0.001326,-0.000937,-0.023867,0.006,0.0067,0.0011,0.0074,-1.0,94.8,123.0,-1,0.0,1.0,0.0,0.902,0.9009,0.9084,0.9098,0.9028,0.9084,0.902,0.8965,0.8982,0.9087,0.902,0.901,0.9031,0.9075,0.9007,0.9067,0.9166,0.9066,0.9177,0.9066,0.918,0.9119,0.9184,0.9059,0.8969,0,0.9007,2000-08-18,0.90865,0.0007,0.90675,0.0041,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036,0.93445,0.0041
2000-08-14,0.9031,0.9075,0.9007,0.9067,0.9049,0.90446,0.92139,0.002776,0.003597,0.003827,-0.004127,-0.022522,0.0036,0.006,0.0067,0.0011,1.0,95.7,124.3,0,1.0,-1.0,1.0,0.9089,0.902,0.9009,0.9145,0.9098,0.9028,0.9014,0.902,0.8965,0.9029,0.9087,0.902,0.9067,0.9166,0.9066,0.9161,0.9177,0.9066,0.918,0.9119,0.9184,0.9059,0.9079,0.9013,0.9103,0,0.9184,2000-08-21,0.9047,0.0074,0.90865,0.0007,0.90675,0.0041,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083,0.9348,0.0036
2000-08-15,0.9067,0.9166,0.9066,0.9161,0.9114,0.9058,0.920167,0.004375,0.004606,0.006682,0.007406,-0.011658,0.0094,0.0036,0.006,0.0067,1.0,95.3,122.5,1,0.0,0.0,-1.0,0.9031,0.9089,0.902,0.9075,0.9145,0.9098,0.9007,0.9014,0.902,0.9067,0.9029,0.9087,0.916,0.9177,0.9066,0.9152,0.918,0.9119,0.9184,0.9059,0.9079,0.9013,0.9035,0.8933,0.9255,0,0.9184,2000-08-22,0.90145,0.0011,0.9047,0.0074,0.90865,0.0007,0.90675,0.0041,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024,0.93705,0.0083
2000-08-16,0.916,0.9177,0.9066,0.9152,0.9156,0.90863,0.91944,0.004241,0.004639,0.010708,0.015697,-0.001472,0.0008,0.0094,0.0036,0.006,-1.0,96.0,123.7,0,0.0,1.0,0.0,0.9067,0.9031,0.9089,0.9166,0.9075,0.9145,0.9066,0.9007,0.9014,0.9161,0.9067,0.9029,0.9151,0.918,0.9119,0.9164,0.9184,0.9059,0.9079,0.9013,0.9035,0.8933,0.9043,0.8912,0.9144,0,0.8912,2000-08-23,0.90535,0.0067,0.90145,0.0011,0.9047,0.0074,0.90865,0.0007,0.90675,0.0041,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115,0.9423,0.0024
2000-08-17,0.9151,0.918,0.9119,0.9164,0.91575,0.91071,0.918998,0.003551,0.004037,0.01199,0.011487,0.00538,0.0013,0.0008,0.0094,0.0036,1.0,96.3,123.7,0,-1.0,0.0,1.0,0.916,0.9067,0.9031,0.9177,0.9166,0.9075,0.9066,0.9066,0.9007,0.9152,0.9161,0.9067,0.9164,0.9184,0.9059,0.9076,0.9079,0.9013,0.9035,0.8933,0.9043,0.8912,0.9055,0.8986,0.9177,0,0.9184,2000-08-24,0.9059,0.006,0.90535,0.0067,0.90145,0.0011,0.9047,0.0074,0.90865,0.0007,0.90675,0.0041,0.91085,0.0117,0.91695,0.0001,0.92215,0.0107,0.92575,0.0033,0.92805,0.0079,0.93765,0.0115


In [40]:
daily.shape

(4612, 75)

In [41]:
daily.dropna(inplace=True)

In [42]:
daily.shape

(4597, 75)

In [43]:
# Name the column to parse: parse_dates=True only converts the index, and this
# file is read with the default integer index, so 'date' would stay a string.
strat_res = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/model_results/arima_results.csv',
                        parse_dates=['date'])

In [109]:
strat_res.tail(20)

Unnamed: 0,name,strategy,date,time_frame,RMSE,MSE,classification
42,arima-0-1-0,marubozu,2012-07-25,daily,0.013808,0.000191,tp
43,arima-0-1-0,marubozu,2012-12-05,daily,0.0152,0.000231,fn
44,arima-0-1-0,marubozu,2013-05-23,daily,0.003633,1.3e-05,tp
45,arima-0-1-0,marubozu,2013-08-02,daily,0.003621,1.3e-05,tp
46,arima-0-1-0,marubozu,2014-09-19,daily,0.012922,0.000167,fn
47,arima-0-1-0,marubozu,2014-12-11,daily,0.008296,6.9e-05,fn
48,arima-0-1-0,marubozu,2015-02-05,daily,0.015147,0.000229,tn
49,arima-0-1-0,marubozu,2015-06-29,daily,0.017357,0.000301,tn
50,arima-0-1-0,marubozu,2016-01-06,daily,0.008007,6.4e-05,fn
51,arima-0-1-0,marubozu,2016-01-20,daily,0.009116,8.3e-05,fn


In [95]:
len(strat_res)

62

In [7]:
# Create features dataset

In [None]:
    # NOTE: stray fragment duplicated from create_train_test_split (section 3.1).
    # It is indented without an enclosing block and uses train_test / model_info,
    # which are not defined at this point in the notebook, so it is kept only as
    # a comment to stop "Run All" failing on this cell.
    #
    # insert the signal to dataset
    # train_test.insert(0, 'signal', model_info['signal'])
    #
    # create start and end points for the test/train splits
    # model_info['start'] = len(train_test)-5
    # model_info['end'] = len(train_test)-1

In [86]:
# Name the column to parse: parse_dates=True only converts the index, and this
# file is read with the default integer index, so 'pattern_end' would stay a string.
daily_pattern = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/data/targets/daily_pattern2.csv',
                            parse_dates=['pattern_end'])
daily_pattern.head()

Unnamed: 0,pattern_end
0,2000-10-11
1,2000-10-20
2,2001-04-05
3,2001-04-09
4,2001-08-20


In [132]:
# Keep only the daily rows whose date appears in the pattern_end column
is_pattern_day = daily.index.isin(daily_pattern['pattern_end'])
features = daily[is_pattern_day]
features.shape

(64, 75)

In [133]:
# Narrow further to the dates that also appear in the ARIMA results table
has_arima_result = features.index.isin(strat_res['date'])
features = features[has_arima_result]
features.shape

(62, 75)

In [134]:
# Look up each classification by its date instead of relying on strat_res and
# features happening to be in the same row order.
classification_by_date = pd.Series(strat_res['classification'].values,
                                   index=pd.to_datetime(strat_res['date']))
assert classification_by_date.index.is_unique, "duplicate dates in strat_res"

results_series = classification_by_date.reindex(features.index).tolist()

# features was sliced out of daily; copy before adding a column to it
features = features.copy()
features['ml_signal'] = results_series
assert features['ml_signal'].notna().all(), "feature rows without a classification"
features


Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,marubozu,marubozu+1,marubozu-1,marubozu-2,day-1_open,day-2_open,day-3_open,day-1_high,day-2_high,day-3_high,day-1_low,day-2_low,day-3_low,day-1_close,day-2_close,day-3_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,exit_price,select,target,date+5,day-4_mid,day-4_height,day-5_mid,day-5_height,day-6_mid,day-6_height,day-7_mid,day-7_height,day-8_mid,day-8_height,day-9_mid,day-9_height,day-10_mid,day-10_height,day-11_mid,day-11_height,day-12_mid,day-12_height,day-13_mid,day-13_height,day-14_mid,day-14_height,day-15_mid,day-15_height,ml_signal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1
2000-10-11,0.87330,0.87640,0.86440,0.86770,0.870500,0.870460,0.869214,0.001513,0.001716,0.000632,-0.006392,-0.014491,0.00560,0.00530,0.00190,0.00030,-1.0,94.8,127.4,-1,0.0,1.0,0.0,0.86790,0.86990,0.87010,0.87470,0.87100,0.87430,0.86700,0.86650,0.86740,0.87320,0.86800,0.86980,0.86760,0.86820,0.85970,0.86280,0.86600,0.85300,0.85570,0.84620,0.85650,0.84800,0.85790,0.83270,0.86210,1,0.83270,2000-10-18,0.872350,0.00430,0.876100,0.00300,0.877600,0.00020,0.880200,0.00520,0.882050,0.00150,0.882500,0.00260,0.883300,0.00120,0.879050,0.00710,0.878500,0.00580,0.870600,0.02180,0.853800,0.01160,0.848950,0.00210,tp
2000-10-20,0.84710,0.84750,0.83510,0.84010,0.843600,0.847740,0.867864,0.002844,0.003502,-0.009801,-0.017985,-0.030289,0.00700,0.00710,0.01460,0.00530,-1.0,94.0,131.8,-1,0.0,1.0,-1.0,0.84010,0.85460,0.84930,0.84870,0.85790,0.85650,0.83830,0.83270,0.84800,0.84720,0.84000,0.85460,0.84020,0.84260,0.83450,0.83670,0.84000,0.83490,0.83890,0.82490,0.83250,0.82290,0.84460,0.82900,0.83310,1,0.82290,2000-10-27,0.852200,0.00620,0.859050,0.00730,0.865200,0.00480,0.870500,0.00560,0.870550,0.00530,0.868950,0.00190,0.869950,0.00030,0.872350,0.00430,0.876100,0.00300,0.877600,0.00020,0.880200,0.00520,0.882050,0.00150,tp
2001-04-05,0.90660,0.90910,0.89430,0.89670,0.901650,0.890360,0.901571,0.007044,0.006930,0.024369,0.020832,0.010988,0.00990,0.01000,0.01380,0.00540,-1.0,89.7,117.5,-1,1.0,1.0,1.0,0.89660,0.88290,0.87750,0.90800,0.89870,0.88370,0.89370,0.88210,0.87450,0.90660,0.89670,0.88290,0.89660,0.90510,0.89270,0.90340,0.90510,0.89470,0.89950,0.88680,0.89180,0.88140,0.89460,0.88400,0.88680,1,0.88140,2001-04-12,0.878550,0.00190,0.883250,0.00770,0.890500,0.00660,0.894700,0.00200,0.893800,0.00360,0.889600,0.00500,0.891850,0.00930,0.903000,0.01320,0.904950,0.00970,0.898550,0.00330,0.897300,0.00100,0.903900,0.01240,tp
2001-04-09,0.90350,0.90510,0.89470,0.89650,0.900000,0.898610,0.898467,0.001073,0.006785,-0.001775,0.022495,0.006937,0.00700,0.00680,0.00990,0.01000,-1.0,90.0,117.2,-1,-1.0,1.0,-1.0,0.89660,0.90660,0.89660,0.90510,0.90910,0.90800,0.89270,0.89430,0.89370,0.90340,0.89670,0.90660,0.89640,0.89950,0.88680,0.88980,0.89180,0.88140,0.89460,0.88400,0.89330,0.88910,0.88950,0.88340,0.88950,1,0.88140,2001-04-16,0.889800,0.01380,0.880200,0.00540,0.878550,0.00190,0.883250,0.00770,0.890500,0.00660,0.894700,0.00200,0.893800,0.00360,0.889600,0.00500,0.891850,0.00930,0.903000,0.01320,0.904950,0.00970,0.898550,0.00330,tp
2001-08-20,0.91780,0.92050,0.91130,0.91260,0.915200,0.911100,0.887862,0.002255,0.003879,0.005714,0.021771,0.039350,0.00520,0.00530,0.00310,0.01100,-1.0,96.1,123.7,-1,0.0,1.0,0.0,0.91240,0.91560,0.90450,0.91840,0.92030,0.91710,0.91050,0.90930,0.90340,0.91770,0.91250,0.91550,0.91250,0.91850,0.90820,0.91680,0.92420,0.91230,0.91700,0.91010,0.91690,0.90780,0.91250,0.90870,0.90740,1,0.90780,2001-08-27,0.901200,0.00680,0.895700,0.00400,0.892950,0.00130,0.886900,0.01060,0.879450,0.00410,0.878950,0.00330,0.880550,0.00010,0.881350,0.00130,0.882150,0.00010,0.881200,0.00180,0.878100,0.00460,0.875500,0.00080,tp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-09-14,1.16928,1.17215,1.16199,1.16357,1.166425,1.162073,1.160078,0.002361,0.002441,0.006246,0.006945,0.002854,0.00571,0.00557,0.00515,0.00123,-1.0,417.6,420.4,-1,1.0,1.0,1.0,1.16372,1.15858,1.15980,1.17008,1.16493,1.16437,1.16090,1.15698,1.15654,1.16929,1.16373,1.15857,1.16361,1.17027,1.16177,1.16953,1.17243,1.16520,1.17147,1.16500,1.17855,1.16772,1.18025,1.17325,1.15786,1,1.16177,2018-09-21,1.157095,0.00539,1.158380,0.00798,1.162905,0.00105,1.161525,0.00381,1.159855,0.00043,1.159980,0.00020,1.163105,0.00649,1.168185,0.00367,1.169750,0.00056,1.168440,0.00208,1.165010,0.00478,1.159070,0.00712,tn
2018-10-22,1.15144,1.15499,1.14530,1.14543,1.148435,1.150957,1.157464,0.002761,0.002606,-0.003989,-0.006686,-0.001478,0.00601,0.00569,0.00421,0.00611,-1.0,424.7,434.8,-1,0.0,1.0,0.0,1.14576,1.14997,1.15609,1.15343,1.15273,1.15777,1.14330,1.14494,1.14941,1.15145,1.14576,1.14998,1.14543,1.14932,1.14391,1.14708,1.14762,1.13789,1.14322,1.13560,1.14206,1.13355,1.14163,1.13605,1.13942,1,1.13355,2018-10-29,1.156845,0.00153,1.156165,0.00291,1.157485,0.00555,1.158540,0.00344,1.153920,0.00578,1.150030,0.00198,1.150135,0.00221,1.150935,0.00061,1.148930,0.00340,1.152605,0.01075,1.157500,0.00096,1.158325,0.00259,fn
2018-12-10,1.14282,1.14398,1.13500,1.13690,1.139860,1.137008,1.135899,0.001891,0.002309,0.005385,0.003054,0.005052,0.00592,0.00544,0.00265,0.00191,-1.0,432.7,446.7,-1,0.0,1.0,0.0,1.13737,1.13472,1.13280,1.14425,1.14121,1.13609,1.13602,1.13207,1.13105,1.14281,1.13737,1.13471,1.13690,1.13999,1.13062,1.13261,1.13871,1.13149,1.13932,1.13310,1.13616,1.12696,1.13600,1.13061,1.13098,1,1.12696,2018-12-17,1.135290,0.00494,1.136390,0.00276,1.137120,0.00430,1.138930,0.00068,1.134055,0.00907,1.131615,0.00419,1.134130,0.00086,1.137955,0.00679,1.140290,0.00212,1.138485,0.00147,1.141160,0.00682,1.142175,0.00483,fn
2019-01-31,1.15026,1.15141,1.14356,1.14420,1.147230,1.143486,1.142618,0.001569,0.002091,0.004122,0.010490,0.007398,0.00606,0.00626,0.00085,0.00126,-1.0,459.8,470.5,-1,0.0,1.0,0.0,1.14401,1.14315,1.14189,1.15080,1.14501,1.14436,1.14062,1.14111,1.13900,1.15027,1.14400,1.14315,1.14420,1.14884,1.14343,1.14424,1.14542,1.14244,1.14356,1.13958,1.14023,1.13567,1.13665,1.13244,1.13814,1,1.13244,2019-02-07,1.136965,0.00985,1.135320,0.00658,1.137620,0.00198,1.136240,0.00078,1.136850,0.00202,1.138610,0.00150,1.138805,0.00111,1.139630,0.00276,1.144495,0.00697,1.147795,0.00037,1.150235,0.00533,1.154365,0.00295,fn


In [141]:
features.groupby('direction').count()['open']

direction
-1.0    36
 1.0    26
Name: open, dtype: int64

In [136]:
features.groupby('ml_signal').count()

Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,marubozu,marubozu+1,marubozu-1,marubozu-2,day-1_open,day-2_open,day-3_open,day-1_high,day-2_high,day-3_high,day-1_low,day-2_low,day-3_low,day-1_close,day-2_close,day-3_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,exit_price,select,target,date+5,day-4_mid,day-4_height,day-5_mid,day-5_height,day-6_mid,day-6_height,day-7_mid,day-7_height,day-8_mid,day-8_height,day-9_mid,day-9_height,day-10_mid,day-10_height,day-11_mid,day-11_height,day-12_mid,day-12_height,day-13_mid,day-13_height,day-14_mid,day-14_height,day-15_mid,day-15_height
ml_signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1
fn,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27
fp,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
tn,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
tp,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13


In [137]:
def convert_signal(row):
    """Turn a classification label into a trade signal.

    Returns the row's 'direction' when 'ml_signal' is 'tp', otherwise 0.
    """
    return row['direction'] if row['ml_signal'] == 'tp' else 0

In [138]:
features['ml_signal'] = features.apply(convert_signal, axis=1)

In [140]:
features.groupby('ml_signal').count()['open']

ml_signal
-1.0     7
 0.0    49
 1.0     6
Name: open, dtype: int64

---

---

# 3.0 Model

## 3.1 Train/Test Split

In [None]:
def create_train_test_split(date, time_series, model_info):
    """Split the series around a pattern date into a train and a test set.

    The training set is every row up to and including ``date``; the test set
    is the rows that follow it (five, or fewer when the series ends sooner).
    Both sets carry two extra leading columns: ``signal`` (the 'marubozu'
    value on ``date``) and ``target_price`` (the 'exit_price' on ``date``).

    Parameters
    ----------
    date : label present in ``time_series.index``
        The pattern date to split on.
    time_series : pandas.DataFrame
        Must contain the 'exit_price' and 'marubozu' columns. Not modified.
    model_info : dict
        Updated in place with the keys 'signal', 'start', 'end', 'train'
        and 'test'.

    Returns
    -------
    dict
        The same ``model_info`` object.
    """
    # Positional index of the pattern row
    pattern_loc = time_series.index.get_loc(date)

    # Pattern row plus the 5 rows after it, hence an exclusive slice end of +6
    test_end_loc = pattern_loc + 6

    # Copy the slice so the inserted columns never touch time_series
    train_test = time_series.iloc[:test_end_loc].copy()

    # Set target values
    target_value = time_series.loc[time_series.index == date, 'exit_price'].item()

    # add target price to dataset
    train_test.insert(0, 'target_price', target_value)

    # Add Signal so it can be determined whether we expect the price to go up or down.
    model_info['signal'] = time_series.loc[date, 'marubozu']

    # insert the signal to dataset
    train_test.insert(0, 'signal', model_info['signal'])

    # Anchor the split on the pattern row rather than on len(train_test) - 5:
    # the two agree when five rows follow the pattern, but near the end of the
    # series the length-based form would cut the training set short and put
    # pre-pattern rows into the test set.
    model_info['start'] = pattern_loc + 1
    model_info['end'] = len(train_test) - 1

    # create the train and data sets
    model_info['train'] = train_test.iloc[:model_info['start']]
    model_info['test'] = train_test.iloc[model_info['start']:]

    return model_info

## 3.2 Fit Model

In [3]:
# Template record for collecting the outcome of one model run;
# text fields start empty and numeric fields start at zero.
results = {
    'algo': '',
    'name': '',
    'date': '',
    'time_frame': '',
    'success': 0,
    'RMSE': 0,
    'MSE': 0,
    'classification': '',
}

In [None]:
def train_arima(model_info, p=0, d=1, q=0):
    """Fit an ARIMA(p, d, q) model with exogenous regressors and forecast the test window.

    The modelled series is the training 'low' column when
    ``model_info['signal'] == -1`` and the training 'high' column otherwise.

    Parameters
    ----------
    model_info : dict
        Must provide 'train' and 'test' DataFrames (both with the regressor
        columns listed below), 'signal', and the integer forecast bounds
        'start' and 'end'.
    p, d, q : int
        ARIMA order; defaults to (0, 1, 0).

    Returns
    -------
    tuple
        ``(results, predictions)``: the fitted results object and the
        predictions for ``start``..``end``, renamed to
        'ARIMA-<p>-<d>-<q> Predictions'.
    """
    exog_columns = ['mnth_mv_avg', 'wk_mv_avg', 'volatility_3_day',
                    'gold_euro', 'gold_usd']

    train_exog = model_info['train'][exog_columns].values
    # Out-of-sample forecasts need the regressors of the periods being
    # forecast; passing the training matrix here would feed the first training
    # rows in as if they were the future values.
    # NOTE(review): for p > 0 confirm whether the installed statsmodels version
    # expects extra lagged exog rows to be prepended.
    forecast_exog = model_info['test'][exog_columns].values

    target_column = 'low' if model_info['signal'] == -1 else 'high'
    model = ARIMA(model_info['train'][target_column], exog=train_exog, order=(p, d, q))

    results = model.fit()
    predictions = results.predict(start=model_info['start'],
                                  end=model_info['end'], exog=forecast_exog,
                                  dynamic=True,
                                  typ='levels').rename('ARIMA-{}-{}-{} Predictions'.format(p, d, q))

    return results, predictions

## 3.3 Calculate Results

In [None]:
def meet_threshold(row):
    """Report whether the actual price reached the target in the signalled direction.

    Returns -1 when 'signal' is -1 and 'low' is at or below 'target_price',
    1 when 'signal' is 1 and 'high' is at or above 'target_price', else 0.
    """
    signal = row['signal']
    if signal == -1:
        return -1 if row['low'] <= row['target_price'] else 0
    if signal == 1:
        return 1 if row['high'] >= row['target_price'] else 0
    return 0

In [None]:
def ml_decision(row):
    """Report whether the predicted price reaches the target in the given direction.

    Returns -1 when 'direction' is -1 and 'preds' is at or below
    'target_price', 1 when 'direction' is 1 and 'preds' is at or above
    'target_price', else 0.
    """
    direction = row['direction']
    if direction == -1:
        return -1 if row['preds'] <= row['target_price'] else 0
    if direction == 1:
        return 1 if row['preds'] >= row['target_price'] else 0
    return 0

In [None]:
def create_results_outcomes_dataframe(test, predictions):
    """Assemble the per-row outcome table for one test window.

    Columns, in order: 'low' and 'high' from ``test``, 'preds' (the
    prediction values, taken by position), 'target_price', 'direction'
    (the test set's 'signal') and 'correct_call' (meet_threshold applied to
    each test row).
    """
    return test[['low', 'high']].assign(
        preds=predictions.values,
        target_price=test['target_price'],
        direction=test['signal'],
        correct_call=test.apply(meet_threshold, axis=1),
    )

In [None]:
def print_chart(outcomes):
    """Plot the actual price, the predictions and the target price for one test window.

    The actual price plotted is 'low' when the signal is -1 and 'high'
    otherwise.

    Parameters
    ----------
    outcomes : pandas.DataFrame
        Needs the columns 'low', 'high', 'preds', 'target_price' and
        'direction', as built by create_results_outcomes_dataframe.
    """
    # Take the signal from the frame being plotted instead of the
    # notebook-level model_info, so the chart cannot pick up stale state.
    # 'direction' holds the same signal value on every row.
    signal = outcomes['direction'].iloc[0]

    price_column = 'low' if signal == -1 else 'high'
    outcomes[price_column].plot(legend=False, figsize=(12, 8))

    outcomes['preds'].plot(legend=False)
    outcomes['target_price'].plot(legend=False)

In [None]:
def get_results(model_info, predictions=None):
    """Compute the RMSE and MSE of the predictions against the test window.

    The actual series is the test 'low' column when
    ``model_info['signal'] == -1`` and the test 'high' column otherwise.

    Parameters
    ----------
    model_info : dict
        Must provide 'signal' and a 'test' DataFrame.
    predictions : array-like, optional
        Predicted values for the test window. When omitted, the
        notebook-level ``predictions`` variable is used, which keeps existing
        ``get_results(model_info)`` calls working.

    Returns
    -------
    tuple
        ``(rmse_res, mse)``.

    Raises
    ------
    NameError
        If ``predictions`` is omitted and no notebook-level ``predictions``
        exists.
    """
    if predictions is None:
        # Backward-compatible fallback to the global the original relied on
        try:
            predictions = globals()['predictions']
        except KeyError:
            raise NameError("name 'predictions' is not defined")

    price_column = 'low' if model_info['signal'] == -1 else 'high'
    actual = model_info['test'][price_column]

    mse = mean_squared_error(actual, predictions)
    rmse_res = rmse(actual, predictions)

    return rmse_res, mse

In [None]:
def classify(outcomes):
    """Label one test window as 'tp', 'tn', 'fp' or 'fn'.

    The largest 'direction' value selects the case: 1 compares the maxima of
    'correct_call' and 'ml_correct_call' against 1, -1 compares their minima
    against -1. Any other direction returns 'ERROR'. A combination of values
    outside the four labelled cases returns None.
    """
    overall_direction = max(outcomes['direction'])
    if overall_direction == 1:
        hit, extreme = 1, max
    elif overall_direction == -1:
        hit, extreme = -1, min
    else:
        return 'ERROR'

    actual = extreme(outcomes['correct_call'])
    if actual != 0 and actual != hit:
        # Neither "no hit" nor "hit": no label applies
        return None

    predicted = extreme(outcomes['ml_correct_call'])

    # (what actually happened, what the model called) -> label
    labels = {
        (0, 0): 'tn',
        (hit, hit): 'tp',
        (0, hit): 'fp',
        (hit, 0): 'fn',
    }
    return labels.get((actual, predicted))

## 3.4 Run Model