# 4.0 Modelling Baseline

---

# Contents

- [1.0 ARIMA Model](#1.0-ARIMA-Model)
- [2.0 Daily Data](#2.0-Daily-Data)
    - [2.1 Load Data](#2.1-Load-Data)
    - [2.2 Train Test Split](#2.2-Train-Test-Split)

In [1]:
# !pip install pmdarima

In [2]:
import calendar
import datetime
import os
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from pmdarima import auto_arima
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tools.eval_measures import rmse
from statsmodels.tsa.arima_model import ARIMA, ARMA, ARMAResults, ARIMAResults
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller

%matplotlib inline
warnings.filterwarnings("ignore")


In [3]:
pd.set_option('display.max_columns', None)

---

# 1.0 ARIMA Model

The ARIMA model has 3 components:

+ Differencing Step - I - Integrated - Check for stationarity
+ Autoregressive Piece - AR - long term trends
+ Moving Average Piece - MA - Modelling sudden fluctuations

Each component has a corresponding model order: p, d and q. I will use the results of the earlier Dickey-Fuller test to choose d.

+ d is the order of differencing we found using the Augmented Dickey-Fuller test.
+ p is the number of autoregressive terms in our model. The PACF is used to estimate this.
+ q is the number of moving-average terms in our model.
    + If the PACF has a sharp cut-off and the lag-1 value of the ACF is negative, choose q to be the lag in the ACF before the cut-off.
    + If the PACF does not have a sharp cut-off, or the lag-1 value of the ACF is not negative, choose q = 0.

Therefore based on the charts before I will use:
    
    + p = 1
    + d = 1
    + q = 0
    
However I will use auto_arima to help decide.

---

In [4]:
# Template of the fields recorded for a single model/benchmark run.
# NOTE(review): `results` is not referenced by any later cell visible in this
# section (the loops build their own `results_dict`) — confirm it is still needed.
results = {'algo':'','name':'','date':'', 'time_frame':'','success':0,'RMSE':0, 'MSE':0, 'classification':'' }

##### 1.1 Load Data marubozu

In [5]:
# Root folder of the capstone data. The default is the original author's
# location; set the CAPSTONE_DATA_DIR environment variable to run the notebook
# on another machine without editing the cells.
DATA_DIR = Path(os.environ.get('CAPSTONE_DATA_DIR', '/Users/stuartdaw/Documents/Capstone_data'))

# Daily EUR/USD frame for the Marubozu strategy, indexed by the parsed `date` column.
daily_maru = pd.read_csv(DATA_DIR / 'data' / 'resampled' / 'eur-usddailyMarubozu.csv',
                         index_col='date', parse_dates=True)

In [6]:
daily_maru.index

DatetimeIndex(['2000-07-14', '2000-07-17', '2000-07-18', '2000-07-19',
               '2000-07-20', '2000-07-21', '2000-07-24', '2000-07-25',
               '2000-07-26', '2000-07-27',
               ...
               '2019-12-11', '2019-12-12', '2019-12-13', '2019-12-16',
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24'],
              dtype='datetime64[ns]', name='date', length=4612, freq=None)

In [7]:
daily_maru.columns

Index(['open', 'high', 'low', 'close', 'mid', 'wk_mv_avg', 'mnth_mv_avg',
       'volatility_3_day', 'volatility_10_day', 'pct_chge_3_prds',
       'pct_chge_5_prds', 'pct_chge_10_prds', 'height', 'height-1', 'height-2',
       'height-3', 'direction', 'gold_usd', 'gold_euro', 'marubozu',
       'marubozu+1', 'marubozu-1', 'marubozu-2', 'day-1_open', 'day-2_open',
       'day-3_open', 'day-1_high', 'day-2_high', 'day-3_high', 'day-1_low',
       'day-2_low', 'day-3_low', 'day-1_close', 'day-2_close', 'day-3_close',
       'day+1_open', 'day+1_high', 'day+1_low', 'day+1_close', 'day+2_high',
       'day+2_low', 'day+3_high', 'day+3_low', 'day+4_high', 'day+4_low',
       'day+5_high', 'day+5_low', 'exit_price', 'select', 'target', 'date+5'],
      dtype='object')

In [8]:
#daily = daily.resample('B').agg({'open':'first','high':'max','low':'min', 'close':'last'})


In [9]:
daily_maru.index

DatetimeIndex(['2000-07-14', '2000-07-17', '2000-07-18', '2000-07-19',
               '2000-07-20', '2000-07-21', '2000-07-24', '2000-07-25',
               '2000-07-26', '2000-07-27',
               ...
               '2019-12-11', '2019-12-12', '2019-12-13', '2019-12-16',
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24'],
              dtype='datetime64[ns]', name='date', length=4612, freq=None)

In [10]:
daily_maru['date+5'] = pd.to_datetime(daily_maru['date+5'])

In [11]:
daily_maru.loc[daily_maru.index[1],'date+5']

Timestamp('2000-07-24 00:00:00')

In [12]:
# Use positional access explicitly: the Series is indexed by dates, so a bare
# integer key relies on pandas' deprecated positional fallback.
type(daily_maru['date+5'].iloc[0])

pandas._libs.tslibs.timestamps.Timestamp

In [13]:
### Get correct hyper parameters

In [14]:
## Arima
auto_arima(daily_maru['close'].dropna(), seasonal=False).summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,4612.0
Model:,"SARIMAX(0, 1, 0)",Log Likelihood,15826.945
Date:,"Wed, 05 Aug 2020",AIC,-31651.89
Time:,23:07:36,BIC,-31645.453
Sample:,0,HQIC,-31649.624
,- 4612,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,6.11e-05,5.15e-07,118.588,0.000,6.01e-05,6.21e-05

0,1,2,3
Ljung-Box (Q):,61.33,Jarque-Bera (JB):,20105.01
Prob(Q):,0.02,Prob(JB):,0.0
Heteroskedasticity (H):,0.66,Skew:,-0.38
Prob(H) (two-sided):,0.0,Kurtosis:,13.2


---

## 1.1 Get patterns

In [15]:
# Root folder of the capstone data. The default is the original author's
# location; set the CAPSTONE_DATA_DIR environment variable to run the notebook
# on another machine without editing the cells.
DATA_DIR = Path(os.environ.get('CAPSTONE_DATA_DIR', '/Users/stuartdaw/Documents/Capstone_data'))

# Dates on which a daily Marubozu pattern completed. `parse_dates=True` only
# parses the index, which is a plain RangeIndex here, so the date column is
# named explicitly.
daily_pattern = pd.read_csv(DATA_DIR / 'patterns' / 'dailyMarubozu.csv',
                            parse_dates=['pattern_end'])

In [16]:
daily_pattern['pattern_end'] = pd.to_datetime(daily_pattern['pattern_end'])

In [17]:
daily_pattern.loc[1]

pattern_end   2000-10-20
Name: 1, dtype: datetime64[ns]

In [18]:
len(daily_pattern)

64

---

In [19]:
def create_train_test_split(date, time_series, model_info):
    """Split the series into history up to a signal and the bars that follow it.

    Parameters
    ----------
    date : timestamp-like
        Index label of the bar on which the pattern completed. It must be
        present exactly once in ``time_series.index``.
    time_series : pandas.DataFrame
        Date-indexed frame (assumed sorted ascending) containing the columns
        ``'date+5'`` (last date of the evaluation window), ``'exit_price'``
        and ``'marubozu'``.
    model_info : dict
        Record that is updated **in place** and also returned. The keys
        ``'signal'``, ``'start'``, ``'end'``, ``'train'`` and ``'test'`` are
        overwritten.

    Returns
    -------
    dict
        ``model_info`` with ``'train'`` holding every row up to and including
        ``date`` and ``'test'`` holding the rows after ``date`` up to and
        including ``date+5``. Both frames carry two extra leading columns,
        ``'signal'`` and ``'target_price'``, filled with the signal value and
        exit price of the ``date`` row.
    """
    test_end_date = time_series.loc[date, 'date+5']

    # Explicit copy so the column inserts below can never touch the caller's frame.
    train_test = time_series.loc[time_series.index <= test_end_date].copy()

    target_value = time_series.loc[time_series.index == date, 'exit_price'].item()
    train_test.insert(0, 'target_price', target_value)

    model_info['signal'] = time_series.loc[date, 'marubozu']
    train_test.insert(0, 'signal', model_info['signal'])

    # Anchor the split on the signal bar instead of assuming the window is
    # exactly five rows long: if `date+5` is fewer than five rows ahead,
    # "the last five rows" would pull the signal bar (or older bars) into the
    # test set. When `date+5` is exactly five rows ahead this is identical to
    # the previous `len(train_test) - 5` split.
    first_test_row = int((train_test.index <= date).sum())

    model_info['start'] = first_test_row
    model_info['end'] = len(train_test) - 1

    model_info['train'] = train_test.iloc[:first_test_row]
    model_info['test'] = train_test.iloc[first_test_row:]

    return model_info

In [20]:
def meet_threshold(row):
    """Report whether one bar reached the target price in the signal's direction.

    ``row`` is any mapping-like object (e.g. a DataFrame row) with the keys
    ``'signal'``, ``'low'``, ``'high'`` and ``'target_price'``.

    Returns ``-1`` when a short signal (``-1``) sees the bar's low at or below
    the target, ``1`` when a long signal (``1``) sees the bar's high at or
    above the target, and ``0`` in every other case.
    """
    side = row['signal']

    if side == -1:
        # Short trade: the target is hit once price trades down to it.
        return -1 if row['low'] <= row['target_price'] else 0

    if side == 1:
        # Long trade: the target is hit once price trades up to it.
        return 1 if row['high'] >= row['target_price'] else 0

    return 0

In [21]:
def ml_decision(row):
    """Turn a model prediction into a trade call for one row.

    ``row`` must provide ``'direction'``, ``'preds'`` and ``'target_price'``.

    Returns ``-1`` when the direction is short and the prediction is at or
    below the target, ``1`` when the direction is long and the prediction is
    at or above the target (this branch also prints the two values), and
    ``0`` otherwise.
    """
    side = row['direction']

    if side == -1:
        return -1 if row['preds'] <= row['target_price'] else 0

    if side == 1 and row['preds'] >= row['target_price']:
        # Only the long branch reports the values that triggered it.
        print(f"preds: {row['preds']} >= row target: {row['target_price']}")
        return 1

    return 0

In [22]:
def create_results_outcomes_dataframe(test):
    """Collect the per-bar outcome of a signal over its test window.

    ``test`` is the test frame produced by ``create_train_test_split``; it must
    contain ``'low'``, ``'high'``, ``'target_price'`` and ``'signal'``.

    Returns a new DataFrame with the columns ``low``, ``high``,
    ``target_price``, ``direction`` (copied from ``signal``) and
    ``correct_call`` (``meet_threshold`` applied to every row of ``test``).
    """
    # (output column, source column in `test`), in display order.
    column_sources = (
        ('low', 'low'),
        ('high', 'high'),
        ('target_price', 'target_price'),
        ('direction', 'signal'),
    )

    outcomes = pd.DataFrame()
    for out_col, src_col in column_sources:
        outcomes[out_col] = test[src_col]

    outcomes['correct_call'] = test.apply(meet_threshold, axis=1)

    return outcomes

In [23]:
def classify(outcomes):
    """Classify one benchmark trade as a true or false positive.

    Because this is the benchmark, a buy/sell decision is assumed to have been
    taken for every signal, so only ``'tp'`` and ``'fp'`` can occur.

    Parameters
    ----------
    outcomes : mapping of column name -> iterable
        Must provide ``'direction'`` (1 for long, -1 for short) and
        ``'correct_call'`` (per-bar result from ``meet_threshold``).

    Returns
    -------
    str
        ``'tp'`` if any bar reached the target in the signal's direction,
        ``'fp'`` if no bar did, and ``'ERROR'`` for anything that cannot be
        classified (no long/short direction, empty input, or ``correct_call``
        values that contradict the direction). The last two cases previously
        raised ``ValueError`` or silently returned ``None``.
    """
    # `default=None` keeps an empty window from raising inside max()/min().
    direction = max(outcomes['direction'], default=None)

    if direction == 1:
        best_call = max(outcomes['correct_call'], default=None)
    elif direction == -1:
        best_call = min(outcomes['correct_call'], default=None)
    else:
        return 'ERROR'

    if best_call == direction:
        return 'tp'
    if best_call == 0:
        return 'fp'

    # Direction is valid but the calls are inconsistent with it (or missing).
    return 'ERROR'
    

In [24]:
# Scratch record that create_train_test_split refills for every pattern date.
model_info = dict.fromkeys(("train", "test", "start", "end", "signal"))
benchmark_results = []

for match in daily_pattern['pattern_end']:

    # One result record per pattern occurrence; the fields left as None are
    # not populated by the benchmark.
    results_dict = {
        'name': 'Bechmark: ' + str(match),
        'pattern': None,
        'date': None,
        'time_frame': 'daily',
        'RMSE': None,
        'MSE': None,
        'classification': None,
        'strategy': 'Maribozu',
    }

    model_info = create_train_test_split(match, daily_maru, model_info)

    # Only signals with at least 10 rows of history are scored.
    if len(model_info['train']) >= 10:
        outcomes = create_results_outcomes_dataframe(model_info['test'])
        results_dict['classification'] = classify(outcomes)
        benchmark_results.append(results_dict)



In [25]:
# arima_results

In [26]:
def create_cm(results):
    """Tally classification labels into a 2x2 confusion matrix.

    ``results`` is an iterable of dicts, each with a ``'classification'`` key.
    The returned nested list is laid out as ``[[tp, fp], [fn, tn]]``; entries
    whose label is not one of ``'tp'``, ``'fp'``, ``'fn'`` or ``'tn'`` are
    ignored.
    """
    # Label -> (row, column) of the cell it increments.
    cell_for = {'tp': (0, 0), 'fp': (0, 1), 'fn': (1, 0), 'tn': (1, 1)}

    res_cm = [[0, 0], [0, 0]]

    for entry in results:
        cell = cell_for.get(entry['classification'])
        if cell is not None:
            row_idx, col_idx = cell
            res_cm[row_idx][col_idx] += 1

    return res_cm

In [27]:
cm = create_cm(benchmark_results)

In [28]:
cm_df = pd.DataFrame(cm, index=['pred_success', 'pred_non_success'], columns=['actual success', 'actual non_success'])
cm_df

Unnamed: 0,actual success,actual non_success
pred_success,40,24
pred_non_success,0,0


In [29]:
def print_metrics(cm):
    """Print accuracy, precision and recall for a 2x2 confusion matrix.

    Parameters
    ----------
    cm : 2x2 nested sequence or array
        Laid out as ``[[tp, fp], [fn, tn]]`` (the layout built by ``create_cm``).

    Notes
    -----
    A metric whose denominator is zero (for example recall when there are no
    true positives and no false negatives) is undefined; it is printed as
    ``nan`` instead of raising ``ZeroDivisionError``.
    """
    def _ratio(numerator, denominator):
        # Undefined ratios become NaN so the report can still be printed.
        return numerator / denominator if denominator else float('nan')

    true_pos, false_pos = cm[0][0], cm[0][1]
    false_neg, true_neg = cm[1][0], cm[1][1]

    # Accuracy - how many did the model get right:
    # correct predictions / all predictions.
    acc = _ratio(true_pos + true_neg, np.sum(cm))

    # Precision - proportion of positive identifications that were correct:
    # true positives / (true positives + false positives).
    prec = _ratio(true_pos, true_pos + false_pos)

    # Recall - proportion of actual positives that were identified:
    # true positives / (true positives + false negatives).
    rec = _ratio(true_pos, true_pos + false_neg)

    print(f"Accuracy:\t{round(acc,2)}\nPrecision:\t{round(prec,2)}\nRecall:\t\t{round(rec,2)}")

In [30]:
# Display the results
print_metrics(cm)

Accuracy:	0.62
Precision:	0.62
Recall:		1.0


---

## Baseline for Fractals

##### 5 Load Data

In [31]:
# Root folder of the capstone data. The default is the original author's
# location; set the CAPSTONE_DATA_DIR environment variable to run the notebook
# on another machine without editing the cells.
DATA_DIR = Path(os.environ.get('CAPSTONE_DATA_DIR', '/Users/stuartdaw/Documents/Capstone_data'))

# Daily EUR/USD frame for the fractal strategy, indexed by the parsed `date` column.
daily_fract = pd.read_csv(DATA_DIR / 'data' / 'resampled' / 'eur-usddailyfractals.csv',
                          index_col='date', parse_dates=True)

In [32]:
daily_fract.index

DatetimeIndex(['2000-07-17', '2000-07-18', '2000-07-19', '2000-07-20',
               '2000-07-21', '2000-07-24', '2000-07-25', '2000-07-26',
               '2000-07-27', '2000-07-28',
               ...
               '2019-12-11', '2019-12-12', '2019-12-13', '2019-12-16',
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24'],
              dtype='datetime64[ns]', name='date', length=4632, freq=None)

In [33]:
daily_fract.columns

Index(['open', 'high', 'low', 'close', 'mid', 'wk_mv_avg', 'mnth_mv_avg',
       'volatility_3_day', 'volatility_10_day', 'pct_chge_3_prds',
       'pct_chge_5_prds', 'pct_chge_10_prds', 'height', 'height-1', 'height-2',
       'height-3', 'direction', 'gold_usd', 'gold_euro', 'day-1_high',
       'day-2_high', 'day-3_high', 'day-4_high', 'day-1_low', 'day-2_low',
       'day-3_low', 'day-4_low', 'day-1_open', 'day-2_open', 'day-3_open',
       'day-4_open', 'day-1_close', 'day-2_close', 'day-3_close',
       'day-4_close', 'day+1_open', 'day+1_high', 'day+1_low', 'day+1_close',
       'day+2_high', 'day+2_low', 'day+3_high', 'day+3_low', 'day+4_high',
       'day+4_low', 'day+5_high', 'day+5_low', '5_day_avg', 'fractal_end',
       'day+1_frac', 'day+2_frac', 'day+3_frac', 'day+4_frac', 'select',
       'exit_price', 'target', 'date+5', 'fractal'],
      dtype='object')

In [34]:
daily_fract.head()

Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,day-1_high,day-2_high,day-3_high,day-4_high,day-1_low,day-2_low,day-3_low,day-4_low,day-1_open,day-2_open,day-3_open,day-4_open,day-1_close,day-2_close,day-3_close,day-4_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,5_day_avg,fractal_end,day+1_frac,day+2_frac,day+3_frac,day+4_frac,select,exit_price,target,date+5,fractal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1
2000-07-17,0.9382,0.9402,0.9342,0.936,0.9371,0.94217,0.9471,0.004282,0.00344,-0.009408,-0.016013,-0.014875,0.0022,0.003,0.0062,0.0086,0,98.1,123.3,0.9389,0.9425,0.9517,0.9569,0.9318,0.933,0.9396,0.9496,0.9353,0.9416,0.9503,0.9545,0.9383,0.9354,0.9417,0.9504,0.9361,0.9368,0.9227,0.9256,0.927,0.9193,0.9342,0.9204,0.9384,0.9319,0.9367,0.9313,0.005,0,0.0,0.0,0.0,-1.0,0,0.936,0.0,2000-07-24,0
2000-07-18,0.9361,0.9368,0.9227,0.9256,0.93085,0.93785,0.945633,0.003582,0.003797,-0.008151,-0.022678,-0.020519,0.0105,0.0022,0.003,0.0062,0,98.3,123.8,0.9402,0.9389,0.9425,0.9517,0.9342,0.9318,0.933,0.9396,0.9382,0.9353,0.9416,0.9503,0.936,0.9383,0.9354,0.9417,0.9255,0.927,0.9193,0.9246,0.9342,0.9204,0.9384,0.9319,0.9367,0.9313,0.9433,0.9329,0.0061,0,0.0,0.0,-1.0,0.0,0,0.9256,0.0,2000-07-25,0
2000-07-19,0.9255,0.927,0.9193,0.9246,0.92505,0.93366,0.944207,0.003915,0.003618,-0.012543,-0.022146,-0.029125,0.0009,0.0105,0.0022,0.003,0,97.0,123.3,0.9368,0.9402,0.9389,0.9425,0.9227,0.9342,0.9318,0.933,0.9361,0.9382,0.9353,0.9416,0.9256,0.936,0.9383,0.9354,0.9245,0.9342,0.9204,0.9325,0.9384,0.9319,0.9367,0.9313,0.9433,0.9329,0.945,0.9391,0.005,0,0.0,-1.0,0.0,0.0,0,0.9246,0.0,2000-07-26,0
2000-07-20,0.9245,0.9342,0.9204,0.9325,0.9285,0.93166,0.943221,0.005881,0.004167,-0.009177,-0.010655,-0.024531,0.008,0.0009,0.0105,0.0022,0,97.1,123.3,0.927,0.9368,0.9402,0.9389,0.9193,0.9227,0.9342,0.9318,0.9255,0.9361,0.9382,0.9353,0.9246,0.9256,0.936,0.9383,0.9324,0.9384,0.9319,0.9365,0.9367,0.9313,0.9433,0.9329,0.945,0.9391,0.9444,0.9314,0.005,0,-1.0,0.0,0.0,0.0,0,0.9325,0.0,2000-07-27,0
2000-07-21,0.9324,0.9384,0.9319,0.9365,0.93445,0.93119,0.942879,0.00666,0.005033,0.003867,-0.002509,-0.016575,0.0041,0.008,0.0009,0.0105,-1,97.1,122.2,0.9342,0.927,0.9368,0.9402,0.9204,0.9193,0.9227,0.9342,0.9245,0.9255,0.9361,0.9382,0.9325,0.9246,0.9256,0.936,0.9366,0.9367,0.9313,0.933,0.9433,0.9329,0.945,0.9391,0.9444,0.9314,0.9338,0.9229,0.00514,-1,0.0,0.0,0.0,0.0,1,0.93136,0.9229,2000-07-28,0


In [35]:
daily_fract.shape

(4632, 58)

In [36]:
daily_fract.index

DatetimeIndex(['2000-07-17', '2000-07-18', '2000-07-19', '2000-07-20',
               '2000-07-21', '2000-07-24', '2000-07-25', '2000-07-26',
               '2000-07-27', '2000-07-28',
               ...
               '2019-12-11', '2019-12-12', '2019-12-13', '2019-12-16',
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24'],
              dtype='datetime64[ns]', name='date', length=4632, freq=None)

In [37]:
daily_fract['date+5'] = pd.to_datetime(daily_fract['date+5'])

In [38]:
daily_fract.loc[daily_fract.index[1],'date+5']

Timestamp('2000-07-25 00:00:00')

In [39]:
# Use positional access explicitly: the Series is indexed by dates, so a bare
# integer key relies on pandas' deprecated positional fallback.
type(daily_fract['date+5'].iloc[0])

pandas._libs.tslibs.timestamps.Timestamp

# Get fractal patterns

In [40]:
# Root folder of the capstone data. The default is the original author's
# location; set the CAPSTONE_DATA_DIR environment variable to run the notebook
# on another machine without editing the cells.
DATA_DIR = Path(os.environ.get('CAPSTONE_DATA_DIR', '/Users/stuartdaw/Documents/Capstone_data'))

# Dates on which a daily fractal pattern completed. `parse_dates=True` only
# parses the index, which is a plain RangeIndex here, so the date column is
# named explicitly.
fractal_pattern = pd.read_csv(DATA_DIR / 'patterns' / 'dailyfractals.csv',
                              parse_dates=['pattern_end'])

In [41]:
fractal_pattern['pattern_end'] = pd.to_datetime(fractal_pattern['pattern_end'])

In [42]:
fractal_pattern.loc[1]

pattern_end   2000-07-28
Name: 1, dtype: datetime64[ns]

In [43]:
len(fractal_pattern)

613

---

In [44]:
fractal_pattern.loc[290]

pattern_end   2010-06-29
Name: 290, dtype: datetime64[ns]

In [45]:
daily_fract.loc[daily_fract.index == '2019-05-15']

Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,day-1_high,day-2_high,day-3_high,day-4_high,day-1_low,day-2_low,day-3_low,day-4_low,day-1_open,day-2_open,day-3_open,day-4_open,day-1_close,day-2_close,day-3_close,day-4_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,5_day_avg,fractal_end,day+1_frac,day+2_frac,day+3_frac,day+4_frac,select,exit_price,target,date+5,fractal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1
2019-05-15,1.12085,1.12245,1.11779,1.12085,1.12085,1.122131,1.121067,0.000943,0.001272,-0.001875,0.001,-0.000321,0.0,0.00309,0.00095,1e-05,1,451.4,473.0,1.12439,1.12635,1.12536,1.12509,1.12007,1.12191,1.12177,1.11734,1.12393,1.12297,1.12295,1.11908,1.12084,1.12392,1.12296,1.12296,1.12086,1.12238,1.11662,1.11773,1.11841,1.11531,1.11748,1.11506,1.1188,1.11419,1.11802,1.1148,0.005,1,0.0,0.0,0.0,0.0,1,1.12585,1.12238,2019-05-22,1


In [46]:
def create_train_test_split(date, time_series, model_info, horizon=5):
    """Split the series into history up to a signal and the bars that follow it.

    Parameters
    ----------
    date : timestamp-like
        Index label of the bar on which the fractal completed. It must be
        present exactly once in ``time_series.index``.
    time_series : pandas.DataFrame
        Date-indexed frame containing the columns ``'exit_price'`` and
        ``'fractal_end'``.
    model_info : dict
        Record that is updated **in place** and also returned. The keys
        ``'signal'``, ``'start'``, ``'end'``, ``'train'`` and ``'test'`` are
        overwritten.
    horizon : int, default 5
        Number of bars after ``date`` that make up the test window.

    Returns
    -------
    dict
        ``model_info`` with ``'train'`` holding every row up to and including
        ``date`` and ``'test'`` holding up to ``horizon`` rows after it (fewer
        when the series ends first). Both frames carry two extra leading
        columns, ``'signal'`` and ``'target_price'``, filled with the signal
        value and exit price of the ``date`` row.
    """
    signal_loc = time_series.index.get_loc(date)
    first_test_row = signal_loc + 1
    test_end_loc = first_test_row + horizon

    # Explicit copy so the column inserts below can never touch the caller's frame.
    train_test = time_series.iloc[:test_end_loc].copy()

    target_value = time_series.loc[time_series.index == date, 'exit_price'].item()
    train_test.insert(0, 'target_price', target_value)

    model_info['signal'] = time_series.loc[date, 'fractal_end']
    train_test.insert(0, 'signal', model_info['signal'])

    # Split at the signal position rather than "the last `horizon` rows": when
    # the signal is closer than `horizon` bars to the end of the series the
    # slice above is truncated, and counting back from its end would pull the
    # signal bar and earlier bars into the test window.
    model_info['start'] = first_test_row
    model_info['end'] = len(train_test) - 1

    model_info['train'] = train_test.iloc[:first_test_row]
    model_info['test'] = train_test.iloc[first_test_row:]

    return model_info

In [47]:
def meet_threshold(row):
    """Report whether one bar reached the target price in the signal's direction.

    ``row`` is any mapping-like object (e.g. a DataFrame row) with the keys
    ``'signal'``, ``'low'``, ``'high'`` and ``'target_price'``.

    Returns ``-1`` when a short signal (``-1``) sees the bar's low at or below
    the target, ``1`` when a long signal (``1``) sees the bar's high at or
    above the target, and ``0`` in every other case.
    """
    side = row['signal']

    if side == -1:
        # Short trade: the target is hit once price trades down to it.
        return -1 if row['low'] <= row['target_price'] else 0

    if side == 1:
        # Long trade: the target is hit once price trades up to it.
        return 1 if row['high'] >= row['target_price'] else 0

    return 0

In [48]:
# def get_5_day_price(row):
    

In [49]:
def ml_decision(row):
    """Turn a model prediction into a trade call for one row.

    ``row`` must provide ``'direction'``, ``'preds'`` and ``'target_price'``.

    Returns ``-1`` when the direction is short and the prediction is at or
    below the target, ``1`` when the direction is long and the prediction is
    at or above the target (this branch also prints the two values), and
    ``0`` otherwise.
    """
    side = row['direction']

    if side == -1:
        return -1 if row['preds'] <= row['target_price'] else 0

    if side == 1 and row['preds'] >= row['target_price']:
        # Only the long branch reports the values that triggered it.
        print(f"preds: {row['preds']} >= row target: {row['target_price']}")
        return 1

    return 0

In [50]:
def create_results_outcomes_dataframe(test):
    """Collect the per-bar outcome of a signal over its test window.

    ``test`` is the test frame produced by ``create_train_test_split``; it must
    contain ``'low'``, ``'high'``, ``'5_day_avg'``, ``'open'``, ``'close'``,
    ``'target_price'`` and ``'signal'``.

    Returns a new DataFrame with those price columns, ``direction`` (copied
    from ``signal``) and ``correct_call`` (``meet_threshold`` applied to every
    row of ``test``).
    """
    # (output column, source column in `test`), in display order.
    column_sources = (
        ('low', 'low'),
        ('high', 'high'),
        ('5_day_avg', '5_day_avg'),
        ('open', 'open'),
        ('close', 'close'),
        ('target_price', 'target_price'),
        ('direction', 'signal'),
    )

    outcomes = pd.DataFrame()
    for out_col, src_col in column_sources:
        outcomes[out_col] = test[src_col]

    outcomes['correct_call'] = test.apply(meet_threshold, axis=1)

    return outcomes

In [51]:
def classify(outcomes):
    """Classify one benchmark trade as a true or false positive.

    Because this is the benchmark, a buy/sell decision is assumed to have been
    taken for every signal, so only ``'tp'`` and ``'fp'`` can occur.

    Parameters
    ----------
    outcomes : mapping of column name -> iterable
        Must provide ``'direction'`` (1 for long, -1 for short) and
        ``'correct_call'`` (per-bar result from ``meet_threshold``).

    Returns
    -------
    str
        ``'tp'`` if any bar reached the target in the signal's direction,
        ``'fp'`` if no bar did, and ``'ERROR'`` for anything that cannot be
        classified (no long/short direction, empty input, or ``correct_call``
        values that contradict the direction). The last two cases previously
        raised ``ValueError`` or silently returned ``None``.
    """
    # `default=None` keeps an empty window from raising inside max()/min().
    direction = max(outcomes['direction'], default=None)

    if direction == 1:
        best_call = max(outcomes['correct_call'], default=None)
    elif direction == -1:
        best_call = min(outcomes['correct_call'], default=None)
    else:
        return 'ERROR'

    if best_call == direction:
        return 'tp'
    if best_call == 0:
        return 'fp'

    # Direction is valid but the calls are inconsistent with it (or missing).
    return 'ERROR'
    

In [52]:
daily_fract.loc[daily_fract.index == '2000-07-28']

Unnamed: 0_level_0,open,high,low,close,mid,wk_mv_avg,mnth_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,gold_usd,gold_euro,day-1_high,day-2_high,day-3_high,day-4_high,day-1_low,day-2_low,day-3_low,day-4_low,day-1_open,day-2_open,day-3_open,day-4_open,day-1_close,day-2_close,day-3_close,day-4_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,5_day_avg,fractal_end,day+1_frac,day+2_frac,day+3_frac,day+4_frac,select,exit_price,target,date+5,fractal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1
2000-07-28,0.932,0.9338,0.9229,0.9241,0.92805,0.93597,0.941517,0.008063,0.005738,-0.009605,-0.006849,-0.00934,0.0079,0.0115,0.0024,0.0083,1,96.6,122.4,0.9444,0.945,0.9433,0.9367,0.9314,0.9391,0.9329,0.9313,0.9434,0.9411,0.9329,0.9366,0.9319,0.9435,0.9412,0.933,0.9241,0.9295,0.9224,0.9274,0.9293,0.9135,0.9192,0.9117,0.9174,0.8997,0.9103,0.9015,0.00674,1,0.0,0.0,0.0,0.0,1,0.93084,0.9295,2000-08-04,1


In [53]:
# Scratch record that create_train_test_split refills for every pattern date.
model_info = {"train":None,"test":None,"start":None,"end":None,"signal":None}
# NOTE(review): this rebinds `benchmark_results`, discarding whatever an
# earlier cell stored under the same name.
benchmark_results = []

for match in fractal_pattern['pattern_end']:
    results_dict = {'name':None,'pattern':None,'date':None,
                   'time_frame':None,'RMSE':None,
                   'MSE':None, 'classification':None}

    results_dict['name'] = 'Bechmark: ' + str(match)
    # Label the record with the strategy this loop actually evaluates
    # (it iterates over the fractal pattern dates, not Marubozu ones).
    results_dict['strategy'] = 'Fractal'
    results_dict['date'] = match
    results_dict['time_frame'] = 'daily'

    model_info = create_train_test_split(match, daily_fract, model_info)

    # Only signals with at least 10 rows of history are scored.
    if len(model_info['train']) < 10:
        continue

    outcomes = create_results_outcomes_dataframe(model_info['test'])
    results_dict['classification'] = classify(outcomes)
    # Per-signal trace of the label and the bars it was derived from.
    print(results_dict['classification'])
    print(outcomes)

    benchmark_results.append(results_dict)


fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2000-07-31  0.9224  0.9295    0.00668  0.9241  0.9274       0.93084   
2000-08-01  0.9135  0.9293    0.00716  0.9275  0.9168       0.93084   
2000-08-02  0.9117  0.9192    0.00670  0.9169  0.9170       0.93084   
2000-08-03  0.8997  0.9174    0.00674  0.9167  0.9050       0.93084   
2000-08-04  0.9015  0.9103    0.00598  0.9047  0.9088       0.93084   

            direction  correct_call  
date                                 
2000-07-31          1             0  
2000-08-01          1             0  
2000-08-02          1             0  
2000-08-03          1             0  
2000-08-04          1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2000-08-08  0.8982  0.9084      0.005  0.9084  0.9010       0.90284   
2000-08-09  0.896

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2001-03-21  0.8936  0.9106    0.00792  0.9096  0.8964       0.90366   
2001-03-22  0.8833  0.8969    0.00730  0.8965  0.8872       0.90366   
2001-03-23  0.8869  0.8970    0.00810  0.8871  0.8921       0.90366   
2001-03-26  0.8905  0.8981    0.00816  0.8920  0.8956       0.90366   
2001-03-27  0.8899  0.8993    0.00662  0.8957  0.8937       0.90366   

            direction  correct_call  
date                                 
2001-03-21         -1            -1  
2001-03-22         -1            -1  
2001-03-23         -1            -1  
2001-03-26         -1            -1  
2001-03-27         -1            -1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2001-03-23  0.8869  0.8970    0.00810  0.8871  0.8921        0.8945   
2001-03-26  0.890

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2001-07-31  0.8727  0.8808      0.005  0.8758  0.8804        0.8809   
2001-08-01  0.8786  0.8843      0.005  0.8803  0.8821        0.8809   
2001-08-02  0.8787  0.8844      0.005  0.8822  0.8821        0.8809   
2001-08-03  0.8784  0.8875      0.005  0.8820  0.8807        0.8809   
2001-08-06  0.8789  0.8827      0.005  0.8806  0.8805        0.8809   

            direction  correct_call  
date                                 
2001-07-31          1             0  
2001-08-01          1             1  
2001-08-02          1             1  
2001-08-03          1             1  
2001-08-06          1             1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2001-08-08  0.8744  0.8824    0.00500  0.8774  0.8815        0.8823   
2001-08-09  0.881

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2003-04-21  1.0824  1.0905    0.00564  1.0895  1.0861       1.09506   
2003-04-22  1.0856  1.1006    0.00720  1.0859  1.0967       1.09506   
2003-04-23  1.0924  1.0985    0.00500  1.0966  1.0963       1.09506   
2003-04-24  1.0963  1.1050    0.00500  1.0964  1.1031       1.09506   
2003-04-25  1.0964  1.1076    0.00500  1.1030  1.1054       1.09506   

            direction  correct_call  
date                                 
2003-04-21          1             0  
2003-04-22          1             1  
2003-04-23          1             1  
2003-04-24          1             1  
2003-04-25          1             1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2003-05-15  1.1373  1.1489    0.00500  1.1448  1.1412        1.1532   
2003-05-16  1.138

fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2003-11-28  1.1915  1.2040    0.00742  1.1917  1.2020       1.18574   
2003-12-01  1.1940  1.2044    0.00576  1.2020  1.1987       1.18574   
2003-12-02  1.1946  1.2118    0.00758  1.1987  1.2090       1.18574   
2003-12-03  1.2049  1.2131    0.00584  1.2090  1.2054       1.18574   
2003-12-04  1.2030  1.2159    0.00614  1.2054  1.2086       1.18574   

            direction  correct_call  
date                                 
2003-11-28         -1             0  
2003-12-01         -1             0  
2003-12-02         -1             0  
2003-12-03         -1             0  
2003-12-04         -1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2003-12-12  1.2137  1.2308    0.00500  1.2227  1.2207        1.2277   
2003-12-15  1.218

fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2004-07-05  1.2273  1.2330    0.00670  1.2318  1.2303       1.22524   
2004-07-06  1.2265  1.2331    0.00514  1.2304  1.2317       1.22524   
2004-07-07  1.2311  1.2391    0.00500  1.2316  1.2359       1.22524   
2004-07-08  1.2343  1.2424    0.00520  1.2358  1.2407       1.22524   
2004-07-09  1.2373  1.2425    0.00500  1.2408  1.2398       1.22524   

            direction  correct_call  
date                                 
2004-07-05         -1             0  
2004-07-06         -1             0  
2004-07-07         -1             0  
2004-07-08         -1             0  
2004-07-09         -1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2004-07-16  1.2345  1.2463    0.00500  1.2374  1.2456        1.2323   
2004-07-19  1.240

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2004-12-20  1.3322  1.3411    0.00780  1.3325  1.3392       1.33912   
2004-12-21  1.3346  1.3410    0.00824  1.3393  1.3356       1.33912   
2004-12-22  1.3347  1.3440    0.00772  1.3355  1.3433       1.33912   
2004-12-23  1.3425  1.3520    0.00622  1.3432  1.3501       1.33912   
2004-12-24  1.3487  1.3550    0.00578  1.3502  1.3540       1.33912   

            direction  correct_call  
date                                 
2004-12-20          1             1  
2004-12-21          1             1  
2004-12-22          1             1  
2004-12-23          1             1  
2004-12-24          1             1  
fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2005-01-04  1.3252  1.3497    0.00778  1.3486  1.3268        1.3537   
2005-01-05  1.321

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2005-05-23  1.2537  1.2599      0.005  1.2567  1.2568        1.2616   
2005-05-24  1.2562  1.2630      0.005  1.2569  1.2594        1.2616   
2005-05-25  1.2554  1.2620      0.005  1.2593  1.2601        1.2616   
2005-05-26  1.2497  1.2605      0.005  1.2602  1.2518        1.2616   
2005-05-27  1.2513  1.2591      0.005  1.2519  1.2540        1.2616   

            direction  correct_call  
date                                 
2005-05-23          1             0  
2005-05-24          1             1  
2005-05-25          1             1  
2005-05-26          1             0  
2005-05-27          1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2005-06-06  1.2233  1.2296    0.00602  1.2263  1.2256       1.21746   
2005-06-07  1.225

2005-09-30          1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2005-10-05  1.1931  1.2060    0.00534  1.1942  1.2054        1.1993   
2005-10-06  1.2039  1.2205    0.00732  1.2055  1.2169        1.1993   
2005-10-07  1.2093  1.2181    0.00620  1.2170  1.2124        1.1993   
2005-10-10  1.2022  1.2154    0.00764  1.2125  1.2033        1.1993   
2005-10-11  1.1958  1.2049    0.00834  1.2034  1.1981        1.1993   

            direction  correct_call  
date                                 
2005-10-05          1             1  
2005-10-06          1             1  
2005-10-07          1             1  
2005-10-10          1             1  
2005-10-11          1             1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2005-10-06  1.2039  1.2205    0.00732  1.2055  1.2

fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2006-04-26  1.2387  1.2472    0.00500  1.2420  1.2454        1.2369   
2006-04-27  1.2407  1.2550    0.00500  1.2455  1.2526        1.2369   
2006-04-28  1.2525  1.2644    0.00564  1.2527  1.2629        1.2369   
2006-05-01  1.2557  1.2693    0.00584  1.2628  1.2585        1.2369   
2006-05-02  1.2562  1.2669    0.00620  1.2586  1.2646        1.2369   

            direction  correct_call  
date                                 
2006-04-26         -1             0  
2006-04-27         -1             0  
2006-04-28         -1             0  
2006-05-01         -1             0  
2006-05-02         -1             0  
fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2006-05-17  1.2703  1.2921    0.00830  1.2876  1.2763       1.29398   
2006-05-18  1.273

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2007-02-09  1.2986  1.3045    0.00500  1.3042  1.3018        1.2991   
2007-02-12  1.2943  1.3039    0.00500  1.3019  1.2962        1.2991   
2007-02-13  1.2961  1.3057    0.00500  1.2963  1.3034        1.2991   
2007-02-14  1.3026  1.3152    0.00538  1.3035  1.3135        1.2991   
2007-02-15  1.3121  1.3174    0.00516  1.3134  1.3140        1.2991   

            direction  correct_call  
date                                 
2007-02-09         -1            -1  
2007-02-12         -1            -1  
2007-02-13         -1            -1  
2007-02-14         -1             0  
2007-02-15         -1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2007-02-23  1.3105  1.3201      0.005  1.3125  1.3184        1.3174   
2007-02-26  1.315

fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2007-08-21  1.3451  1.3521      0.005  1.3477  1.3471        1.3411   
2007-08-22  1.3466  1.3561      0.005  1.3470  1.3553        1.3411   
2007-08-23  1.3537  1.3590      0.005  1.3554  1.3569        1.3411   
2007-08-24  1.3555  1.3689      0.005  1.3570  1.3673        1.3411   
2007-08-27  1.3627  1.3681      0.005  1.3672  1.3632        1.3411   

            direction  correct_call  
date                                 
2007-08-21         -1             0  
2007-08-22         -1             0  
2007-08-23         -1             0  
2007-08-24         -1             0  
2007-08-27         -1             0  
fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2007-09-07  1.3665  1.3800    0.00500  1.3668  1.3774        1.3619   
2007-09-10  1.377

2008-04-09          1             1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2008-04-22  1.5835  1.6020    0.00896  1.5916  1.5994       1.59992   
2008-04-23  1.5834  1.6000    0.00866  1.5993  1.5847       1.59992   
2008-04-24  1.5639  1.5873    0.01072  1.5848  1.5694       1.59992   
2008-04-25  1.5556  1.5703    0.01040  1.5695  1.5640       1.59992   
2008-04-28  1.5597  1.5696    0.00896  1.5639  1.5654       1.59992   

            direction  correct_call  
date                                 
2008-04-22          1             1  
2008-04-23          1             1  
2008-04-24          1             0  
2008-04-25          1             0  
2008-04-28          1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2008-05-07  1.5286  1.5518    0.00934  1.5508  1.5

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2008-10-15  1.3428  1.3690    0.01014  1.3580  1.3476       1.34824   
2008-10-16  1.3348  1.3540    0.00768  1.3474  1.3466       1.34824   
2008-10-17  1.3388  1.3514    0.00760  1.3467  1.3457       1.34824   
2008-10-20  1.3290  1.3532    0.00712  1.3458  1.3332       1.34824   
2008-10-21  1.2912  1.3339    0.01318  1.3333  1.2922       1.34824   

            direction  correct_call  
date                                 
2008-10-15         -1            -1  
2008-10-16         -1            -1  
2008-10-17         -1            -1  
2008-10-20         -1            -1  
2008-10-21         -1            -1  
fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2008-10-17  1.3388  1.3514    0.00760  1.3467  1.3457       1.35428   
2008-10-20  1.329

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2009-06-02  1.4102  1.4331    0.01298  1.4168  1.4305       1.40654   
2009-06-03  1.4109  1.4338    0.01252  1.4304  1.4168       1.40654   
2009-06-04  1.4070  1.4241    0.00988  1.4167  1.4201       1.40654   
2009-06-05  1.3926  1.4268    0.01100  1.4200  1.3983       1.40654   
2009-06-08  1.3805  1.3996    0.01270  1.3982  1.3871       1.40654   

            direction  correct_call  
date                                 
2009-06-02         -1             0  
2009-06-03         -1             0  
2009-06-04         -1             0  
2009-06-05         -1            -1  
2009-06-08         -1            -1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2009-06-11  1.3943  1.4178    0.01292  1.4027  1.4098       1.39062   
2009-06-12  1.393

fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2009-10-28  1.4683  1.4840    0.00750  1.4812  1.4718       1.48792   
2009-10-29  1.4702  1.4859    0.00936  1.4718  1.4855       1.48792   
2009-10-30  1.4684  1.4857    0.01104  1.4854  1.4760       1.48792   
2009-11-02  1.4728  1.4845    0.00842  1.4759  1.4784       1.48792   
2009-11-03  1.4626  1.4807    0.00800  1.4783  1.4733       1.48792   

            direction  correct_call  
date                                 
2009-10-28          1             0  
2009-10-29          1             0  
2009-10-30          1             0  
2009-11-02          1             0  
2009-11-03          1             0  
fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2009-11-16  1.4880  1.5016    0.00500  1.4966  1.4968       1.50228   
2009-11-17  1.480

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2010-04-13  1.3544  1.3664    0.01108  1.3574  1.3660        1.3472   
2010-04-14  1.3593  1.3678    0.01010  1.3659  1.3649        1.3472   
2010-04-15  1.3520  1.3649    0.01158  1.3648  1.3541        1.3472   
2010-04-16  1.3445  1.3562    0.00730  1.3542  1.3462        1.3472   
2010-04-19  1.3415  1.3500    0.00598  1.3461  1.3477        1.3472   

            direction  correct_call  
date                                 
2010-04-13         -1             0  
2010-04-14         -1             0  
2010-04-15         -1             0  
2010-04-16         -1            -1  
2010-04-19         -1            -1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2010-04-27  1.3143  1.3385    0.01026  1.3379  1.3215        1.3298   
2010-04-28  1.311

tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2010-09-03  1.2808  1.2900    0.00654  1.2818  1.2893       1.27618   
2010-09-06  1.2788  1.2918    0.00652  1.2894  1.2807       1.27618   
2010-09-07  1.2676  1.2814    0.00778  1.2806  1.2691       1.27618   
2010-09-08  1.2659  1.2763    0.00650  1.2692  1.2711       1.27618   
2010-09-09  1.2644  1.2765    0.00688  1.2712  1.2664       1.27618   

            direction  correct_call  
date                                 
2010-09-03         -1             0  
2010-09-06         -1             0  
2010-09-07         -1            -1  
2010-09-08         -1            -1  
2010-09-09         -1            -1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2010-09-09  1.2644  1.2765    0.00688  1.2712  1.2664        1.2776   
2010-09-10  1.266

2011-02-18          1             1  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2011-02-17  1.3536  1.3626    0.00500  1.3580  1.3604        1.3526   
2011-02-18  1.3544  1.3715    0.00500  1.3605  1.3674        1.3526   
2011-02-21  1.3562  1.3708    0.00500  1.3675  1.3591        1.3526   
2011-02-22  1.3524  1.3712    0.00658  1.3592  1.3693        1.3526   
2011-02-23  1.3682  1.3786    0.00718  1.3694  1.3775        1.3526   

            direction  correct_call  
date                                 
2011-02-17         -1             0  
2011-02-18         -1             0  
2011-02-21         -1             0  
2011-02-22         -1            -1  
2011-02-23         -1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2011-03-10  1.3774  1.3878    0.00500  1.3868  1.3

fp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2011-08-09  1.4183  1.4397    0.01488  1.4222  1.4349       1.40902   
2011-08-10  1.4122  1.4400    0.01558  1.4348  1.4217       1.40902   
2011-08-11  1.4103  1.4293    0.01216  1.4216  1.4183       1.40902   
2011-08-12  1.4149  1.4317    0.00982  1.4182  1.4289       1.40902   
2011-08-15  1.4263  1.4476    0.01072  1.4290  1.4428       1.40902   

            direction  correct_call  
date                                 
2011-08-09         -1             0  
2011-08-10         -1             0  
2011-08-11         -1             0  
2011-08-12         -1             0  
2011-08-15         -1             0  
tp
               low    high  5_day_avg    open   close  target_price  \
date                                                                  
2011-08-16  1.4351  1.4471    0.00898  1.4427  1.4387       1.43208   
2011-08-17  1.432

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2012-02-23  1.32536  1.33788   0.006596  1.32572  1.33693      1.332324   
2012-02-24  1.33566  1.34856   0.006596  1.33692  1.34490      1.332324   
2012-02-27  1.33658  1.34637   0.005650  1.34489  1.34353      1.332324   
2012-02-28  1.33890  1.34848   0.005716  1.34354  1.34832      1.332324   
2012-02-29  1.33142  1.34848   0.007806  1.34833  1.33463      1.332324   

            direction  correct_call  
date                                 
2012-02-23          1             1  
2012-02-24          1             1  
2012-02-27          1             1  
2012-02-28          1             1  
2012-02-29          1             1  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2012-03-19  1.31412  1.32645   0.007194  1.31708  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2012-08-30  1.24866  1.25630   0.005000  1.25374  1.25160       1.24872   
2012-08-31  1.24929  1.26369   0.005000  1.25157  1.25731       1.24872   
2012-09-03  1.25620  1.26271   0.005000  1.25732  1.26159       1.24872   
2012-09-04  1.25221  1.26260   0.005000  1.26158  1.25354       1.24872   
2012-09-05  1.25010  1.26236   0.005506  1.25352  1.26086       1.24872   

            direction  correct_call  
date                                 
2012-08-30         -1            -1  
2012-08-31         -1             0  
2012-09-03         -1             0  
2012-09-04         -1             0  
2012-09-05         -1             0  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2012-09-10  1.27543  1.28029   0.006852  1.27860  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2013-02-20  1.32546  1.34310   0.006774  1.34130  1.32647       1.33631   
2013-02-21  1.31606  1.32757   0.006158  1.32648  1.32118       1.33631   
2013-02-22  1.31444  1.32446   0.006142  1.32120  1.31890       1.33631   
2013-02-25  1.30377  1.33180   0.008436  1.31889  1.30534       1.33631   
2013-02-26  1.30176  1.31218   0.007390  1.30536  1.30633       1.33631   

            direction  correct_call  
date                                 
2013-02-20         -1            -1  
2013-02-21         -1            -1  
2013-02-22         -1            -1  
2013-02-25         -1            -1  
2013-02-26         -1            -1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2013-03-01  1.29658  1.31003   0.006970  1.30738  1.

2013-09-30          1             1  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2013-09-27  1.34772  1.35636      0.005  1.34792  1.35025       1.34289   
2013-09-30  1.34770  1.35553      0.005  1.35024  1.35340       1.34289   
2013-10-01  1.35068  1.35872      0.005  1.35342  1.35192       1.34289   
2013-10-02  1.35042  1.36225      0.005  1.35194  1.36046       1.34289   
2013-10-03  1.35861  1.36453      0.005  1.36045  1.36215       1.34289   

            direction  correct_call  
date                                 
2013-09-27         -1             0  
2013-09-30         -1             0  
2013-10-01         -1             0  
2013-10-02         -1             0  
2013-10-03         -1             0  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2013-10-08  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2014-05-01  1.38565  1.38885      0.005  1.38693  1.38606       1.39191   
2014-05-02  1.38115  1.38838      0.005  1.38607  1.38742       1.39191   
2014-05-05  1.38685  1.38853      0.005  1.38741  1.38793       1.39191   
2014-05-06  1.38749  1.39504      0.005  1.38792  1.39243       1.39191   
2014-05-07  1.39047  1.39379      0.005  1.39244  1.39154       1.39191   

            direction  correct_call  
date                                 
2014-05-01          1             0  
2014-05-02          1             0  
2014-05-05          1             0  
2014-05-06          1             1  
2014-05-07          1             1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2014-06-11  1.35212  1.35563      0.005  1.35320  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2014-12-26  1.21672  1.22166      0.005  1.22163  1.21972       1.21663   
2014-12-29  1.21321  1.22208      0.005  1.21970  1.21401       1.21663   
2014-12-30  1.21238  1.21874      0.005  1.21402  1.21578       1.21663   
2014-12-31  1.20959  1.21596      0.005  1.21582  1.20969       1.21663   
2015-01-01  1.20482  1.21072      0.005  1.20965  1.20528       1.21663   

            direction  correct_call  
date                                 
2014-12-26         -1             0  
2014-12-29         -1            -1  
2014-12-30         -1            -1  
2014-12-31         -1            -1  
2015-01-01         -1            -1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2015-01-13  1.17534  1.18522   0.005000  1.18367  1.

fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2015-07-15  1.09121  1.10355   0.005104  1.10055  1.09340      1.105964   
2015-07-16  1.08553  1.09485   0.005364  1.09339  1.08996      1.105964   
2015-07-17  1.08204  1.09070   0.006220  1.08997  1.08217      1.105964   
2015-07-20  1.08082  1.08700   0.005000  1.08215  1.08265      1.105964   
2015-07-21  1.08115  1.09684   0.005930  1.08265  1.09342      1.105964   

            direction  correct_call  
date                                 
2015-07-15          1             0  
2015-07-16          1             0  
2015-07-17          1             0  
2015-07-20          1             0  
2015-07-21          1             0  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2015-07-23  1.09233  1.10177      0.005  1.09450  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2015-11-24  1.06195  1.06769      0.005  1.06290  1.06564       1.06791   
2015-11-25  1.05660  1.06892      0.005  1.06566  1.06206       1.06791   
2015-11-26  1.05998  1.06237      0.005  1.06207  1.06070       1.06791   
2015-11-27  1.05685  1.06380      0.005  1.06073  1.05857       1.06791   
2015-11-30  1.05578  1.05968      0.005  1.05856  1.05741       1.06791   

            direction  correct_call  
date                                 
2015-11-24          1             0  
2015-11-25          1             1  
2015-11-26          1             0  
2015-11-27          1             0  
2015-11-30          1             0  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2015-12-18  1.08050  1.08828   0.005222  1.08464  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2016-04-19  1.13150  1.13845      0.005  1.13227  1.13628       1.12729   
2016-04-20  1.12869  1.13877      0.005  1.13627  1.12956       1.12729   
2016-04-21  1.12696  1.13987      0.005  1.12954  1.13009       1.12729   
2016-04-22  1.12177  1.13052      0.005  1.13010  1.12441       1.12729   
2016-04-25  1.12310  1.12781      0.005  1.12442  1.12698       1.12729   

            direction  correct_call  
date                                 
2016-04-19         -1             0  
2016-04-20         -1             0  
2016-04-21         -1            -1  
2016-04-22         -1            -1  
2016-04-25         -1            -1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2016-04-26  1.12562  1.13397   0.005000  1.12696  1.

fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2016-12-19  1.03925  1.04793   0.005996  1.04609  1.04021      1.052496   
2016-12-20  1.03522  1.04191   0.006158  1.04022  1.04124      1.052496   
2016-12-21  1.03883  1.04510   0.005000  1.04123  1.04372      1.052496   
2016-12-22  1.04292  1.04993   0.005000  1.04377  1.04486      1.052496   
2016-12-23  1.04263  1.04685   0.005000  1.04484  1.04589      1.052496   

            direction  correct_call  
date                                 
2016-12-19          1             0  
2016-12-20          1             0  
2016-12-21          1             0  
2016-12-22          1             0  
2016-12-23          1             0  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2016-12-23  1.04263  1.04685      0.005  1.04484  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2017-04-13  1.06090  1.06776      0.005  1.06716  1.06129       1.06218   
2017-04-14  1.06024  1.06294      0.005  1.06128  1.06177       1.06218   
2017-04-17  1.06150  1.06705      0.005  1.06176  1.06471       1.06218   
2017-04-18  1.06411  1.07358      0.005  1.06471  1.07197       1.06218   
2017-04-19  1.07000  1.07367      0.005  1.07196  1.07202       1.06218   

            direction  correct_call  
date                                 
2017-04-13         -1            -1  
2017-04-14         -1            -1  
2017-04-17         -1            -1  
2017-04-18         -1             0  
2017-04-19         -1             0  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2017-05-02  1.08877  1.09369      0.005  1.09152  1.

2017-10-10          1             1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2017-10-11  1.18012  1.18777      0.005  1.18079  1.18751       1.17578   
2017-10-12  1.18249  1.18796      0.005  1.18751  1.18477       1.17578   
2017-10-13  1.17978  1.18746      0.005  1.18477  1.18061       1.17578   
2017-10-16  1.17748  1.18196      0.005  1.18060  1.17790       1.17578   
2017-10-17  1.17359  1.17814      0.005  1.17790  1.17698       1.17578   

            direction  correct_call  
date                                 
2017-10-11         -1             0  
2017-10-12         -1             0  
2017-10-13         -1             0  
2017-10-16         -1             0  
2017-10-17         -1            -1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2017-10-17  1.

2018-02-12          1             0  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2018-02-14  1.22760  1.24731   0.005000  1.23774  1.24564      1.232552   
2018-02-15  1.24545  1.25550   0.005884  1.24565  1.25482      1.232552   
2018-02-16  1.23932  1.25544   0.007878  1.25483  1.24156      1.232552   
2018-02-19  1.23690  1.24270   0.008088  1.24155  1.23874      1.232552   
2018-02-20  1.23176  1.23964   0.007946  1.23875  1.23217      1.232552   

            direction  correct_call  
date                                 
2018-02-14         -1            -1  
2018-02-15         -1             0  
2018-02-16         -1             0  
2018-02-19         -1             0  
2018-02-20         -1            -1  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2018-02-20  1.

2018-07-31          1             1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2018-08-01  1.16505  1.16994      0.005  1.16795  1.16508       1.16296   
2018-08-02  1.15809  1.16538      0.005  1.16508  1.15908       1.16296   
2018-08-03  1.15567  1.16104      0.005  1.15908  1.15635       1.16296   
2018-08-06  1.15300  1.15702      0.005  1.15635  1.15553       1.16296   
2018-08-07  1.15514  1.16201      0.005  1.15552  1.16186       1.16296   

            direction  correct_call  
date                                 
2018-08-01         -1             0  
2018-08-02         -1            -1  
2018-08-03         -1            -1  
2018-08-06         -1            -1  
2018-08-07         -1            -1  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2018-08-03  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2018-12-25  1.13892  1.14198      0.005  1.14135  1.14054       1.14635   
2018-12-26  1.13428  1.14141      0.005  1.14054  1.13845       1.14635   
2018-12-27  1.13646  1.14669      0.005  1.13845  1.14512       1.14635   
2018-12-28  1.14222  1.14727      0.005  1.14514  1.14223       1.14635   
2018-12-31  1.14213  1.14674      0.005  1.14222  1.14644       1.14635   

            direction  correct_call  
date                                 
2018-12-25          1             0  
2018-12-26          1             0  
2018-12-27          1             1  
2018-12-28          1             1  
2018-12-31          1             1  
fp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2019-02-05  1.13958  1.14356      0.005  1.14321  1.

tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2019-07-12  1.12381  1.12748      0.005  1.12717  1.12723       1.12218   
2019-07-15  1.12532  1.12838      0.005  1.12723  1.12597       1.12218   
2019-07-16  1.12018  1.12618      0.005  1.12597  1.12133       1.12218   
2019-07-17  1.11997  1.12416      0.005  1.12135  1.12392       1.12218   
2019-07-18  1.12051  1.12820      0.005  1.12393  1.12624       1.12218   

            direction  correct_call  
date                                 
2019-07-12         -1             0  
2019-07-15         -1             0  
2019-07-16         -1            -1  
2019-07-17         -1            -1  
2019-07-18         -1            -1  
tp
                low     high  5_day_avg     open    close  target_price  \
date                                                                      
2019-07-30  1.11321  1.11610   0.005000  1.11429  1.

In [54]:
def create_cm(results):
    """Tally classification labels into a 2x2 confusion matrix.

    Parameters
    ----------
    results : iterable of mapping
        Every item is indexed with the key 'classification'. The labels
        'tp', 'fp', 'fn' and 'tn' are counted; any other value is ignored.

    Returns
    -------
    list of list of int
        ``[[tp, fp], [fn, tn]]``.
    """
    # Labels listed in row-major order of the returned matrix.
    cell_labels = ('tp', 'fp', 'fn', 'tn')
    counts = [0] * len(cell_labels)

    for entry in results:
        label = entry['classification']
        if label in cell_labels:
            counts[cell_labels.index(label)] += 1

    return [counts[:2], counts[2:]]

In [55]:
# Tally the 'classification' label of every entry in benchmark_results into a
# 2x2 matrix laid out as [[tp, fp], [fn, tn]].
cm = create_cm(benchmark_results)

In [56]:
# Label the matrix for display. Rows are the prediction and columns the actual
# outcome, which matches create_cm's [[tp, fp], [fn, tn]] layout.
cm_df = pd.DataFrame(cm, index=['pred_success', 'pred_non_success'], columns=['actual success', 'actual non_success'])
cm_df

Unnamed: 0,actual success,actual non_success
pred_success,410,202
pred_non_success,0,0


In [57]:
def print_metrics(cm):
    """Print accuracy, precision and recall for a 2x2 confusion matrix.

    Parameters
    ----------
    cm : 2x2 nested sequence or array
        Read as ``[[tp, fp], [fn, tn]]``, the layout produced by create_cm.

    Returns
    -------
    None
        The three metrics are printed, each rounded to two decimals.

    Notes
    -----
    A metric whose denominator is zero (an empty matrix, no predicted
    positives, or no actual positives) is printed as ``nan`` instead of
    raising ZeroDivisionError.
    """
    tp, fp = cm[0][0], cm[0][1]
    fn, tn = cm[1][0], cm[1][1]

    def _ratio(numerator, denominator):
        # An undefined ratio is reported as nan so the cell keeps running.
        return numerator / denominator if denominator else float('nan')

    # Accuracy - how many did the model get right
    # Total number of correct predictions / total number of predictions
    acc = _ratio(tp + tn, np.sum(cm))

    # Precision - proportion of positive identifications that were actually correct
    # True positives / (true positives + false positives)
    prec = _ratio(tp, tp + fp)

    # Recall - proportion of actual positives that were correctly identified
    # True positives / (true positives + false negatives)
    rec = _ratio(tp, tp + fn)

    print(f"Accuracy:\t{round(acc,2)}\nPrecision:\t{round(prec,2)}\nRecall:\t\t{round(rec,2)}")

In [58]:
# Report accuracy, precision and recall for the benchmark confusion matrix.
# NOTE(review): in the recorded run the second row of cm (fn, tn) is all zeros,
# so recall = tp / (tp + fn) is 1.0 by construction and accuracy equals precision.
print_metrics(cm)

Accuracy:	0.67
Precision:	0.67
Recall:		1.0


## 1.2 Train Test Split

In [59]:
# Check the type of the index label of the `daily` row whose index equals
# pattern 10's 'pattern_end'.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'daily' is not defined". `daily` (and presumably
# `daily_pattern`) must be created by an earlier cell before this section runs.
type(daily.loc[daily.index == daily_pattern.loc[10]['pattern_end']].index[0])

NameError: name 'daily' is not defined

In [None]:
# Index label of the `daily` row whose index equals pattern 10's 'pattern_end'.
daily.loc[daily.index == daily_pattern.loc[10]['pattern_end']].index[0]

In [None]:
# Inspect which labels daily_pattern can be addressed with via .loc
# (this section uses the label 10).
daily_pattern.index

In [None]:
# Trial run on a single pattern (label 10): keep the index label of the `daily`
# row that matches its 'pattern_end'.
curr_pattern = daily.loc[daily.index == daily_pattern.loc[10]['pattern_end']].index[0]
curr_pattern

In [None]:
# Inspect daily's index, which this section compares against 'pattern_end'
# and test_end_date.
daily.index

In [None]:
# End of the evaluation window: the 'date+5' value stored on the `daily` row
# that matches pattern 10's 'pattern_end'.
# presumably 'date+5' is the date five trading rows after the row's own date —
# TODO confirm where that column is built.
test_end_date = daily.loc[daily.loc[daily.index == daily_pattern.loc[10]['pattern_end']].index[0],'date+5']
test_end_date

In [None]:
# daily.loc[daily.index == curr_pattern]

In [None]:
# Keep every row up to and including test_end_date. The last five rows of this
# frame are later split off as the test set.
train_test = daily.loc[daily.index <= test_end_date]
# train_test = daily.loc[daily.index <= '2004-2-28 00:00:00']

In [None]:
# daily.loc[daily.index <= end_date]

In [None]:
# daily.loc[daily.index == daily_pattern.loc[10]['pattern_end'],'double_height']

In [None]:
# Target price for pattern 10: the 'double_height' value on the `daily` row that
# matches its 'pattern_end'. .item() raises ValueError unless the selection
# holds exactly one value.
target_value = daily.loc[daily.index == daily_pattern.loc[10,'pattern_end'],'double_height'].item()
target_value

In [None]:
# def choose_exit_price(row, target_price, signal=-1):
#     if signal == -1:
#         return target_price
# #         return row['close'] - (row['height'] * 1)
#     else:
#         return target_price

# #         return row['close'] + (row['height'] * 1)

In [None]:
# Inspect the sliced frame.
# NOTE(review): this renders the whole frame; .head() / .tail() would keep the
# saved output short.
train_test

In [None]:
# Broadcast the pattern's target price to every row as the first column.
# DataFrame.insert raises ValueError when the column already exists, so a copy
# left behind by a previous run of this cell is dropped first. That makes the
# cell safe to re-run without restarting the kernel.
train_test = train_test.drop(columns=['target_price'], errors='ignore')
train_test.insert(0, 'target_price', target_value)

In [None]:
# Signal for pattern 10: the 'marubozu' value on the `daily` row that matches its
# 'pattern_end'. justified() compares 'low' against the target when the signal
# is -1 and 'high' against the target when it is 1.
# presumably -1 marks a bearish and 1 a bullish pattern — TODO confirm.
signal = daily.loc[daily.index == daily_pattern.loc[10,'pattern_end'],'marubozu'].item()
signal

In [None]:
# Peek at the first rows to confirm the 'target_price' column is present.
train_test.head()

In [None]:
# Scratch check: a list repeating `signal`, one element shorter than
# train_test. The result is only displayed, never assigned.
[signal] * (len(train_test)-1)

In [None]:
# Attach the trade signal to every row of `train_test` as the first column.
# `DataFrame.insert` raises ValueError when the column already exists, so the
# guard lets this cell be re-run on the same kernel without failing.
if 'signal' in train_test.columns:
    train_test['signal'] = signal
else:
    train_test.insert(0, 'signal', signal)

In [None]:
# Show the last six rows of `train_test`.
train_test.tail(6)

In [None]:
# The final five rows of `train_test` form the forecast window:
# `start` is the position of its first row and `end` the position of its last.
n_rows = len(train_test)
start, end = n_rows - 5, n_rows - 1
start, end

In [None]:
# Set for testing: rows before position `start` are used for fitting,
# the remaining rows are held out.
train, test = train_test.iloc[:start], train_test.iloc[start:]

In [None]:
# Preview the held-out rows.
test.head()

In [None]:
def train_test_plot(train, test):
    """Draw the training and test series on one 16x8 figure.

    Parameters
    ----------
    train : array-like or pandas Series
        Training values, drawn in blue.
    test : array-like or pandas Series
        Held-out values, drawn in orange.

    Returns
    -------
    None
        The figure is drawn as a side effect; nothing is returned, so calling
        this as the last line of a cell adds no text output.
    """
    fig, ax = plt.subplots(figsize=(16, 8))
    # Label both series so the colours can be told apart in the legend.
    ax.plot(train, c='blue', label='train')
    ax.plot(test, c='orange', label='test')
    ax.set_title('Train / test split')
    ax.legend()

In [None]:
# This plot confirms that our train test split makes sense:
# the `close` column of both splits is drawn on one figure.
train_test_plot(train['close'], test['close'])

In [None]:
# Let pmdarima search for a non-seasonal ARIMA order on the non-null `close` prices
# and show the summary of the selected model.
# NOTE(review): the search uses all of `daily`, which includes the rows held out in `test` — confirm this is intended.
auto_arima(daily['close'].dropna(), seasonal=False).summary()

In [None]:
# Display the training split.
# NOTE(review): this renders the whole frame; consider `.head()` / `.tail()` to keep the output small.
train

In [None]:
# Fit an ARIMA model with order (0,1,0) to the training `low` prices and show the fit summary.
# NOTE(review): `results` is also the name of the metrics dict defined elsewhere in
# this notebook, so this assignment replaces that dict with the fitted model.
model = ARIMA(train['low'], order=(0,1,0))
results = model.fit()
results.summary()

In [None]:
# Forecast positions `start`..`end` (the held-out window) with the fitted model
# and name the resulting Series 'ARIMA-0-1-0 Predictions'.
# NOTE(review): presumably `typ='levels'` returns values on the original price scale
# rather than differences — confirm against the statsmodels version in use.
predictions = results.predict(start=start, end=end, dynamic=False, typ='levels').rename('ARIMA-0-1-0 Predictions')

In [None]:
# Show the forecast values without their index.
predictions.values

In [None]:
# Check which container type `predict` returned.
type(predictions)

In [None]:
def justified(row):
    """Return 1 if the row's price reached the target in the signalled direction, else 0.

    Parameters
    ----------
    row : mapping or pandas Series
        Must provide 'signal'. For a signal of -1 it must also provide 'low'
        and 'target_price'; for a signal of 1, 'high' and 'target_price'.

    Returns
    -------
    int
        1 when signal is -1 and low <= target_price, or signal is 1 and
        high >= target_price; 0 in every other case (including any other
        signal value).
    """
    direction = row['signal']

    if direction == -1:
        reached = row['low'] <= row['target_price']
    elif direction == 1:
        reached = row['high'] >= row['target_price']
    else:
        reached = False

    return 1 if reached else 0

In [None]:
# Collect, for each held-out row: the observed low and high, the model's
# forecast (assigned by position), the target price, and the 0/1 result of
# `justified` for that row. Then display the table.
outcomes = pd.DataFrame({
    'low': test['low'],
    'high': test['high'],
    'preds': predictions.values,
    'target_price': test['target_price'],
    'signal_match': test.apply(justified, axis=1),
})
outcomes

In [None]:
# predictions['date']  = test.index
#predictions.reset_index(test.index)

In [None]:
# predictions.reindex(test.index)

In [None]:
# Check which container type `predictions` currently is.
type(predictions)

In [None]:
# Count missing `close` values in the first five rows of `test`.
# `test` is the last five rows of `train_test` (see the split cell), so this covers the whole split.
test.head()['close'].isnull().sum()

In [None]:
# Count missing `close` values across the whole training split;
# going through `.head()` limited the check to the first five rows.
train['close'].isnull().sum()

In [None]:
# Overlay the observed lows, the forecasts and the target price on one 12x8 axes.
ax = outcomes['low'].plot(legend=True, figsize=(12, 8))
for column in ('preds', 'target_price'):
    outcomes[column].plot(ax=ax, legend=True)

# predictions.plot(legend=True)

In [None]:

# The ARIMA(0,1,0) model above was fitted on train['low'], so its forecasts
# are scored against the held-out `low` prices rather than `close`.
actual = test['low']

error = mean_squared_error(actual, predictions)
print(f'ARIMA(0,1,0) MSE Error: {error:11.10}')


error = rmse(actual, predictions)
print(f'ARIMA(0,1,0) RMSE Error: {error:11.10}')

In [None]:
# Reset `results` to a blank record: every reporting field starts as an empty string.
results = {
    field: ''
    for field in ('algo', 'name', 'date', 'time_frame',
                  'success', 'RMSE', 'MSE', 'classification')
}


In [None]:
# List the columns of `daily`.
daily.columns

---

# SARIMAX


In [None]:
# daily = daily.resample('B').agg({'open':'first','high':'max',
#                                         'low':'min', 'close':'last'})

In [None]:
# Inspect the index of `daily` before the seasonal modelling.
daily.index

In [None]:
# Drop the rows whose `close` is missing. Calling `dropna(inplace=True)` on
# `daily['close']` only acts on the extracted Series and leaves `daily` itself
# unchanged, so the frame is filtered and reassigned instead.
daily = daily.dropna(subset=['close'])

In [None]:
# Additive seasonal decomposition of the `close` series with a period of 400
# observations, followed by a plot of the decomposition.
# NOTE(review): the choice of 400 is not explained here and differs from the
# seasonal period of 12 used for the SARIMAX models — confirm.
result = seasonal_decompose(daily['close'], model='add', period=400 )
result.plot();

In [None]:
%%time
auto_arima(daily['close'], seasonal=True, maxiter=10000).summary()

In [None]:
# Build (without fitting) a SARIMAX model on the training `close` prices:
# order (0,1,0), seasonal order (1,0,1) and a seasonal period of 12.
# NOTE(review): `model` is reassigned inside the grid-search loop further down,
# so this instance is never fitted in the cells shown here.
model = SARIMAX(train['close'], order=(0,1,0), seasonal_order=(1,0,1,12))

In [None]:
# Number of rows in the training split.
len(train)

In [None]:
# Columns available in the training split.
train.columns

In [None]:
# Grid-search the seasonal (P, D, Q) terms of a SARIMA model on the training
# `close` prices and keep the combination with the lowest in-sample MSE.
base_order = (0, 1, 0)   # non-seasonal (p, d, q), held fixed during the search
seasonal_period = 12     # S

# Starting MSE and (P, D, Q). Infinity guarantees the first successful fit wins.
mse = float('inf')
final_P = 0
final_D = 0
final_Q = 0

for P in range(3):
    for Q in range(3):
        for D in range(3):
            try:
                # Instantiate SARIMA model.
                sarima = SARIMAX(endog = train['close'],
                                 order = base_order,                           # (p, d, q)
                                 seasonal_order = (P, D, Q, seasonal_period))  # (P, D, Q, S)

                # Fit SARIMA model.
                model = sarima.fit()

                # Generate in-sample predictions for every training observation.
                # The end position is derived from the data so the prediction
                # length always matches `train['close']`.
                preds = model.predict(start=0, end=len(train) - 1)

                # Evaluate predictions once and reuse the value below.
                current_mse = mean_squared_error(train['close'], preds)
                print(f'The MSE for {base_order}x({P},{D},{Q},{seasonal_period}) is: {current_mse}')

                # Save for final report.
                if mse > current_mse:
                    mse = current_mse
                    final_P = P
                    final_D = D
                    final_Q = Q

            except Exception as exc:
                # Best effort: some combinations cannot be fitted. Report which
                # one failed and why, then move on to the next combination.
                print(f"Skipped P: {P}, D: {D}, Q: {Q} ({type(exc).__name__}: {exc})")

if mse == float('inf'):
    print('No SARIMA model could be fitted and evaluated on the training data.')
else:
    print(f'Our model that minimizes MSE on the training data is the SARIMA{base_order}x({final_P},{final_D},{final_Q},{seasonal_period}).')
    print(f'This model has an MSE of {mse}.')