# 2.0 Resample, Pattern Matching and Feature Engineering

# Contents

- [1.0 Load Data](#1.0-Load-Data)
- [2.0 Resample Data](#2.0-Resample-Data)
    - [2.1 Monthly Data](#2.1-Monthly-Data)
    - [2.2 Daily Data](#2.2-Daily-Data)
    - [2.3 4-Hour Data](#2.3-4-Hour-Data)
- [3.0 Pattern Matching](#3.0-Pattern-Matching)
    - [3.1 Monthly Data](#3.1-Monthly-Data)
    - [3.2 Daily Data](#3.2-Daily-Data)
    - [3.3 4-Hour Data](#3.3-4-Hour-Data)


In [124]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import datetime
import calendar
import plotly.graph_objects as go


In [125]:
pd.set_option('display.max_columns', None)

---

# 1.0 Load Data

In [126]:
eur_usd = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/data/csv/eur_usd.csv', 
                      index_col='date', parse_dates=True)

In [127]:
# eur_usd['day_name'] = eur_usd.index.day_name()
eur_usd = eur_usd[['open','high', 'low','close']]

In [128]:
eur_usd.head()

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-05-30 17:27:00,0.9302,0.9302,0.9302,0.9302
2000-05-31 00:50:00,0.9315,0.9315,0.9315,0.9315
2000-05-31 00:51:00,0.9315,0.9315,0.9315,0.9315
2000-05-31 00:55:00,0.9317,0.9317,0.9317,0.9317
2000-05-31 01:01:00,0.9318,0.9318,0.9318,0.9318


In [129]:
eur_usd.isnull().sum()

open     0
high     0
low      0
close    0
dtype: int64

In [130]:
# eur_usd['wday'] = eur_usd.index.day_name()

In [131]:
eur_usd.shape

(6539011, 4)

In [132]:
eur_usd.head()

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-05-30 17:27:00,0.9302,0.9302,0.9302,0.9302
2000-05-31 00:50:00,0.9315,0.9315,0.9315,0.9315
2000-05-31 00:51:00,0.9315,0.9315,0.9315,0.9315
2000-05-31 00:55:00,0.9317,0.9317,0.9317,0.9317
2000-05-31 01:01:00,0.9318,0.9318,0.9318,0.9318


In [133]:
eur_usd.loc[eur_usd.index == '2001-01-9']

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001-01-09,0.9447,0.9447,0.9446,0.9446


---

## Holidays

In [134]:
holidays = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/EUR-USD Holidays/EUR-USD_Holidays.csv',
                   index_col='Date', parse_dates=True)

In [135]:
holidays.head()

Unnamed: 0_level_0,Reason
Date,Unnamed: 1_level_1
2012-01-05,Missing value; holiday or weekend (H)
2012-09-04,Missing value; holiday or weekend (H)
2012-06-04,Missing value; holiday or weekend (H)
2011-12-26,Missing value; holiday or weekend (H)
2011-04-25,Missing value; holiday or weekend (H)


In [136]:
holidays.isnull().sum()

Reason    0
dtype: int64

---

## Gold

In [137]:
gold = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/data/gold/gold_clean.csv',
                   index_col='date', parse_dates=True)

In [138]:
gold.head()

Unnamed: 0_level_0,usd,euro
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-05-30,94.9,119.5
2000-05-31,94.6,119.8
2000-06-01,94.7,119.4
2000-06-02,97.9,121.7
2000-06-05,98.4,122.4


In [139]:
gold.rename(columns={"usd":"gold_usd","euro":"gold_euro"}, inplace=True)

In [140]:
eur_usd.loc[eur_usd.index[-1]].name

Timestamp('2019-12-31 16:59:00')

In [141]:
gold = gold.loc[gold.index <= eur_usd.loc[eur_usd.index[-1]].name]

In [142]:
gold.head()

Unnamed: 0_level_0,gold_usd,gold_euro
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-05-30,94.9,119.5
2000-05-31,94.6,119.8
2000-06-01,94.7,119.4
2000-06-02,97.9,121.7
2000-06-05,98.4,122.4


In [143]:
gold.shape

(5111, 2)

In [144]:
eur_usd.shape

(6539011, 4)

---

# 2.1 Monthly Data

In [145]:
monthly = eur_usd.resample('m').agg({'open':'first','high':'max',
                                    'low':'min', 'close':'last'})
monthly.head(14)

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-05-31,0.9302,0.9382,0.9151,0.9377
2000-06-30,0.9376,0.9704,0.9287,0.9538
2000-07-31,0.9526,0.9598,0.9193,0.9274
2000-08-31,0.9275,0.9293,0.8841,0.8894
2000-09-30,0.8895,0.904,0.8443,0.8825
2000-10-31,0.8841,0.8861,0.8229,0.848
2000-11-30,0.8481,0.877,0.8377,0.877
2000-12-31,0.8769,0.9391,0.8705,0.9373
2001-01-31,0.947,0.9599,0.9117,0.9413
2001-02-28,0.9414,0.9447,0.9018,0.9248


In [146]:
monthly.shape

(236, 4)

In [147]:
monthly.isnull().sum()

open     0
high     0
low      0
close    0
dtype: int64

In [148]:
# monthly.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/resampled/monthly.csv', index=True)

---

# 2.2 Daily Data

In [514]:
# create the super class for pattern identification

class Process_Market_Data(object):
    
    # Initialise parent class
    def __init__(self, market_data, data_name='eur-usd', time_frame='daily', 
                path_to_save_md='/Users/stuartdaw/Documents/Capstone_data/data/resampled/',
                path_to_save_ptn= '/Users/stuartdaw/Documents/Capstone_data/data/targets/',
                file_type='.csv'):
        self.market_data = market_data
        self.data_name = data_name
        self.time_frame = time_frame
        self.path_to_save_md = path_to_save_md
        self.path_to_save_ptn = path_to_save_ptn
        self.file_type = file_type
        
    # Function to drive the other methods
    def process_data(self):
        # Get the correct time code
        self.get_time_code()
        self.resample_data()
        self.create_daily_mid_point()
        self.create_moving_avgs()
        self.remove_nans()
        self.add_vol()
        self.add_percent_change()
        self.add_height_info()
        self.add_height_of_prior_3_periods()
        self.apply_time_frame_direction()
        self.remove_nans()
        
        
    def get_time_code(self):
        if self.time_frame == 'daily':
            # 'B' is for business days
            self.time_code = 'B'
            
    # Create Daily Data and remove weekend as doesnt have same liquidity and not 
    # going to use to form patterns
    def resample_data(self):
        self.market_data = self.market_data.resample(self.time_code).agg({'open':'first','high':'max',
                                                                            'low':'min','close':'last',})
    
    def create_daily_mid_point(self):
        self.market_data['mid'] = (self.market_data['close']+self.market_data['open'])/2
    
    def create_moving_avgs(self):
        if self.time_frame == 'daily':
            self.market_data['bi_mnth_mv_avg'] = self.market_data['mid'].rolling(2).mean()
            self.market_data['qtr_mv_avg'] = self.market_data['mid'].rolling(3).mean()
        elif self.time_frame == 'monthly':
            self.market_data['wk_mv_avg'] = self.market_data['mid'].rolling(5).mean()
            self.market_data['mnth_mv_avg'] = self.market_data['mid'].rolling(21).mean()
    
    def add_vol(self):
        if self.time_frame == 'daily':
            self.market_data['volatility_3_day'] = self.market_data['mid'].pct_change().rolling(3).std()
            self.market_data['volatility_10_day'] = self.market_data['mid'].pct_change().rolling(10).std()
        elif self.time_frame == 'monthly':
            self.market_data['volatility_2_mnth'] = self.market_data['mid'].pct_change().rolling(2).std()
            self.market_data['volatility_qtr'] = self.market_data['mid'].pct_change().rolling(3).std()
    
    def add_percent_change(self):
        self.market_data['pct_chge_3_prds'] = self.market_data['mid'].pct_change(3)
        self.market_data['pct_chge_5_prds'] = self.market_data['mid'].pct_change(5)
        self.market_data['pct_chge_10_prds'] = self.market_data['mid'].pct_change(10)
    
    def add_height_info(self):
        self.market_data['height'] = abs(self.market_data['close'] - self.market_data['open'])
            
    def add_height_of_prior_3_periods(self):
        self.market_data['height-1'] = abs(self.market_data['height'].shift(1))
        self.market_data['height-2'] = abs(self.market_data['height'].shift(2))
        self.market_data['height-3'] = abs(self.market_data['height'].shift(3))
        
    def time_frame_directon(self, row):
        val = 0
        if(row['open'] > row['close']):
            val = -1
        elif (row['open'] < row['close']):
            val = 1
        return val
                
    def apply_time_frame_direction(self):
        self.market_data['direction'] = self.market_data.apply(self.time_frame_directon, axis=1)

    # remove nulls due to moving averages
    def remove_nans(self):
        self.market_data.dropna(inplace=True)
    
    def print_pattern_chart(self):
        
        fig = go.Figure(data=[go.Candlestick(x=self.features.index,
                open=self.features['open'],
                high=self.features['high'],
                low=self.features['low'],
                close=self.features['close'])])

        fig.add_trace(go.Line(x=self.market_data.index, y=self.market_data['mid'], 
                              name='average', line=dict(color='black', width=0.1, dash='dot')))

        fig.update_layout(xaxis_rangeslider_visible=False)

        fig.show()
    
    def create_pattern_list(self):
        self.dates = pd.DataFrame(self.market_data.loc[self.market_data['select'] == 1].index)

    def print_pattern_list(self):
        print(f"Len: {len(self.dates)}\n\nlist:\n{self.dates}")
        
    def create_csv_market_(self):
        self.market_data.to_csv(self.path_to_save_md + self.data_name + self.time_frame + self.file_type, index=False)
        

In [515]:
class Marubozu(Process_Market_Data):
    
    # Initialise parent class
    def __init__(self, market_data, data_name='eur-usd', time_frame='daily', 
                path_to_save_md='/Users/stuartdaw/Documents/Capstone_data/data/resampled/',
                path_to_save_ptn= '/Users/stuartdaw/Documents/Capstone_data/data/targets/',
                file_type='.csv', pattern='Marabozu', min_height=0.005, target_factor=1 , test_timeframe=5,
                margin=0.0005):
        super().__init__(market_data, data_name='eur-usd', time_frame='daily', 
                path_to_save_md='/Users/stuartdaw/Documents/Capstone_data/data/resampled/', 
                path_to_save_ptn= '/Users/stuartdaw/Documents/Capstone_data/data/targets/',
                file_type='.csv')
        self.pattern = pattern
        self.min_height = min_height
        self.target_factor = target_factor
        self.test_timeframe = test_timeframe
        self.margin = margin
        self.features = pd.DataFrame()
        self.maru_dict = {'A':0,'B':0,'C':0,'D':0}
        self.select_res_dict = {'A':0,'B':0,'C':0,'D':0,'E':0}

    def generate_pattern_list(self):
        self.process_data()
        self.classify_candles()
        self.add_other_marubozu()
        self.add_previous_days()
        self.add_future_prices()
        self.add_exit_price()
        self.choose_pattern_rows()
        self.choose_best_price()
        self.add_future_date()
        self.remove_nans()
        return self.market_data

        
    
    # Is Marubozu? Look for signicant candlesticks and their direction
    def marubozu_type(self, row):
    
        # Check its a significant height
        if abs(row['open'] - row['close']) < self.min_height:
            self.maru_dict['A'] +=1
            return 0

        # Check the direction
        if row['open'] > row['close']:
            self.maru_dict['B'] +=1
            return -1
        elif row['open'] < row['close']:
            self.maru_dict['C'] +=1
            return 1
        
            self.maru_dict['D'] +=1
        return 0

    # Tag the line with information on the candles
    def classify_candles(self):
        print(self.min_height)
        self.market_data['marubozu'] = self.market_data.apply(lambda row: self.marubozu_type(row), axis=1)
        
    def add_other_marubozu(self):
        self.market_data['marubozu+1'] = self.market_data['marubozu'].shift(-1)
        self.market_data['marubozu-1'] = self.market_data['marubozu'].shift(1)
        self.market_data['marubozu-2'] = self.market_data['marubozu'].shift(2)

    def add_previous_days(self):
        self.market_data['day-1_open'] = self.market_data['open'].shift(1)
        self.market_data['day-2_open'] = self.market_data['open'].shift(2)
        self.market_data['day-3_open'] = self.market_data['open'].shift(3)

        self.market_data['day-1_high'] = self.market_data['high'].shift(1)
        self.market_data['day-2_high'] = self.market_data['high'].shift(2)
        self.market_data['day-3_high'] = self.market_data['high'].shift(3)

        self.market_data['day-1_low'] = self.market_data['low'].shift(1)
        self.market_data['day-2_low'] = self.market_data['low'].shift(2)
        self.market_data['day-3_low'] = self.market_data['low'].shift(3)

        self.market_data['day-1_close'] = self.market_data['close'].shift(1)
        self.market_data['day-2_close'] = self.market_data['close'].shift(2)
        self.market_data['day-3_close'] = self.market_data['close'].shift(3)
        
        
    # create future columns for target and matching connected Marubuzo
    def add_future_prices(self):
        self.market_data['day+1_open'] = self.market_data['open'].shift(-1)
        self.market_data['day+1_high'] = self.market_data['high'].shift(-1)
        self.market_data['day+1_low'] = self.market_data['low'].shift(-1)
        self.market_data['day+1_close'] = self.market_data['close'].shift(-1)
        self.market_data['day+2_high'] = self.market_data['high'].shift(-2)
        self.market_data['day+2_low'] = self.market_data['low'].shift(-2)
        self.market_data['day+3_high'] = self.market_data['high'].shift(-3)
        self.market_data['day+3_low'] = self.market_data['low'].shift(-3)
        self.market_data['day+4_high'] = self.market_data['high'].shift(-4)
        self.market_data['day+4_low'] = self.market_data['low'].shift(-4)
        self.market_data['day+5_high'] = self.market_data['high'].shift(-5)        
        self.market_data['day+5_low'] = self.market_data['low'].shift(-5)        
        
    def choose_exit_price(self, row):
        return row['close'] + (row['height'] * self.target_factor)

    def add_exit_price(self):
        self.market_data['exit_price'] = self.market_data.apply(lambda row: self.choose_exit_price(row), axis=1)
    
    # Select rows that have a opposite pattern of very similar height.
    def tag_rows(self, row):
        
        margin = self.margin

        if abs(row['marubozu']) != 1:
            self.select_res_dict['A'] +=1
            return 0

        if ((row['marubozu-2'] == 1 and row['marubozu-1'] == -1) or (
                row['marubozu-2'] == -1 and row['marubozu-1'] == 1)) and (
            ((abs(row['day-2_open'] - row['day-1_close']) < margin) and
            (abs(row['day-2_close'] - row['day-1_open']) < margin))):
            self.select_res_dict['B'] +=1    
            return 0

        if  ((row['marubozu-1'] == 1 and row['marubozu'] == -1) or (
                row['marubozu-1'] == -1 and row['marubozu'] == 1)) and (
            ((abs(row['open'] - row['day-1_close']) < margin) and
            (abs(row['close'] - row['day-1_open']) < margin))):
            self.select_res_dict['C'] +=1
            return 1

        if ((row['marubozu+1'] == 1 and row['marubozu'] == -1) or (
                row['marubozu+1'] == -1 and row['marubozu'] == 1)) and (
            ((abs(row['open'] - row['day+1_close']) < margin) and
            (abs(row['close'] - row['day+1_open']) < margin))):
            self.select_res_dict['D'] +=1
            return 2 
        
        self.select_res_dict['E'] +=1
        return 0

    def choose_pattern_rows(self):
        self.market_data['select'] = self.market_data.apply(lambda row: self.tag_rows(row), axis=1)
        
    def get_best_price(self, row):
        if row['marubozu'] == 1:     
            return row[['day+1_high','day+2_high','day+3_high','day+4_high','day+5_high']].max()
        elif row['marubozu'] == -1:                 
            return row[['day+1_low','day+2_low','day+3_low','day+4_low','day+5_low']].min()
            
    def choose_best_price(self):
        self.market_data['target'] = self.market_data.apply(lambda row: self.get_best_price(row), axis=1)

        # Calculate the date of 5 business days ahead
        # update using fing loc
    def add_future_date(self):
        self.market_data['date+5'] = self.market_data.index.shift(self.test_timeframe, freq=self.time_code)
        
    def select_pattern(self):
        self.features = self.market_data.loc[(self.market_data['select'] == 1) | (self.market_data['select'] == 2)]
        self.create_pattern_list()
        
    def print_marubozu_chart(self):
        self.select_pattern()
        self.print_pattern_chart()
        
    def create_csv_pattern(self):
        self.dates.to_csv(self.path_to_save_ptn + self.time_frame + '_pattern2' + self.file_type, index=False)

In [504]:
mb = Marubozu(eur_usd)

In [505]:
daily = mb.generate_pattern_list()

0.005


In [506]:
daily.head(8)

Unnamed: 0_level_0,open,high,low,close,mid,bi_mnth_mv_avg,qtr_mv_avg,volatility_3_day,volatility_10_day,pct_chge_3_prds,pct_chge_5_prds,pct_chge_10_prds,height,height-1,height-2,height-3,direction,marubozu,marubozu+1,marubozu-1,marubozu-2,day-1_open,day-2_open,day-3_open,day-1_high,day-2_high,day-3_high,day-1_low,day-2_low,day-3_low,day-1_close,day-2_close,day-3_close,day+1_open,day+1_high,day+1_low,day+1_close,day+2_high,day+2_low,day+3_high,day+3_low,day+4_high,day+4_low,day+5_high,day+5_low,exit_price,select,target,date+5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1
2000-06-20,0.9584,0.9617,0.9509,0.9516,0.955,0.958325,0.958567,0.005717,0.004611,-0.000994,-0.002559,0.005422,0.0068,0.0067,0.0117,0.0041,-1,-1,-1.0,-1.0,1.0,0.965,0.9532,0.958,0.9694,0.9662,0.9607,0.9567,0.9523,0.9504,0.9583,0.9649,0.9539,0.9517,0.9534,0.9433,0.9467,0.9476,0.9349,0.9433,0.9342,0.9399,0.9299,0.9472,0.9374,0.9584,0,0.9299,2000-06-27
2000-06-21,0.9517,0.9534,0.9433,0.9467,0.9492,0.9521,0.955283,0.005331,0.004397,-0.010271,-0.010992,-0.007788,0.005,0.0068,0.0067,0.0117,-1,-1,-1.0,-1.0,-1.0,0.9584,0.965,0.9532,0.9617,0.9694,0.9662,0.9509,0.9567,0.9523,0.9516,0.9583,0.9649,0.9468,0.9476,0.9349,0.9365,0.9433,0.9342,0.9399,0.9299,0.9472,0.9374,0.947,0.9394,0.9517,0,0.9299,2000-06-28
2000-06-22,0.9468,0.9476,0.9349,0.9365,0.94165,0.945425,0.948617,0.000942,0.004894,-0.020798,-0.014959,-0.016451,0.0103,0.005,0.0068,0.0067,-1,-1,0.0,-1.0,-1.0,0.9517,0.9584,0.965,0.9534,0.9617,0.9694,0.9433,0.9509,0.9567,0.9467,0.9516,0.9583,0.9364,0.9433,0.9342,0.9362,0.9399,0.9299,0.9472,0.9374,0.947,0.9394,0.954,0.9418,0.9468,0,0.9299,2000-06-29
2000-06-27,0.9388,0.9472,0.9374,0.9456,0.9422,0.9398,0.938633,0.005466,0.004964,0.000584,-0.013403,-0.015928,0.0068,0.0026,0.0002,0.0103,1,1,0.0,0.0,0.0,0.9361,0.9364,0.9468,0.9399,0.9433,0.9476,0.9299,0.9342,0.9349,0.9387,0.9362,0.9365,0.9454,0.947,0.9394,0.9436,0.954,0.9418,0.9604,0.9518,0.9545,0.9475,0.9531,0.9467,0.9524,0,0.9604,2000-07-04
2000-06-29,0.9435,0.954,0.9418,0.9526,0.94805,0.946275,0.944917,0.00134,0.005155,0.011361,0.006797,-0.008264,0.0091,0.0018,0.0068,0.0026,1,1,0.0,0.0,1.0,0.9454,0.9388,0.9361,0.947,0.9472,0.9399,0.9394,0.9374,0.9299,0.9436,0.9456,0.9387,0.9525,0.9604,0.9518,0.9545,0.9545,0.9475,0.9531,0.9467,0.9573,0.9517,0.9598,0.9497,0.9617,0,0.9604,2000-07-06
2000-07-03,0.9543,0.9545,0.9475,0.9482,0.95125,0.952375,0.950933,0.004226,0.005323,0.007147,0.014775,-0.010815,0.0061,0.002,0.0091,0.0018,-1,-1,0.0,0.0,1.0,0.9525,0.9435,0.9454,0.9604,0.954,0.947,0.9518,0.9418,0.9394,0.9545,0.9526,0.9436,0.9483,0.9531,0.9467,0.9524,0.9573,0.9517,0.9598,0.9497,0.9526,0.9459,0.9557,0.9501,0.9543,0,0.9459,2000-07-10
2000-07-12,0.9503,0.9517,0.9396,0.9417,0.946,0.949225,0.950267,0.004718,0.003575,-0.00442,-0.007137,0.001588,0.0086,0.0041,0.0043,0.0004,-1,-1,-1.0,0.0,0.0,0.9545,0.9502,0.9504,0.9569,0.9557,0.9526,0.9496,0.9501,0.9459,0.9504,0.9545,0.95,0.9416,0.9425,0.933,0.9354,0.9389,0.9318,0.9402,0.9342,0.9368,0.9227,0.927,0.9193,0.9503,0,0.9193,2000-07-19
2000-07-13,0.9416,0.9425,0.933,0.9354,0.9385,0.94225,0.94565,0.004343,0.004135,-0.014543,-0.014025,-0.010073,0.0062,0.0086,0.0041,0.0043,-1,-1,0.0,-1.0,0.0,0.9503,0.9545,0.9502,0.9517,0.9569,0.9557,0.9396,0.9496,0.9501,0.9417,0.9504,0.9545,0.9353,0.9389,0.9318,0.9383,0.9402,0.9342,0.9368,0.9227,0.927,0.9193,0.9342,0.9204,0.9416,0,0.9193,2000-07-20


In [507]:
mb.select_res_dict

{'A': 2738, 'B': 27, 'C': 64, 'D': 63, 'E': 2037}

In [508]:
mb.maru_dict

{'A': 2738, 'B': 1083, 'C': 1108, 'D': 0}

In [509]:
mb.print_marubozu_chart()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [510]:
mb.print_pattern_list()

Len: 64

list:
         date
0  2000-10-11
1  2000-10-20
2  2001-02-09
3  2001-04-05
4  2001-04-09
..        ...
59 2018-09-14
60 2018-10-22
61 2018-12-10
62 2019-01-31
63 2019-02-13

[64 rows x 1 columns]


In [511]:
daily_pattern = pd.read_csv('/Users/stuartdaw/Documents/Capstone_data/data/targets/daily_pattern.csv', 
                           parse_dates=True)

In [512]:
print(daily_pattern)

   pattern_end
0   2001-02-08
1   2001-04-04
2   2001-04-09
3   2001-08-20
4   2002-02-20
..         ...
57  2018-09-14
58  2018-10-22
59  2018-12-10
60  2019-01-31
61  2019-02-13

[62 rows x 1 columns]


In [513]:
mb.create_csv_pattern()

In [421]:
(0.9580+0.9539)/2

0.95595

In [None]:
md = Process_Market_Data(eur_usd)

In [None]:
daily = md.process_data()

In [None]:
daily.head()

In [None]:
# Create Daily Data and remove weekend as doesnt have same liquidity and not 
# going to use to form patterns

daily = eur_usd.resample('B').agg({'open':'first','high':'max',
                                        'low':'min', 'close':'last',
                                        'year':'max','month':'max',
                                        'day':'first'})

daily['day_name'] = daily.index.day_name()
# daily.drop(daily.loc[(daily['day_name'] == 'Sunday') 
#                     | (daily['day_name'] == 'Saturday')].index, inplace=True)
daily.index

In [None]:
daily.head(14)

In [None]:
daily.isnull().sum()

In [None]:
daily.loc[daily['open'].isnull()]

#### Check Dates and Remove Nan's

After checking these dates I noticed:

+ A lot of them were holidays - christmas, new years day
+ They were mostly in the first couple of years

I have decided to remove these for the follwoing reason.
+ The volatility and low liquidity could lead to false patterns
+ Most profiessional traders avoid the markets around holiday periods
+ The number of Nan's is relatively small compared to the dataset, so im prepared to lose some data and maintain accuracy.

[ECB Data](https://sdw.ecb.europa.eu/quickview.do?SERIES_KEY=120.EXR.D.USD.EUR.SP00.A)

[FOREX holidays](https://freshforex.com/analitics/holidays/)

In [None]:
# Errors due to public holidays
daily.dropna(inplace=True)

In [None]:
daily.isnull().sum()

In [None]:
daily.index

In [None]:
daily.shape

In [None]:
# daily.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/resampled/daily.csv', index=True)

---

---

# 3.0 Pattern Matching

---

## 3.2 Daily Data

In [None]:
daily.head()

In [None]:
gold.head()

In [None]:
# gold2 = gold.resample('B').max()

In [None]:
daily.head()

In [None]:
daily.isnull().sum()

In [None]:
gold.shape, daily.shape

In [None]:
# daily['gold_usd'] = gold['usd']
daily = pd.concat([daily, gold], axis=1)

In [None]:
gold.index

In [None]:
daily.head(5)

In [None]:
daily.tail()

In [None]:
daily.isnull().sum()

In [None]:
daily.loc[daily['open'].isnull()]

In [None]:
# drop na as they are extra from the gold data
daily.dropna(inplace=True)

In [None]:
daily.isnull().sum()

# Loop through and find the patterns

In [None]:
# Is Marubozu? Look for candlesticks with above a height and determine the timeframes direction

def is_marubozu(row, min_height= 0.005):
        
    # Check its a significant height
    if abs(row['open'] - row['close']) < min_height:
        return 0
    
    # Check the direction
    if row['open'] > row['close']:
        return -1
    elif row['open'] < row['close']:
        return 1
        
    return 0

In [None]:
# Tag the line with information on the 
daily['marubozu'] = daily.apply(lambda row: is_marubozu(row), axis=1)

In [None]:
daily['marubozu+1'] = daily['marubozu'].shift(-1)
daily['marubozu-1'] = daily['marubozu'].shift(1)
daily['marubozu-2'] = daily['marubozu'].shift(2)

In [None]:
# See how many Marubozu have been created
daily['marubozu'].value_counts()

---

In [None]:
daily['av_3_height'] = daily['height'].rolling(3).mean()

---

## Add Moving averages


In [None]:
daily['wk_mv_av'] = daily['close'].rolling(7).mean()
daily['mnth_mv_av'] = daily['close'].rolling(30).mean()
daily['qtr_mv_av'] = daily['close'].rolling(120).mean()

In [None]:
daily.head(34)

In [None]:
daily.index

---

### Add Volatility

In [None]:
daily['vol'] = daily['close'].pct_change().rolling(3).std()

## Create rows that match the pattern + add additional features

In [None]:
#### Add previous 5 time frames
daily_pre = daily.drop(columns=['year','month','day','day_name'])

In [None]:
daily_pre.index

In [None]:
daily_pre.isnull().sum()

In [None]:
daily_pre.head()

In [None]:
daily_pre['day-1_open'] = daily_pre['open'].shift(1)
daily_pre['day-2_open'] = daily_pre['open'].shift(2)
daily_pre['day-3_open'] = daily_pre['open'].shift(3)

daily_pre['day-1_high'] = daily_pre['high'].shift(1)
daily_pre['day-2_high'] = daily_pre['high'].shift(2)
daily_pre['day-3_high'] = daily_pre['high'].shift(3)

daily_pre['day-1_low'] = daily_pre['low'].shift(1)
daily_pre['day-2_low'] = daily_pre['low'].shift(2)
daily_pre['day-3_low'] = daily_pre['low'].shift(3)

daily_pre['day-1_close'] = daily_pre['close'].shift(1)
daily_pre['day-2_close'] = daily_pre['close'].shift(2)
daily_pre['day-3_close'] = daily_pre['close'].shift(3)

daily_pre.head()

In [None]:
# create future columns for target and matching connected Marubuzo
daily_pre['day+1_open'] = daily_pre['open'].shift(-1)
daily_pre['day+1_high'] = daily_pre['high'].shift(-1)
daily_pre['day+1_low'] = daily_pre['low'].shift(-1)
daily_pre['day+1_close'] = daily_pre['close'].shift(-1)

daily_pre['day+2_high'] = daily_pre['high'].shift(-2)
daily_pre['day+3_high'] = daily_pre['high'].shift(-3)
daily_pre['day+4_high'] = daily_pre['high'].shift(-4)
daily_pre['day+5_high'] = daily_pre['high'].shift(-5)

# Calculate the date of 5 business days ahead
daily_pre['date+5'] = daily_pre.index.shift(5, freq='B')

daily_pre.head(10)

In [None]:
daily_pre['target'] = daily_pre[['day+1_high','day+2_high','day+3_high','day+4_high','day+5_high']].apply(max, axis=1)

In [None]:
def choose_exit_price(row):
        return row['close'] + (row['height'] * 1)

In [None]:
daily_pre['double_height'] = daily_pre.apply(choose_exit_price, axis=1)

In [None]:
daily_pre.head()

In [None]:
daily_pre.isnull().sum()

In [None]:
daily_pre.dropna(inplace=True)

In [None]:
daily_pre.isnull().sum()

In [None]:
daily_pre.head(5)

In [None]:
# features = daily_pre.loc[((daily_pre['marubozu'] == 1) &
#                          ((daily_pre['marubozu+1'] == -1) | (daily_pre['marubozu-1'] == -1))) |
#                          ((daily_pre['marubozu'] == -1) &
#                          ((daily_pre['marubozu+1'] == 1) | (daily_pre['marubozu-1'] == 1)))]

In [None]:
# data = [[0,0,0,0,0], [1,-1,1,1,0],[-1,1,-1,1,0], [0,-1,1,1,1],[0,1,-1,1,1],[0,0,1,-1,1],[0,0,-1,1,1]] 
  
# # Create the pandas DataFrame 
# df = pd.DataFrame(data, columns = ['marubozu-2', 'marubozu-1','marubozu','marubozu+1','expected']) 
# df

In [None]:
# # Select rows
# def tag_rows(daily_pre):
    
#     if abs(daily_pre['marubozu']) != 1:
#         return 0
    
#     if (daily_pre['marubozu-2'] == 1 and daily_pre['marubozu-1'] == -1) or (daily_pre['marubozu-2'] == -1 and daily_pre['marubozu-1'] == 1):
#             return 0

#     if (daily_pre['marubozu-1'] == 1 and daily_pre['marubozu'] == -1) or (daily_pre['marubozu-1'] == -1 and daily_pre['marubozu'] == 1):
#             return 1
        
#     if (daily_pre['marubozu+1'] == 1 and daily_pre['marubozu'] == -1) or (daily_pre['marubozu+1'] == -1 and daily_pre['marubozu'] == 1):
#             return 1

In [None]:
# data = [[0,0,0,0,0,0,0,0,0,0,0], [1,-1,1,1,0,0,0,0,0,0,0],[-1,1,-1,1,0,0,0,0,0,0,0], 
#         [0,-1,1,1,10,5,5,10,7,7,1],[0,1,-1,1,5,10,10,5,7,7,1],
#         [0,0,1,-1,10,5,7,7,5,10,1],[0,0,-1,1,5,10,7,7,10,5,1]]
  
# # Create the pandas DataFrame 
# df = pd.DataFrame(data, columns = ['marubozu-2', 'marubozu-1','marubozu','marubozu+1','open','close',
#                                    'day-1_open','day-1_close','day+1_open','day+1_close','expected']) 
# df

In [None]:
# Select rows that have a opposite pattern of very similar height.

def tag_rows(daily_pre, margin=0.0005):
    
    if abs(daily_pre['marubozu']) != 1:
         return 0

    if ((daily_pre['marubozu-2'] == 1 and daily_pre['marubozu-1'] == -1) or (
            daily_pre['marubozu-2'] == -1 and daily_pre['marubozu-1'] == 1)) and (
        ((abs(daily_pre['day-2_open'] - daily_pre['day-1_close']) < margin) and
        (abs(daily_pre['day-2_close'] - daily_pre['day-1_open']) < margin))):
            return 0

    if  ((daily_pre['marubozu-1'] == 1 and daily_pre['marubozu'] == -1) or (
        daily_pre['marubozu-1'] == -1 and daily_pre['marubozu'] == 1)) and (
        ((abs(daily_pre['open'] - daily_pre['day-1_close']) < margin) and
        (abs(daily_pre['close'] - daily_pre['day-1_open']) < margin))):
            return 1
        
    if ((daily_pre['marubozu+1'] == 1 and daily_pre['marubozu'] == -1) or (
        daily_pre['marubozu+1'] == -1 and daily_pre['marubozu'] == 1)) and (
        ((abs(daily_pre['open'] - daily_pre['day+1_close']) < margin) and
        (abs(daily_pre['close'] - daily_pre['day+1_open']) < margin))):
            return 1 

    return 0

In [None]:
daily_pre['select'] = daily_pre.apply(lambda row: tag_rows(row), axis=1)

In [None]:
daily_pre.loc[(daily_pre.index > '2004-04-1') & (daily_pre.index < '2004-04-11'),
              ['marubozu-2', 'marubozu-1','marubozu','marubozu+1','open','close',
                                   'day-1_open','day-1_close','day+1_open','day+1_close','select']]

In [None]:
features = daily_pre.loc[daily_pre['select'] == 1]

In [None]:
features.head(10)

In [None]:
features.shape

In [None]:
fig = go.Figure(data=[go.Candlestick(x=features.index,
                open=features['open'],
                high=features['high'],
                low=features['low'],
                close=features['close'])])

fig.add_trace(go.Line(x=daily.index, y=(daily['close']-daily['open'])+daily['open'], name='average',
                         line=dict(color='black', width=0.1, dash='dot')))

fig.update_layout(xaxis_rangeslider_visible=False)

fig.show()

In [None]:
# Store Dates for start point of predictions

In [None]:
count = 0
dates = []
for row in features.itertuples():
    if count%2 == 0:
        dates.append(row[0])
    count+=1

In [None]:
dates_daily = pd.DataFrame(dates)

In [None]:
dates_daily.rename(columns={0:'pattern_end'}, inplace=True);

In [None]:
dates_daily['pattern_end'] = pd.to_datetime(dates_daily['pattern_end'])

In [None]:
# dates_daily.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/targets/daily_pattern.csv', index=False)

In [None]:
# Create data using cut off dates

In [None]:
# # Save the dataframes 
# for date in dates:
#     df = daily[(daily.index.get_level_values(0) >= '2000-05-30') & (
#     daily.index.get_level_values(0) <= date)]
#     df.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/targets/daily_'+ str(date) + '.csv', index=True)

In [None]:
daily_pre.shape

In [None]:
daily_pre.index

In [None]:
# daily_pre.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/resampled/daily.csv', index=True)

---

---

## 3.1 Monthly Data

# Loop through and find the patterns

In [None]:
# Is Marubozu? Look for candlesticks with above a height and determine the timeframes direction


def is_marubozu(row, min_height= 0.005):
        
    # Check its a significant height
    if abs(row['open'] - row['close']) < min_height:
        return 0
    
    # Check the direction
    if row['open'] > row['close']:
        return -1
    elif row['open'] < row['close']:
        return 1
        
    return 0

In [None]:
# Tag the line with information on the 
monthly['marubozu'] = monthly.apply(lambda row: is_marubozu(row), axis=1)

In [None]:
monthly['marubozu+1'] = monthly['marubozu'].shift(-1)
monthly['marubozu-1'] = monthly['marubozu'].shift(1)
monthly['marubozu-2'] = monthly['marubozu'].shift(2)

In [None]:
# See how many Marubozu have been created
monthly['marubozu'].value_counts()

In [None]:
monthly.isnull().sum()

---

## add height

In [None]:
monthly['height'] = monthly['close'] - monthly['open']

In [None]:
monthly.head(20)

In [None]:
monthly['av_3_height'] = monthly['height'].rolling(3).mean()

In [None]:
monthly.head(10)

---

## Add Moving averages


In [None]:
monthly['mnth_mv_av'] = monthly['close'].rolling(2).mean()
monthly['qtr_mv_av'] = monthly['close'].rolling(3).mean()
monthly['yr_mv_av'] = monthly['close'].rolling(12).mean()


In [None]:
monthly.head(13)

## Create rows that match the pattern + add additional features

In [None]:
#### Add previous 5 time frames
monthly = monthly.drop(columns=['year','month'])

In [None]:
monthly.head()

In [None]:
monthly['mth-1_open'] = monthly['open'].shift(1)
monthly['mth-2_open'] = monthly['open'].shift(2)
monthly['mth-3_open'] = monthly['open'].shift(3)

monthly['mth-1_high'] = monthly['high'].shift(1)
monthly['mth-2_high'] = monthly['high'].shift(2)
monthly['mth-3_high'] = monthly['high'].shift(3)

monthly['mth-1_low'] = monthly['low'].shift(1)
monthly['mth-2_low'] = monthly['low'].shift(2)
monthly['mth-3_low'] = monthly['low'].shift(3)

monthly['mth-1_close'] = monthly['close'].shift(1)
monthly['mth-2_close'] = monthly['close'].shift(2)
monthly['mth-3_close'] = monthly['close'].shift(3)

monthly.head()

In [None]:
# create future columns for target and matching connected Marubuzo
monthly['mth+1_open'] = monthly['open'].shift(-1)
monthly['mth+1_high'] = monthly['high'].shift(-1)
monthly['mth+1_low'] = monthly['low'].shift(-1)
monthly['mth+1_close'] = monthly['close'].shift(-1)

monthly['mth+2_high'] = monthly['high'].shift(-2)
monthly['mth+3_high'] = monthly['high'].shift(-3)
monthly.head()

In [None]:
monthly['target'] = monthly[['mth+1_high','mth+2_high','mth+3_high']].apply(max, axis=1)

In [None]:
monthly.head()

In [None]:
monthly.head(5)

In [None]:
# features = daily_pre.loc[((daily_pre['marubozu'] == 1) &
#                          ((daily_pre['marubozu+1'] == -1) | (daily_pre['marubozu-1'] == -1))) |
#                          ((daily_pre['marubozu'] == -1) &
#                          ((daily_pre['marubozu+1'] == 1) | (daily_pre['marubozu-1'] == 1)))]

In [None]:
# data = [[0,0,0,0,0], [1,-1,1,1,0],[-1,1,-1,1,0], [0,-1,1,1,1],[0,1,-1,1,1],[0,0,1,-1,1],[0,0,-1,1,1]] 
  
# # Create the pandas DataFrame 
# df = pd.DataFrame(data, columns = ['marubozu-2', 'marubozu-1','marubozu','marubozu+1','expected']) 
# df

In [None]:
# # Select rows
# def tag_rows(daily_pre):
    
#     if abs(daily_pre['marubozu']) != 1:
#         return 0
    
#     if (daily_pre['marubozu-2'] == 1 and daily_pre['marubozu-1'] == -1) or (daily_pre['marubozu-2'] == -1 and daily_pre['marubozu-1'] == 1):
#             return 0

#     if (daily_pre['marubozu-1'] == 1 and daily_pre['marubozu'] == -1) or (daily_pre['marubozu-1'] == -1 and daily_pre['marubozu'] == 1):
#             return 1
        
#     if (daily_pre['marubozu+1'] == 1 and daily_pre['marubozu'] == -1) or (daily_pre['marubozu+1'] == -1 and daily_pre['marubozu'] == 1):
#             return 1

In [None]:
# data = [[0,0,0,0,0,0,0,0,0,0,0], [1,-1,1,1,0,0,0,0,0,0,0],[-1,1,-1,1,0,0,0,0,0,0,0], 
#         [0,-1,1,1,10,5,5,10,7,7,1],[0,1,-1,1,5,10,10,5,7,7,1],
#         [0,0,1,-1,10,5,7,7,5,10,1],[0,0,-1,1,5,10,7,7,10,5,1]]
  
# # Create the pandas DataFrame 
# df = pd.DataFrame(data, columns = ['marubozu-2', 'marubozu-1','marubozu','marubozu+1','open','close',
#                                    'day-1_open','day-1_close','day+1_open','day+1_close','expected']) 
# df

In [None]:
# Select rows that have a opposite pattern of very similar height.

def tag_rows(df, margin=0.003):
    
    if abs(df['marubozu']) != 1:
         return 0

    if ((df['marubozu-2'] == 1 and df['marubozu-1'] == -1) or (
            df['marubozu-2'] == -1 and df['marubozu-1'] == 1)) and (
        ((abs(df['mth-2_open'] - df['mth-1_close']) < margin) and
        (abs(df['mth-2_close'] - df['mth-1_open']) < margin))):
            return 0

    if  ((df['marubozu-1'] == 1 and df['marubozu'] == -1) or (
        df['marubozu-1'] == -1 and df['marubozu'] == 1)) and (
        ((abs(df['open'] - df['mth-1_close']) < margin) and
        (abs(df['close'] - df['mth-1_open']) < margin))):
            return 1
        
    if ((df['marubozu+1'] == 1 and df['marubozu'] == -1) or (
        df['marubozu+1'] == -1 and df['marubozu'] == 1)) and (
        ((abs(df['open'] - df['mth+1_close']) < margin) and
        (abs(df['close'] - df['mth+1_open']) < margin))):
            return 1 

    return 0

In [None]:
monthly['select'] = monthly.apply(lambda row: tag_rows(row), axis=1)

In [None]:
monthly.loc[(monthly.index > '2004-01-1') & (monthly.index < '2004-12-31'),
              ['marubozu-2', 'marubozu-1','marubozu','marubozu+1','open','close',
                                   'mth-1_open','mth-1_close','mth+1_open','mth+1_close','select']]

In [None]:
features_mth = monthly.loc[monthly['select'] == 1]

In [None]:
features_mth.head(10)

In [None]:
features_mth.shape

In [None]:
fig = go.Figure(data=[go.Candlestick(x=features_mth.index,
                open=features_mth['open'],
                high=features_mth['high'],
                low=features_mth['low'],
                close=features_mth['close'])])

fig.add_trace(go.Line(x=daily.index, y=daily['close'], name='average',
                         line=dict(color='black', width=0.1, dash='dot')))

fig.update_layout(xaxis_rangeslider_visible=False)

fig.show()

In [None]:
# monthly.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/resampled/monthly.csv', index=True)

In [None]:
# Store Dates for start point of predictions

In [None]:
# count = 0
# dates = []
# for row in features_mth.itertuples():
#     if count%2 == 0:
#         dates.append(row[0])
#     count+=1

In [None]:
# Create data using cut off dates

In [None]:
# Save the dataframes 
# for date in dates:
#     df = monthly[(monthly.index.get_level_values(0) >= '2000-05-30') & (
#     daily.index.get_level_values(0) <= date)]
#     df.to_csv('/Users/stuartdaw/Documents/Capstone_data/data/targets/monthly_'+ str(date) + '.csv', index=True)