## Analysing moving average crossings

In [106]:
import pandas as pd
import plotly.graph_objects as go
import datetime as dt
from plotting import CandlePlot

In [107]:
# extend path to import modules from other folders
import sys
# append everything above the level where we're now
sys.path.append("../")

In [108]:
# import instruments to calculate pips after trade signals have been identified
from infrastructure.instrument_collection import instrumentCollection as ic

In [109]:
# load the candle data for one instrument pair
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling,
# so only load files created by this project, never pickles from untrusted sources
pair = "EUR_USD"
granularity = "H4"
df = pd.read_pickle(f"../../data/{pair}_{granularity}.pkl")
# window sizes (number of candles) of the moving averages that get calculated
MA_LIST = [10, 20]

In [110]:
df.shape

(4000, 14)

In [111]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype                  
---  ------  --------------  -----                  
 0   time    4000 non-null   datetime64[ns, tzutc()]
 1   volume  4000 non-null   int64                  
 2   mid_o   4000 non-null   float64                
 3   mid_h   4000 non-null   float64                
 4   mid_l   4000 non-null   float64                
 5   mid_c   4000 non-null   float64                
 6   bid_o   4000 non-null   float64                
 7   bid_h   4000 non-null   float64                
 8   bid_l   4000 non-null   float64                
 9   bid_c   4000 non-null   float64                
 10  ask_o   4000 non-null   float64                
 11  ask_h   4000 non-null   float64                
 12  ask_l   4000 non-null   float64                
 13  ask_c   4000 non-null   float64                
dtypes: datetime64[ns, tzutc()](1), float64(12), int64(1)

In [112]:
df.head()

Unnamed: 0,time,volume,mid_o,mid_h,mid_l,mid_c,bid_o,bid_h,bid_l,bid_c,ask_o,ask_h,ask_l,ask_c
0,2019-12-27 14:00:00+00:00,3513,1.11537,1.11843,1.1152,1.11838,1.11531,1.11837,1.11514,1.11832,1.11543,1.11849,1.11527,1.11844
1,2019-12-27 18:00:00+00:00,1370,1.11836,1.11883,1.1171,1.11758,1.1183,1.11877,1.11683,1.11708,1.11842,1.11889,1.11718,1.11808
2,2019-12-29 22:00:00+00:00,1060,1.11773,1.11954,1.11718,1.11948,1.11723,1.11948,1.11668,1.11941,1.11823,1.11961,1.11751,1.11954
3,2019-12-30 02:00:00+00:00,1031,1.11944,1.12108,1.11918,1.12028,1.11937,1.12101,1.11911,1.12021,1.11952,1.12114,1.11925,1.12034
4,2019-12-30 06:00:00+00:00,2870,1.12028,1.12058,1.11901,1.11994,1.12021,1.12051,1.11895,1.11988,1.12034,1.12064,1.11906,1.12


In [113]:
# get data frame with copy of relevant data
df_ma = df[['time', 'mid_o', 'mid_h', 'mid_l', 'mid_c']].copy()

In [114]:
# one rolling mean of the mid close per window size in MA_LIST
# (rolling() yields NaN until a full window of candles is available)
for ma in MA_LIST:
    df_ma[f'MA_{ma}'] = df_ma['mid_c'].rolling(window=ma).mean()
# drop the leading rows that have no complete average yet and renumber the index from 0
df_ma = df_ma.dropna().reset_index(drop=True)

In [115]:
df_ma.head()

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20
0,2020-01-02 18:00:00+00:00,1.11702,1.11736,1.11636,1.11728,1.120605,1.120208
1,2020-01-02 22:00:00+00:00,1.11728,1.11784,1.117,1.11736,1.120205,1.120157
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011
3,2020-01-03 06:00:00+00:00,1.11664,1.11718,1.11401,1.11402,1.118601,1.119837
4,2020-01-03 10:00:00+00:00,1.11404,1.11488,1.11249,1.11382,1.117863,1.119514


In [116]:
df_ma.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3981 entries, 0 to 3980
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype                  
---  ------  --------------  -----                  
 0   time    3981 non-null   datetime64[ns, tzutc()]
 1   mid_o   3981 non-null   float64                
 2   mid_h   3981 non-null   float64                
 3   mid_l   3981 non-null   float64                
 4   mid_c   3981 non-null   float64                
 5   MA_10   3981 non-null   float64                
 6   MA_20   3981 non-null   float64                
dtypes: datetime64[ns, tzutc()](1), float64(6)
memory usage: 217.8 KB


In [117]:
# plotting the data frame with plotly
# take the first 500 candles of the data frame
df_plot = df_ma.iloc[:500]

In [118]:
df_plot.shape

(500, 7)

In [119]:
# create candle plot instance
cp = CandlePlot(df_plot)

In [120]:
traces = [ f"MA_{x}" for x in MA_LIST ]

In [121]:
traces

['MA_10', 'MA_20']

In [122]:
cp.show_plot(line_traces=traces)

## Deriving a strategy from average crossings

In [123]:
# column names of the short and the long moving average used for the crossing
MA_S = "MA_10"
MA_L = "MA_20"
# signal codes returned by is_trade() and stored in the TRADE column
# (NONE is the integer 0 meaning "no signal", not Python's None)
BUY = 1
SELL = -1
NONE = 0

In [124]:
# get data frame with copy of relevant data
df_an = df_ma[['time', 'mid_o', 'mid_h', 'mid_l', 'mid_c', MA_S, MA_L]].copy()

In [125]:
df_an.head()

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20
0,2020-01-02 18:00:00+00:00,1.11702,1.11736,1.11636,1.11728,1.120605,1.120208
1,2020-01-02 22:00:00+00:00,1.11728,1.11784,1.117,1.11736,1.120205,1.120157
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011
3,2020-01-03 06:00:00+00:00,1.11664,1.11718,1.11401,1.11402,1.118601,1.119837
4,2020-01-03 10:00:00+00:00,1.11404,1.11488,1.11249,1.11382,1.117863,1.119514


In [126]:
# calculate delta between short and long moving average
# the columns are addressed through MA_S / MA_L (not hard-coded MA_10 / MA_20)
# so that changing the two constants is enough to analyse another pair of averages
df_an['DELTA'] = df_an[MA_S] - df_an[MA_L]

In [127]:
df_an.head(25) # going from below to above the line at about row 14 -> this is a buy signal

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA
0,2020-01-02 18:00:00+00:00,1.11702,1.11736,1.11636,1.11728,1.120605,1.120208,0.000397
1,2020-01-02 22:00:00+00:00,1.11728,1.11784,1.117,1.11736,1.120205,1.120157,4.8e-05
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621
3,2020-01-03 06:00:00+00:00,1.11664,1.11718,1.11401,1.11402,1.118601,1.119837,-0.001236
4,2020-01-03 10:00:00+00:00,1.11404,1.11488,1.11249,1.11382,1.117863,1.119514,-0.001651
5,2020-01-03 14:00:00+00:00,1.1138,1.11799,1.11354,1.11724,1.117378,1.119379,-0.002001
6,2020-01-03 18:00:00+00:00,1.11722,1.11734,1.11526,1.11592,1.116926,1.1192,-0.002274
7,2020-01-05 22:00:00+00:00,1.11665,1.11684,1.11596,1.11628,1.116522,1.118961,-0.002439
8,2020-01-06 02:00:00+00:00,1.1163,1.1171,1.11606,1.11624,1.11618,1.118776,-0.002596
9,2020-01-06 06:00:00+00:00,1.11624,1.11936,1.11573,1.11934,1.116414,1.11868,-0.002266


In [128]:
# to detect a crossing we need to compare DELTA with the DELTA of the previous candle
# therefore we add a new column that holds the DELTA values shifted down by one row
# (one row is one candle of the loaded granularity, not one day;
#  the first row has no predecessor, so its DELTA_PREV is NaN)
df_an['DELTA_PREV'] = df_an.DELTA.shift(1)

In [129]:
df_an.head(25) # going from below to above the line at about row 14 -> this is a buy signal

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA,DELTA_PREV
0,2020-01-02 18:00:00+00:00,1.11702,1.11736,1.11636,1.11728,1.120605,1.120208,0.000397,
1,2020-01-02 22:00:00+00:00,1.11728,1.11784,1.117,1.11736,1.120205,1.120157,4.8e-05,0.000397
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621,4.8e-05
3,2020-01-03 06:00:00+00:00,1.11664,1.11718,1.11401,1.11402,1.118601,1.119837,-0.001236,-0.000621
4,2020-01-03 10:00:00+00:00,1.11404,1.11488,1.11249,1.11382,1.117863,1.119514,-0.001651,-0.001236
5,2020-01-03 14:00:00+00:00,1.1138,1.11799,1.11354,1.11724,1.117378,1.119379,-0.002001,-0.001651
6,2020-01-03 18:00:00+00:00,1.11722,1.11734,1.11526,1.11592,1.116926,1.1192,-0.002274,-0.002001
7,2020-01-05 22:00:00+00:00,1.11665,1.11684,1.11596,1.11628,1.116522,1.118961,-0.002439,-0.002274
8,2020-01-06 02:00:00+00:00,1.1163,1.1171,1.11606,1.11624,1.11618,1.118776,-0.002596,-0.002439
9,2020-01-06 06:00:00+00:00,1.11624,1.11936,1.11573,1.11934,1.116414,1.11868,-0.002266,-0.002596


In [130]:
# do we have a trade signal on this candle?
def is_trade(row):
    """Classify one row as BUY, SELL or NONE based on a sign change of DELTA.

    BUY  -> DELTA is now >= 0 while DELTA_PREV was < 0 (short average crossed above).
    SELL -> DELTA is now < 0 while DELTA_PREV was >= 0 (short average crossed below).
    NONE -> anything else; this includes a NaN DELTA_PREV, because every
            comparison with NaN is False.
    """
    crossed_up = row.DELTA >= 0 and row.DELTA_PREV < 0
    if crossed_up:
        return BUY
    crossed_down = row.DELTA < 0 and row.DELTA_PREV >= 0
    if crossed_down:
        return SELL
    return NONE

In [131]:
# applying function and telling pandas to do it row by row (axis=1)
df_an['TRADE'] = df_an.apply(is_trade, axis=1)

In [132]:
df_an.head(25)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA,DELTA_PREV,TRADE
0,2020-01-02 18:00:00+00:00,1.11702,1.11736,1.11636,1.11728,1.120605,1.120208,0.000397,,0
1,2020-01-02 22:00:00+00:00,1.11728,1.11784,1.117,1.11736,1.120205,1.120157,4.8e-05,0.000397,0
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621,4.8e-05,-1
3,2020-01-03 06:00:00+00:00,1.11664,1.11718,1.11401,1.11402,1.118601,1.119837,-0.001236,-0.000621,0
4,2020-01-03 10:00:00+00:00,1.11404,1.11488,1.11249,1.11382,1.117863,1.119514,-0.001651,-0.001236,0
5,2020-01-03 14:00:00+00:00,1.1138,1.11799,1.11354,1.11724,1.117378,1.119379,-0.002001,-0.001651,0
6,2020-01-03 18:00:00+00:00,1.11722,1.11734,1.11526,1.11592,1.116926,1.1192,-0.002274,-0.002001,0
7,2020-01-05 22:00:00+00:00,1.11665,1.11684,1.11596,1.11628,1.116522,1.118961,-0.002439,-0.002274,0
8,2020-01-06 02:00:00+00:00,1.1163,1.1171,1.11606,1.11624,1.11618,1.118776,-0.002596,-0.002439,0
9,2020-01-06 06:00:00+00:00,1.11624,1.11936,1.11573,1.11934,1.116414,1.11868,-0.002266,-0.002596,0


In [133]:
# get a dataframe with the interesting trades
df_trades = df_an[df_an.TRADE != NONE].copy()

In [134]:
# how many trades do we have
df_trades.shape

(209, 10)

In [135]:
df_trades.head(5)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA,DELTA_PREV,TRADE
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621,4.8e-05,-1
14,2020-01-07 02:00:00+00:00,1.11932,1.11944,1.11828,1.11831,1.118167,1.118015,0.000152,-0.000441,1
21,2020-01-08 06:00:00+00:00,1.11492,1.11549,1.11253,1.11386,1.116522,1.1167,-0.000178,0.000213,-1
41,2020-01-13 14:00:00+00:00,1.11189,1.11472,1.11176,1.11398,1.111703,1.111388,0.000315,-3.5e-05,1
65,2020-01-17 14:00:00+00:00,1.11006,1.11062,1.10864,1.10912,1.113187,1.11345,-0.000263,0.000161,-1


In [136]:
# let's check if the crosses appear where they're supposed to appear
cp = CandlePlot(df_an.iloc[15:70])
cp.show_plot(line_traces=[MA_S, MA_L])

In [137]:
# evaluate the trades in terms of pips
# to gain insight into how effective our trades were
# load the instruments into the collection that was imported at the top
ic.LoadInstruments("../../data")

In [138]:
ic

<infrastructure.instrument_collection.InstrumentCollection at 0x110491a90>

In [139]:
ic.instruments_dict[pair] # loading the "EUR_USD" pair from the collection

{'name': 'EUR_USD', 'ins_type': 'CURRENCY', 'displayName': 'EUR/USD', 'pipLocation': 0.0001, 'tradeUnitsPrecision': 0, 'marginRate': 0.0333}

In [140]:
ins_data = ic.instruments_dict[pair]

In [141]:
# working out the gains that we've made
# looking at the data first
# example rows 21 and 41:
# 21: we sell at 1.11386 (mid close)
# 41: then we buy again at 1.11398 (mid close)
df_trades.head()

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA,DELTA_PREV,TRADE
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621,4.8e-05,-1
14,2020-01-07 02:00:00+00:00,1.11932,1.11944,1.11828,1.11831,1.118167,1.118015,0.000152,-0.000441,1
21,2020-01-08 06:00:00+00:00,1.11492,1.11549,1.11253,1.11386,1.116522,1.1167,-0.000178,0.000213,-1
41,2020-01-13 14:00:00+00:00,1.11189,1.11472,1.11176,1.11398,1.111703,1.111388,0.000315,-3.5e-05,1
65,2020-01-17 14:00:00+00:00,1.11006,1.11062,1.10864,1.10912,1.113187,1.11345,-0.000263,0.000161,-1


In [142]:
# highlight the background of one column (mid_c by default)
# function adapted from
# https://stackoverflow.com/questions/44388149/colouring-one-column-of-pandas-dataframe
def highlight_col(x, col='mid_c'):
    """Build the CSS frame for ``df.style.apply(highlight_col, axis=None)``.

    The column is looked up by label instead of by position, so the highlight
    stays on the intended column when columns are added, removed or reordered.

    Parameters
    ----------
    x : pd.DataFrame
        The frame that is being styled.
    col : str, default 'mid_c'
        Label of the column to highlight.

    Returns
    -------
    pd.DataFrame
        Frame with the same index and columns as ``x``; every cell of ``col``
        holds the highlight CSS, all other cells hold an empty string.

    Raises
    ------
    KeyError
        If ``col`` is not a column of ``x``.
    """
    if col not in x.columns:
        # assigning to a missing label would silently append a new column
        raise KeyError(f"column {col!r} not found, cannot highlight it")

    css = 'background-color: green; color: white'
    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    styles[col] = css
    return styles

In [143]:
# DIFF = price move between this trade signal and the next one
# diff() gives mid_c minus the mid_c of the previous trade row; shift(-1) moves
# that value one row up, so each row holds (next trade's mid_c - this trade's mid_c)
# the last trade has no successor and would be NaN: fill it with 0,
# but only in DIFF, so that missing values in other columns are not silently zeroed
df_trades['DIFF'] = df_trades.mid_c.diff().shift(-1).fillna(0)

In [144]:
# line 21 to 41: price went up so difference is positive
# DIFF line 21 is mid_c line 41 minus mid_c line 21
df_trades.head().style.apply(highlight_col, axis=None)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA,DELTA_PREV,TRADE,DIFF
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621,4.8e-05,-1,0.00167
14,2020-01-07 02:00:00+00:00,1.11932,1.11944,1.11828,1.11831,1.118167,1.118015,0.000152,-0.000441,1,-0.00445
21,2020-01-08 06:00:00+00:00,1.11492,1.11549,1.11253,1.11386,1.116522,1.1167,-0.000178,0.000213,-1,0.00012
41,2020-01-13 14:00:00+00:00,1.11189,1.11472,1.11176,1.11398,1.111703,1.111388,0.000315,-3.5e-05,1,-0.00486
65,2020-01-17 14:00:00+00:00,1.11006,1.11062,1.10864,1.10912,1.113187,1.11345,-0.000263,0.000161,-1,-0.00619


In [147]:
# GAIN in pips: the price difference divided by the pip size of the instrument,
# multiplied by the trade direction (+1 buy / -1 sell) so that a price move
# against the position shows up as a loss
df_trades['GAIN'] = (df_trades['DIFF'] / ins_data.pipLocation) * df_trades['TRADE']

In [148]:
df_trades.head().style.apply(highlight_col, axis=None)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,DELTA,DELTA_PREV,TRADE,DIFF,GAIN
2,2020-01-03 02:00:00+00:00,1.11738,1.11774,1.11644,1.11664,1.119489,1.12011,-0.000621,4.8e-05,-1,0.00167,-16.7
14,2020-01-07 02:00:00+00:00,1.11932,1.11944,1.11828,1.11831,1.118167,1.118015,0.000152,-0.000441,1,-0.00445,-44.5
21,2020-01-08 06:00:00+00:00,1.11492,1.11549,1.11253,1.11386,1.116522,1.1167,-0.000178,0.000213,-1,0.00012,-1.2
41,2020-01-13 14:00:00+00:00,1.11189,1.11472,1.11176,1.11398,1.111703,1.111388,0.000315,-3.5e-05,1,-0.00486,-48.6
65,2020-01-17 14:00:00+00:00,1.11006,1.11062,1.10864,1.10912,1.113187,1.11345,-0.000263,0.000161,-1,-0.00619,61.9


In [150]:
# calculate sum of the gains to see how it performed over time
# what's getting summarized here are pips
df_trades.GAIN.sum()

2131.0000000000136