## Analysing moving average crossings

In [141]:
import pandas as pd
import plotly.graph_objects as go
import datetime as dt
from plotting import CandlePlot

In [142]:
# extend path to import modules from other folders
import sys
# append everything above the level where we're now
sys.path.append("../")

In [143]:
# import instruments to calculate pips after trade signals have been identified
from infrastructure.instrument_collection import instrumentCollection as ic

In [144]:
# loading the candle data for one instrument pair at one granularity
pair = "GBP_JPY"
granularity = "H4"
# NOTE: unpickling can execute arbitrary code - only read pickle files you created yourself
df = pd.read_pickle(f"../../data/{pair}_{granularity}.pkl")
# window sizes (number of candles) of the moving averages calculated further down
MA_LIST = [10, 20, 50, 100, 200]

In [145]:
df.shape

(4000, 14)

In [146]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype                  
---  ------  --------------  -----                  
 0   time    4000 non-null   datetime64[ns, tzutc()]
 1   volume  4000 non-null   int64                  
 2   mid_o   4000 non-null   float64                
 3   mid_h   4000 non-null   float64                
 4   mid_l   4000 non-null   float64                
 5   mid_c   4000 non-null   float64                
 6   bid_o   4000 non-null   float64                
 7   bid_h   4000 non-null   float64                
 8   bid_l   4000 non-null   float64                
 9   bid_c   4000 non-null   float64                
 10  ask_o   4000 non-null   float64                
 11  ask_h   4000 non-null   float64                
 12  ask_l   4000 non-null   float64                
 13  ask_c   4000 non-null   float64                
dtypes: datetime64[ns, tzutc()](1), float64(1

In [147]:
df.head()

Unnamed: 0,time,volume,mid_o,mid_h,mid_l,mid_c,bid_o,bid_h,bid_l,bid_c,ask_o,ask_h,ask_l,ask_c
0,2019-12-27 14:00:00+00:00,7147,143.394,143.653,143.299,143.39,143.372,143.629,143.276,143.368,143.417,143.677,143.322,143.411
1,2019-12-27 18:00:00+00:00,3835,143.392,143.45,143.108,143.295,143.369,143.427,143.085,143.22,143.414,143.473,143.132,143.37
2,2019-12-29 22:00:00+00:00,3818,143.32,143.39,143.116,143.318,143.245,143.37,143.041,143.297,143.395,143.433,143.191,143.338
3,2019-12-30 02:00:00+00:00,3415,143.316,143.358,143.02,143.228,143.294,143.34,142.994,143.206,143.337,143.375,143.041,143.25
4,2019-12-30 06:00:00+00:00,6734,143.226,143.267,142.868,143.175,143.204,143.249,142.853,143.16,143.248,143.285,142.884,143.19


In [148]:
# get data frame with copy of relevant data
df_ma = df[['time', 'mid_o', 'mid_h', 'mid_l', 'mid_c']].copy()

In [149]:
# add one rolling-mean column per window size in MA_LIST,
# each one calculated over the mid close price
for window in MA_LIST:
    df_ma[f'MA_{window}'] = df_ma['mid_c'].rolling(window=window).mean()
# rows without a complete window hold NaN in at least one MA column:
# drop them and renumber the remaining rows starting from 0
df_ma = df_ma.dropna().reset_index(drop=True)

In [150]:
df_ma.head()

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_10,MA_20,MA_50,MA_100,MA_200
0,2020-02-13 18:00:00+00:00,143.32,143.404,143.202,143.23,142.7285,142.3946,142.25422,142.51146,142.74462
1,2020-02-13 22:00:00+00:00,143.235,143.346,143.132,143.274,142.7777,142.478,142.29322,142.50792,142.74404
2,2020-02-14 02:00:00+00:00,143.272,143.404,143.176,143.236,142.8408,142.5515,142.33268,142.4965,142.743745
3,2020-02-14 06:00:00+00:00,143.234,143.409,142.974,143.128,142.8862,142.6141,142.37104,142.48532,142.742795
4,2020-02-14 10:00:00+00:00,143.13,143.204,142.78,142.878,142.9066,142.6619,142.39644,142.47061,142.741045


In [151]:
df_ma.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3801 entries, 0 to 3800
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype                  
---  ------  --------------  -----                  
 0   time    3801 non-null   datetime64[ns, tzutc()]
 1   mid_o   3801 non-null   float64                
 2   mid_h   3801 non-null   float64                
 3   mid_l   3801 non-null   float64                
 4   mid_c   3801 non-null   float64                
 5   MA_10   3801 non-null   float64                
 6   MA_20   3801 non-null   float64                
 7   MA_50   3801 non-null   float64                
 8   MA_100  3801 non-null   float64                
 9   MA_200  3801 non-null   float64                
dtypes: datetime64[ns, tzutc()](1), float64(9)
memory usage: 297.1 KB


In [152]:
# plotting the data frame with plotly
# take the first 500 candles of the data frame
df_plot = df_ma.iloc[:500]

In [153]:
df_plot.shape

(500, 10)

In [154]:
# create candle plot instance
cp = CandlePlot(df_plot)

In [155]:
traces = [ f"MA_{x}" for x in MA_LIST ]

In [156]:
traces

['MA_10', 'MA_20', 'MA_50', 'MA_100', 'MA_200']

In [157]:
cp.show_plot(line_traces=traces)

## Deriving a strategy from average crossings

In [158]:
MA_S = "MA_50"   # column used as the short moving average
MA_L = "MA_200"  # column used as the long moving average
# values written to the TRADE column by is_trade
BUY = 1
SELL = -1
NONE = 0

In [159]:
# get data frame with copy of relevant data
df_an = df_ma[['time', 'mid_o', 'mid_h', 'mid_l', 'mid_c', MA_S, MA_L]].copy()

In [160]:
df_an.head()

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200
0,2020-02-13 18:00:00+00:00,143.32,143.404,143.202,143.23,142.25422,142.74462
1,2020-02-13 22:00:00+00:00,143.235,143.346,143.132,143.274,142.29322,142.74404
2,2020-02-14 02:00:00+00:00,143.272,143.404,143.176,143.236,142.33268,142.743745
3,2020-02-14 06:00:00+00:00,143.234,143.409,142.974,143.128,142.37104,142.742795
4,2020-02-14 10:00:00+00:00,143.13,143.204,142.78,142.878,142.39644,142.741045


In [161]:
# calculate delta between short and long line
# the columns are looked up through MA_S / MA_L so that changing those
# constants really changes which averages are compared
df_an['DELTA'] = df_an[MA_S] - df_an[MA_L]

In [162]:
df_an.head(25) # DELTA is negative in all of these rows; the first crossing (a buy signal) only happens at row 27

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA
0,2020-02-13 18:00:00+00:00,143.32,143.404,143.202,143.23,142.25422,142.74462,-0.4904
1,2020-02-13 22:00:00+00:00,143.235,143.346,143.132,143.274,142.29322,142.74404,-0.45082
2,2020-02-14 02:00:00+00:00,143.272,143.404,143.176,143.236,142.33268,142.743745,-0.411065
3,2020-02-14 06:00:00+00:00,143.234,143.409,142.974,143.128,142.37104,142.742795,-0.371755
4,2020-02-14 10:00:00+00:00,143.13,143.204,142.78,142.878,142.39644,142.741045,-0.344605
5,2020-02-14 14:00:00+00:00,142.876,143.128,142.768,143.096,142.41852,142.74065,-0.32213
6,2020-02-14 18:00:00+00:00,143.098,143.266,143.01,143.221,142.43772,142.740205,-0.302485
7,2020-02-16 22:00:00+00:00,143.271,143.374,143.12,143.284,142.4504,142.742295,-0.291895
8,2020-02-17 02:00:00+00:00,143.282,143.354,143.243,143.328,142.46234,142.745075,-0.282735
9,2020-02-17 06:00:00+00:00,143.326,143.348,143.141,143.274,142.47606,142.748175,-0.272115


In [163]:
# to detect a crossing we need to compare each DELTA with the DELTA of the previous candle
# therefore we add a new column that holds the DELTA value shifted down by one row
df_an['DELTA_PREV'] = df_an.DELTA.shift(1)

In [164]:
df_an.head(25) # DELTA is negative in all of these rows; the first crossing (a buy signal) only happens at row 27

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA,DELTA_PREV
0,2020-02-13 18:00:00+00:00,143.32,143.404,143.202,143.23,142.25422,142.74462,-0.4904,
1,2020-02-13 22:00:00+00:00,143.235,143.346,143.132,143.274,142.29322,142.74404,-0.45082,-0.4904
2,2020-02-14 02:00:00+00:00,143.272,143.404,143.176,143.236,142.33268,142.743745,-0.411065,-0.45082
3,2020-02-14 06:00:00+00:00,143.234,143.409,142.974,143.128,142.37104,142.742795,-0.371755,-0.411065
4,2020-02-14 10:00:00+00:00,143.13,143.204,142.78,142.878,142.39644,142.741045,-0.344605,-0.371755
5,2020-02-14 14:00:00+00:00,142.876,143.128,142.768,143.096,142.41852,142.74065,-0.32213,-0.344605
6,2020-02-14 18:00:00+00:00,143.098,143.266,143.01,143.221,142.43772,142.740205,-0.302485,-0.32213
7,2020-02-16 22:00:00+00:00,143.271,143.374,143.12,143.284,142.4504,142.742295,-0.291895,-0.302485
8,2020-02-17 02:00:00+00:00,143.282,143.354,143.243,143.328,142.46234,142.745075,-0.282735,-0.291895
9,2020-02-17 06:00:00+00:00,143.326,143.348,143.141,143.274,142.47606,142.748175,-0.272115,-0.282735


In [165]:
# do we have a trade sign
def is_trade(row):
    """Classify one row as BUY, SELL or NONE based on a sign change of DELTA.

    row: a row that provides DELTA (short MA minus long MA for this candle)
         and DELTA_PREV (the DELTA of the previous row).

    Returns BUY when DELTA moved from below zero to zero or above,
    SELL when it moved from zero or above to below zero, NONE otherwise.
    A missing (NaN) DELTA_PREV makes both comparisons false, so the
    result is NONE for such a row.
    """
    current, previous = row.DELTA, row.DELTA_PREV
    crossed_up = previous < 0 and current >= 0
    crossed_down = previous >= 0 and current < 0
    if crossed_up:
        return BUY
    if crossed_down:
        return SELL
    return NONE

In [166]:
# applying function and telling pandas to do it row by row (axis=1)
df_an['TRADE'] = df_an.apply(is_trade, axis=1)

In [167]:
df_an.head(25)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA,DELTA_PREV,TRADE
0,2020-02-13 18:00:00+00:00,143.32,143.404,143.202,143.23,142.25422,142.74462,-0.4904,,0
1,2020-02-13 22:00:00+00:00,143.235,143.346,143.132,143.274,142.29322,142.74404,-0.45082,-0.4904,0
2,2020-02-14 02:00:00+00:00,143.272,143.404,143.176,143.236,142.33268,142.743745,-0.411065,-0.45082,0
3,2020-02-14 06:00:00+00:00,143.234,143.409,142.974,143.128,142.37104,142.742795,-0.371755,-0.411065,0
4,2020-02-14 10:00:00+00:00,143.13,143.204,142.78,142.878,142.39644,142.741045,-0.344605,-0.371755,0
5,2020-02-14 14:00:00+00:00,142.876,143.128,142.768,143.096,142.41852,142.74065,-0.32213,-0.344605,0
6,2020-02-14 18:00:00+00:00,143.098,143.266,143.01,143.221,142.43772,142.740205,-0.302485,-0.32213,0
7,2020-02-16 22:00:00+00:00,143.271,143.374,143.12,143.284,142.4504,142.742295,-0.291895,-0.302485,0
8,2020-02-17 02:00:00+00:00,143.282,143.354,143.243,143.328,142.46234,142.745075,-0.282735,-0.291895,0
9,2020-02-17 06:00:00+00:00,143.326,143.348,143.141,143.274,142.47606,142.748175,-0.272115,-0.282735,0


In [168]:
# get a dataframe with the interesting trades
df_trades = df_an[df_an.TRADE != NONE].copy()

In [169]:
# how many trades do we have
df_trades.shape

(28, 10)

In [170]:
df_trades.head(5)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA,DELTA_PREV,TRADE
27,2020-02-20 06:00:00+00:00,143.83,144.609,143.789,144.476,142.80138,142.80117,0.00021,-0.0418,1
69,2020-03-02 06:00:00+00:00,138.866,139.188,137.542,137.656,142.6701,142.74126,-0.07116,0.00753,-1
248,2020-04-13 01:00:00+00:00,134.844,134.882,134.517,134.58,134.04794,134.03816,0.00978,-0.05464,1
349,2020-05-05 21:00:00+00:00,132.564,132.701,132.136,132.161,133.1119,133.146035,-0.034135,0.001105,-1
467,2020-06-02 13:00:00+00:00,135.912,136.524,135.912,136.286,132.4762,132.450305,0.025895,-0.05588,1


In [187]:
# let's check if the crosses appear where they're supposed to appear
cp = CandlePlot(df_an.iloc[50:260])
cp.show_plot(line_traces=[MA_S, MA_L])

In [172]:
# evaluate the trades in terms of pips
# to gain insight into how effective our trades were
# load the instruments through the collection that was imported at the top
ic.LoadInstruments("../../data")

In [173]:
ic

<infrastructure.instrument_collection.InstrumentCollection at 0x11e0d2af0>

In [174]:
ic.instruments_dict[pair] # look up the selected pair (GBP_JPY here) in the collection

{'name': 'GBP_JPY', 'ins_type': 'CURRENCY', 'displayName': 'GBP/JPY', 'pipLocation': 0.01, 'tradeUnitsPrecision': 0, 'marginRate': 0.0333}

In [175]:
ins_data = ic.instruments_dict[pair]

In [176]:
# accessing the gains that we've made
# looking at the data first
# example rows 27 and 69:
# 27: buy signal at 144.476 (mid close)
# 69: then a sell signal at 137.656 (mid close)
df_trades.head()

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA,DELTA_PREV,TRADE
27,2020-02-20 06:00:00+00:00,143.83,144.609,143.789,144.476,142.80138,142.80117,0.00021,-0.0418,1
69,2020-03-02 06:00:00+00:00,138.866,139.188,137.542,137.656,142.6701,142.74126,-0.07116,0.00753,-1
248,2020-04-13 01:00:00+00:00,134.844,134.882,134.517,134.58,134.04794,134.03816,0.00978,-0.05464,1
349,2020-05-05 21:00:00+00:00,132.564,132.701,132.136,132.161,133.1119,133.146035,-0.034135,0.001105,-1
467,2020-06-02 13:00:00+00:00,135.912,136.524,135.912,136.286,132.4762,132.450305,0.025895,-0.05588,1


In [177]:
# highlight the background of one column (mid_c by default)
# function adapted from
# https://stackoverflow.com/questions/44388149/colouring-one-column-of-pandas-dataframe
def highlight_col(x, col='mid_c'):
    """Build the CSS frame that highlights a single column of ``x``.

    Meant to be passed to ``Styler.apply(highlight_col, axis=None)``, which
    hands over the whole data frame and expects a frame of CSS strings with
    the same index and columns back.

    x:   the data frame being styled.
    col: name of the column to highlight. The column is selected by name
         instead of by position, so the highlight stays on the right column
         when columns are added, removed or reordered.

    Returns a data frame shaped like ``x`` that holds the highlight style in
    ``col`` and empty strings everywhere else. If ``x`` has no column named
    ``col`` nothing is highlighted.
    """
    css = 'background-color: green; color: white'
    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    if col in styles.columns:
        styles[col] = css
    return styles

In [178]:
# price change from each signal to the following signal:
# diff() yields mid_c minus the mid_c of the previous signal and shift(-1)
# moves that value one row up, onto the row where the trade was opened
# the last signal has no following signal, so only its DIFF is set to 0
# (a frame-wide fillna would also turn missing values in other columns into 0)
df_trades['DIFF'] = df_trades.mid_c.diff().shift(-1).fillna(0)

In [179]:
# rows 27 to 69: the price went down, so the difference is negative
# DIFF in row 27 is mid_c of row 69 minus mid_c of row 27 (137.656 - 144.476 = -6.82)
df_trades.head().style.apply(highlight_col, axis=None)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA,DELTA_PREV,TRADE,DIFF
27,2020-02-20 06:00:00+00:00,143.83,144.609,143.789,144.476,142.80138,142.80117,0.00021,-0.0418,1,-6.82
69,2020-03-02 06:00:00+00:00,138.866,139.188,137.542,137.656,142.6701,142.74126,-0.07116,0.00753,-1,-3.076
248,2020-04-13 01:00:00+00:00,134.844,134.882,134.517,134.58,134.04794,134.03816,0.00978,-0.05464,1,-2.419
349,2020-05-05 21:00:00+00:00,132.564,132.701,132.136,132.161,133.1119,133.146035,-0.034135,0.001105,-1,4.125
467,2020-06-02 13:00:00+00:00,135.912,136.524,135.912,136.286,132.4762,132.450305,0.025895,-0.05588,1,-3.184


In [180]:
# express the price difference in pips by dividing by the instrument's pip size
pips_moved = df_trades['DIFF'] / ins_data.pipLocation
# sign it with the trade direction (BUY = 1, SELL = -1) so that a falling
# price counts as a gain for a sell and as a loss for a buy
df_trades['GAIN'] = pips_moved * df_trades['TRADE']

In [181]:
df_trades.head().style.apply(highlight_col, axis=None)

Unnamed: 0,time,mid_o,mid_h,mid_l,mid_c,MA_50,MA_200,DELTA,DELTA_PREV,TRADE,DIFF,GAIN
27,2020-02-20 06:00:00+00:00,143.83,144.609,143.789,144.476,142.80138,142.80117,0.00021,-0.0418,1,-6.82,-682.0
69,2020-03-02 06:00:00+00:00,138.866,139.188,137.542,137.656,142.6701,142.74126,-0.07116,0.00753,-1,-3.076,307.6
248,2020-04-13 01:00:00+00:00,134.844,134.882,134.517,134.58,134.04794,134.03816,0.00978,-0.05464,1,-2.419,-241.9
349,2020-05-05 21:00:00+00:00,132.564,132.701,132.136,132.161,133.1119,133.146035,-0.034135,0.001105,-1,4.125,-412.5
467,2020-06-02 13:00:00+00:00,135.912,136.524,135.912,136.286,132.4762,132.450305,0.025895,-0.05588,1,-3.184,-318.4


In [182]:
# calculate sum of the gains to see how it performed over time
# the values being summed here are pips
df_trades.GAIN.sum()

-3438.3000000000147

In [183]:
# calculating cumulative gain
df_trades['GAIN_C'] = df_trades['GAIN'].cumsum()

In [184]:
cp = CandlePlot(df_trades, candles=False)
cp.show_plot(line_traces=['GAIN_C'])
# NOTE(review): the observations below look carried over from an earlier run on a
# different pair - this data set is GBP_JPY and starts on 2020-02-13, so it holds
# no January 2020 candles. Re-check them against the current plot.
# plot shows a huge gain at the end of January 2020
# the time when the Covid crisis started and everything dropped except the US dollar
# high pip value due to the rise of the US dollar at this time
# doesn't tell much about how effective the strategy is