<h1>Requirement pip install</h1>

In [None]:
!pip install googleapis-common-protos protobuf grpcio pandas matplotlib systemathics.apis
!pip install sklearn

In [None]:
import os
import grpc
import pandas as pd
from datetime import datetime
import google.type.date_pb2 as date
import google.type.dayofweek_pb2 as dayofweek
import google.type.timeofday_pb2 as timeofday
import google.protobuf.duration_pb2 as duration
import systemathics.apis.type.shared.v1.identifier_pb2 as identifier
import systemathics.apis.services.daily.v1.daily_bars_pb2 as daily_bars
import systemathics.apis.services.daily.v1.daily_bars_pb2_grpc as daily_bars_service
from math import *
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

<h1>Authentication</h1>

In [None]:
# Build the gRPC authorization header value from the AUTH0_TOKEN environment variable.
token = f"Bearer {os.environ['AUTH0_TOKEN']}"
# Do not echo the token: cell outputs are stored in the notebook file and would leak the credential.
print("Authorization token loaded from AUTH0_TOKEN")

<h1>Get Data of pairs selected for the dataset</h1>

<p> The indicators we compute, which the model uses to predict the future spread, are the following: the normalized spread, the correlation between the two assets over the last 30 days, the volatility over the last 30 days, the volume in the last period, the change in the spread since the previous period, the moving average of the spread over the last 7 days, and the week and the month (seasonality could have an impact).

This might be enough, though we could at least add correlation and volatility computed over different windows, or other indicators (why not an indicator measuring the current fear in the market?).
</p>

In [None]:
# set the instruments identifiers: tickers and exchange
exchange = "XNGS"

# Pair processed by this run
ticker_1 = "ADSK"
ticker_2 = "CHKP"
# Other pairs used to constitute the dataset:
#   EA   / SPLK
#   AVGO / ADI
#   CPRT / PAYX
#   TXN  / NXPI
#   BIIB / GILD
#   MU   / INTC
#   ADP  / CTAS

# create daily bars requests for the pair instruments
daily_request_1 = daily_bars.DailyBarsRequest(identifier = identifier.Identifier(exchange = exchange, ticker = ticker_1))
daily_request_2 = daily_bars.DailyBarsRequest(identifier = identifier.Identifier(exchange = exchange, ticker = ticker_2))

# open a single gRPC channel, instantiate the daily bars service once and get both replies through it
with open(os.environ['SSL_CERT_FILE'], 'rb') as f:
    credentials = grpc.ssl_channel_credentials(f.read())
with grpc.secure_channel(os.environ['GRPC_APIS'], credentials) as channel:
    daily_service = daily_bars_service.DailyBarsServiceStub(channel)
    auth_metadata = [('authorization', token)]
    response_1 = daily_service.DailyBars(request = daily_request_1, metadata = auth_metadata)
    response_2 = daily_service.DailyBars(request = daily_request_2, metadata = auth_metadata)

print("Total bars for ticker1 retrieved: ",len(response_1.data))
print("Total bars for ticker2 retrieved: ",len(response_2.data))

# create pandas dataframe to store close prices for the pair instruments.
# Each instrument gets its own (Date, close) frame and the two are joined on the date, so a
# bar missing for one instrument cannot shift the other instrument's prices onto the wrong day.
length = 1000 # keep the last 1000 common trading days
bars_1 = pd.DataFrame({
    'Date': [datetime(ts.date.year, ts.date.month, ts.date.day) for ts in response_1.data],
    'Price_1': [ts.close for ts in response_1.data],
})
bars_2 = pd.DataFrame({
    'Date': [datetime(ts.date.year, ts.date.month, ts.date.day) for ts in response_2.data],
    'Price_2': [ts.close for ts in response_2.data],
})
df = bars_1.merge(bars_2, on='Date', how='inner').sort_values('Date').tail(length).reset_index(drop=True)
df['Price_1'] = pd.to_numeric(df['Price_1'])
df['Price_2'] = pd.to_numeric(df['Price_2'])


# Here we calculate all the indicators used as variables by the Machine Learning models to predict later

df['Rol1'] = df['Price_1'].shift(periods=5)   # Here we get the price 5 periods before to get the evolution on one week
df['Rol2'] = df['Price_2'].shift(periods=5)

# Spread = difference between the 5-period log10 returns of the two assets
df['Spread'] = np.log10(df['Price_1'] / df['Rol1']) - np.log10(df['Price_2'] / df['Rol2'])
spread_window = df['Spread'].rolling(window=30)
df['z_score'] = (df['Spread'] - spread_window.mean()) / spread_window.std()  # Spread normalized over the last 30 periods

df['z_score_evol'] = df['z_score'] - df['z_score'].shift(periods=1)   # We get the evolution of the spread on one period

df['z_score_avg_week'] = df['z_score'].rolling(window=5).mean() # We calculate the average spread on the week

period=30 # Period used for the historical volatility

number_days=252   #Number of trading days in a year (to get annualized volatility)

df['log_returns1'] = np.log(df['Price_1']/df['Price_1'].shift(periods=1))
df['log_returns2'] = np.log(df['Price_2']/df['Price_2'].shift(periods=1))

df['volatility1'] = df['log_returns1'].rolling(window=period).std()*np.sqrt(number_days)  # Volatility of the first asset
df['volatility2'] = df['log_returns2'].rolling(window=period).std()*np.sqrt(number_days)  # Volatility of the second asset

df['avg_volatility'] = (df['volatility1']+df['volatility2'])/2  # We calculate the average volatility of the two assets

# Correlation between the two price series over the 30 periods that PRECEDE each row
# (rows i-30 .. i-1, hence the shift by one). This is the vectorized form of the former
# per-row loop, which wrote through chained indexing (df['correlation'].iloc[i] = ...).
df['correlation'] = df['Price_1'].rolling(window=30).corr(df['Price_2']).shift(periods=1)
df['corr_evol'] = df['correlation'] - df['correlation'].shift(periods=1)

df['month'] = df['Date'].dt.month  # Current month
df['week'] = df['Date'].dt.isocalendar().week  # Current week of the year

df=df.drop(columns=['volatility1', 'volatility2','log_returns1','log_returns2','Rol1','Rol2'])  # We drop columns only used to calculate the indicators we wanted

df['future_z_score']= df['z_score'].shift(-5)  # We get the future normalized spread in 5 periods, which is going to be our target for the ML models

def target(spread):
    """Turn a normalized spread into a class label.

    Returns 1 when `spread` is strictly above 1.5, -1 when it is strictly below -1.5,
    and 0 otherwise. A NaN compares False against both thresholds, so it also yields 0.
    """
    upper_threshold, lower_threshold = 1.5, -1.5
    if spread > upper_threshold:
        return 1
    return -1 if spread < lower_threshold else 0

df['target'] = df['future_z_score'].apply(target)

# We keep only one row out of 5 periods since we dont want the model to be able to use data between
# the day it makes the prediction and the day the prediction is verified, otherwise it would be cheating
df = df[::5]

# target() maps a NaN spread to 0, so rows whose 5-period-ahead z-score is not known
# (start of the rolling windows, last rows of the series) would otherwise be saved as
# genuine "class 0" samples. Drop them instead of keeping a made-up label.
df = df[df['future_z_score'].notna()]

# We drop the future normalized spread since we are making it a classification problem with the target function
df = df.drop(columns=['future_z_score'])
df.to_csv(r'./data1.csv')


<h1>Creating the dataset </h1>

<p> We built a dataset from 8 different pairs that are stationary over the recovered period. We then grouped all the data into one dataframe</p>

In [None]:
# Load the eight per-pair feature files. The first column of each file is the row index
# written by DataFrame.to_csv; reading it back with index_col=0 keeps it from turning into
# an "Unnamed: 0" data column that would later be fed to the models as a feature.
pair_files = [f'./data{i}.csv' for i in range(1, 9)]
frames = [pd.read_csv(path, index_col=0) for path in pair_files]
df1, df2, df3, df4, df5, df6, df7, df8 = frames

# Stack all pairs into one frame with a fresh 0..n-1 index
dataset = pd.concat(frames, ignore_index=True)
dataset.to_csv('./dataset.csv')  # Saving the dataset

In [None]:
# Number of samples per target class (-1, 0, 1).
# Author's observation: about 90% of the time the normalised spread is between -1.5 and 1.5.
class_counts = dataset['target'].value_counts()
class_counts

<h1>Loading the dataset</h1>

In [None]:
dataset = pd.read_csv('./dataset.csv')
# Remove every saved-index column ("Unnamed: 0", "Unnamed: 0.1", ...): they are row
# numbers written by to_csv, not indicators, and must not reach the models.
dataset = dataset.loc[:, ~dataset.columns.str.startswith('Unnamed')]
# Shuffle the rows; the fixed seed makes the train/test split below reproducible.
dataset = dataset.sample(frac=1, random_state=42)
dataset.head()

<h1>Training/Test Sets</h1>

In [None]:
# Rows with any missing indicator cannot be used by the models
dataset_clean = dataset.dropna()

# Using 80% as a training set and 20% for the test.
# The split point is computed on the dataset itself (not on `df`, the single-pair frame
# left over from the data-preparation cell, which has a different length).
split_at = int(0.8 * len(dataset_clean))
df_train = dataset_clean.iloc[:split_at].sample(frac=1, random_state=42).reset_index(drop=True)
df_test = dataset_clean.iloc[split_at:].sample(frac=1, random_state=42).reset_index(drop=True)

# Below we create the variables part X and the target part y.
# 'Date' (a string once read back from CSV) and any saved-index column are not indicators,
# so they are excluded; this leaves the same feature columns the backtest later predicts on.
non_feature_columns = ['target', 'Date'] + [c for c in dataset_clean.columns if str(c).startswith('Unnamed')]

y_train = df_train[['target']]
y_test = df_test[['target']]

X_train = df_train.drop(columns=non_feature_columns, errors='ignore')
X_test = df_test.drop(columns=non_feature_columns, errors='ignore')

X_train.head()

<h1>Creating/evaluating the ML models </h1>

In [None]:
# Random forest classifier; random_state is fixed so the reported accuracy is reproducible
rf = RandomForestClassifier(random_state=42)
rf = rf.fit(X_train, y_train.values.ravel())

y_pred = rf.predict(X_test)
print(y_pred)

# NOTE(review): the classes are imbalanced (see the value counts above), so accuracy alone
# can look good for a model that always predicts 0.
print("accuracy_score: %.2f"% accuracy_score(y_test, y_pred))

In [None]:
# Gradient boosting classifier; random_state is fixed so that this model, which is reused
# by the backtest below, gives the same predictions on every run
gb = GradientBoostingClassifier(random_state=42)
gb = gb.fit(X_train, y_train.values.ravel())

y_pred = gb.predict(X_test)
print(y_pred,len(y_pred))

print("accuracy_score: %.2f"% accuracy_score(y_test, y_pred))

In [None]:
# Logistic regression, allowed up to 1000 solver iterations
train_labels = y_train.values.ravel()
reg = LogisticRegression(max_iter=1000).fit(X_train, train_labels)

# Predicted classes for the test set, then the share of correct predictions
y_pred = reg.predict(X_test)
print(y_pred)

test_accuracy = accuracy_score(y_test, y_pred)
print("accuracy_score: %.2f"% test_accuracy)

<h1>Using the Machine Learning model in a backtest </h1>

<p> In this part we try the ML strategy on only one pair. Strategy used for the backtest: a position is entered when the model predicts that the normalized spread will be higher than 1.5 or lower than -1.5 in a week. If the model predicts a normalized spread higher than 1.5, a long position is taken (long asset 1 and short asset 2), because according to the algorithm asset 1 should perform better than asset 2 (in the opposite case a short position is taken). We keep a long position as long as the model predicts a future spread higher than 1.5, and a short position as long as it predicts a future spread lower than -1.5.

The issue here is that the number of trades will be too small to be meaningful. The next step would be to test this strategy with the pair selection we did for the complete backtest.</p>

In [None]:
# set the instruments identifiers: tickers and exchange
exchange = "XNGS"

# Pair used for the backtest
ticker_1 ="AAPL"
ticker_2 ="MSFT"


# create daily bars requests for the pair instruments
daily_request_1 = daily_bars.DailyBarsRequest(identifier = identifier.Identifier(exchange = exchange, ticker = ticker_1))
daily_request_2 = daily_bars.DailyBarsRequest(identifier = identifier.Identifier(exchange = exchange, ticker = ticker_2))

# open a single gRPC channel, instantiate the daily bars service once and get both replies through it
with open(os.environ['SSL_CERT_FILE'], 'rb') as f:
    credentials = grpc.ssl_channel_credentials(f.read())
with grpc.secure_channel(os.environ['GRPC_APIS'], credentials) as channel:
    daily_service = daily_bars_service.DailyBarsServiceStub(channel)
    auth_metadata = [('authorization', token)]
    response_1 = daily_service.DailyBars(request = daily_request_1, metadata = auth_metadata)
    response_2 = daily_service.DailyBars(request = daily_request_2, metadata = auth_metadata)

print("Total bars for ticker1 retrieved: ",len(response_1.data))
print("Total bars for ticker2 retrieved: ",len(response_2.data))

# create pandas dataframe to store close prices for the pair instruments.
# Each instrument gets its own (Date, close) frame and the two are joined on the date, so a
# bar missing for one instrument cannot shift the other instrument's prices onto the wrong day.
length = 1000 # keep the last 1000 common trading days
bars_1 = pd.DataFrame({
    'Date': [datetime(ts.date.year, ts.date.month, ts.date.day) for ts in response_1.data],
    'Price_1': [ts.close for ts in response_1.data],
})
bars_2 = pd.DataFrame({
    'Date': [datetime(ts.date.year, ts.date.month, ts.date.day) for ts in response_2.data],
    'Price_2': [ts.close for ts in response_2.data],
})
df = bars_1.merge(bars_2, on='Date', how='inner').sort_values('Date').tail(length).reset_index(drop=True)
df['Price_1'] = pd.to_numeric(df['Price_1'])
df['Price_2'] = pd.to_numeric(df['Price_2'])


# Here we calculate all the indicators used as variables by the Machine Learning models to predict later

df['Rol1'] = df['Price_1'].shift(periods=5)   # Here we get the price 5 periods before to get the evolution on one week
df['Rol2'] = df['Price_2'].shift(periods=5)

# Spread = difference between the 5-period log10 returns of the two assets
df['Spread'] = np.log10(df['Price_1'] / df['Rol1']) - np.log10(df['Price_2'] / df['Rol2'])
spread_window = df['Spread'].rolling(window=30)
df['z_score'] = (df['Spread'] - spread_window.mean()) / spread_window.std()  # Spread normalized over the last 30 periods

df['z_score_evol'] = df['z_score'] - df['z_score'].shift(periods=1)   # We get the evolution of the spread on one period

df['z_score_avg_week'] = df['z_score'].rolling(window=5).mean() # We calculate the average spread on the week

period=30 # Period used for the historical volatility

number_days=252   #Number of trading days in a year (to get annualized volatility)

df['log_returns1'] = np.log(df['Price_1']/df['Price_1'].shift(periods=1))
df['log_returns2'] = np.log(df['Price_2']/df['Price_2'].shift(periods=1))

df['volatility1'] = df['log_returns1'].rolling(window=period).std()*np.sqrt(number_days)  # Volatility of the first asset
df['volatility2'] = df['log_returns2'].rolling(window=period).std()*np.sqrt(number_days)  # Volatility of the second asset

df['avg_volatility'] = (df['volatility1']+df['volatility2'])/2  # We calculate the average volatility of the two assets

# Correlation between the two price series over the 30 periods that PRECEDE each row
# (rows i-30 .. i-1, hence the shift by one). This is the vectorized form of the former
# per-row loop, which wrote through chained indexing (df['correlation'].iloc[i] = ...).
df['correlation'] = df['Price_1'].rolling(window=30).corr(df['Price_2']).shift(periods=1)
df['corr_evol'] = df['correlation'] - df['correlation'].shift(periods=1)

df['month'] = df['Date'].dt.month  # Current month
df['week'] = df['Date'].dt.isocalendar().week  # Current week of the year

df=df.drop(columns=['volatility1', 'volatility2','log_returns1','log_returns2','Rol1','Rol2'])  # We drop columns only used to calculate the indicators we wanted

df['future_z_score']= df['z_score'].shift(-5)  # We get the future normalized spread in 5 periods, which is going to be our target for the ML models

def target(spread):
    """Classify a normalized spread as 1 (> 1.5), -1 (< -1.5) or 0 (anything else).

    Both comparisons are strict; a NaN fails both and is therefore classified as 0.
    """
    if spread > 1.5:
        return 1
    if spread < -1.5:
        return -1
    return 0

# Label every row with the class of its 5-period-ahead z-score, then discard the raw future
# value since we are making it a classification problem with the target function
future_labels = df['future_z_score'].map(target)
df = df.assign(target=future_labels).drop(columns=['future_z_score'])

# We keep only one row out of 5 periods since we dont want the model to be able to use data between
# the day it makes the prediction and the day the prediction is verified, otherwise it would be cheating.
# Rows with a missing indicator are removed afterwards.
df = df.iloc[::5].dropna()

In [None]:
# Here we predict the future normalised spread class used to define our position entries/exits.
# The model receives every column except the date and the known label.
excluded_columns = ['Date', 'target']
to_predict = df.drop(columns=excluded_columns)
y_pred = gb.predict(to_predict)
y_pred

In [None]:
# Append the model output as 'predicted' and move 'Date' to the last column
date_column = df['Date']
df = df.drop(columns=['Date']).assign(predicted=y_pred, Date=date_column)
df

In [None]:
def _trade_date(dt, label):
    """Return the 'date' of the trade-log row labelled `label`, or 'n/a' when `label` is None."""
    return 'n/a' if label is None else dt.loc[label, 'date']


def _side_summary(dt, side):
    """Summarise the closed trades of one side ('LONG' or 'SHORT') of the trade log.

    Returns a dict with the number of closed trades ('total'), their average result
    ('average'), the best and worst results as strings ('best', 'worst', '' when there is
    none) and the row labels of those trades ('idBest', 'idWorst', None when there is none).
    """
    closed = dt.loc[(dt['position'] == side) & (dt['openOrClose'] == 'Close'), 'tradeResult']
    results = closed.dropna()
    summary = {'total': len(closed), 'average': 0, 'best': '', 'idBest': None, 'worst': '', 'idWorst': None}
    if not results.empty:
        summary['average'] = round(results.sum() / len(closed), 2)
        summary['best'] = str(round(results.max(), 2))
        summary['idBest'] = results.idxmax()
        summary['worst'] = str(round(results.min(), 2))
        summary['idWorst'] = results.idxmin()
    return summary


def _percentage(part, whole):
    """Return part / whole * 100, or 0 when `whole` is 0 (no trade to compute a ratio on)."""
    return (part / whole) * 100 if whole else 0


def completeBacktest(dt, pair_name=None, period=None):
    """Print a performance report for a trade log and add the derived columns to it.

    Parameters
    ----------
    dt : pandas.DataFrame
        Trade log with one row per order. Columns read: 'date', 'openOrClose'
        ('Open'/'Close'), 'position' ('LONG'/'SHORT'), 'tradeResult' (result of the trade as
        a fraction, missing on 'Open' rows) and, when present, 'timeSince' and 'reason'.
        The frame is modified in place: 'multiplier', 'cumulativeResult', 'tradeIs',
        'highValue', 'drawdown', 'timeDeltaTrade' and 'timeDeltaNoTrade' are added and
        'tradeResult' is rescaled to percent, so calling the function twice on the same
        frame rescales it twice.
    pair_name : str, optional
        Label printed in the report header. Defaults to the notebook globals
        ``ticker_1 + "/" + ticker_2``.
    period : tuple, optional
        (start, end) printed in the report header. Defaults to the first and last 'Date'
        of the notebook global ``df``.

    Returns
    -------
    None. A log without trades (or without trades on one side) is reported with zero /
    'n/a' values instead of raising.
    """
    # tradeResult can arrive with object dtype (frame built row by row): make it numeric first
    dt['tradeResult'] = pd.to_numeric(dt['tradeResult']).astype(float)
    dt['multiplier'] = 1 + dt['tradeResult']
    dt['cumulativeResult'] = dt['multiplier'].cumprod()

    # A trade is 'Good' when its result is strictly positive, 'Bad' otherwise;
    # rows without a result (opens) keep an empty label
    tradeLabels = pd.Series('', index=dt.index)
    tradeLabels = tradeLabels.mask(dt['tradeResult'] > 0, 'Good').mask(dt['tradeResult'] <= 0, 'Bad')
    dt['tradeIs'] = tradeLabels
    dt['tradeResult'] = dt['tradeResult'] * 100  # expressed in percent from here on

    dt['highValue'] = dt['cumulativeResult'].cummax()
    # NOTE(review): this is the absolute gap to the running peak of the cumulative
    # multiplier, not a ratio to that peak, although it is printed as a percentage below.
    dt['drawdown'] = dt['cumulativeResult'] - dt['highValue']

    # Final performance = last known cumulative multiplier (the last row may be an open order)
    realised = dt['cumulativeResult'].dropna()
    strategyFinalResult = (realised.iloc[-1] - 1) if not realised.empty else 0.0

    closedResults = dt.loc[dt['tradeIs'].isin(['Good', 'Bad']), 'tradeResult']
    if closedResults.empty:
        tradesPerformance = 0
        print("/!\\ There is no Good or Bad Trades in your BackTest, maybe a problem...")
    else:
        tradesPerformance = round(closedResults.sum() / closedResults.count(), 2)

    goodTrades = dt[dt['tradeIs'] == 'Good']
    if goodTrades.empty:
        totalGoodTrades = 0
        AveragePercentagePositivTrades = 0
        idbest = None
        bestTrade = '0'
        print("/!\\ There is no Good Trades in your BackTest, maybe a problem...")
    else:
        totalGoodTrades = goodTrades['date'].nunique()
        AveragePercentagePositivTrades = round(goodTrades['tradeResult'].sum() / goodTrades['tradeResult'].count(), 2)
        idbest = goodTrades['tradeResult'].idxmax()
        bestTrade = str(round(goodTrades['tradeResult'].max(), 2))

    badTrades = dt[dt['tradeIs'] == 'Bad']
    if badTrades.empty:
        totalBadTrades = 0
        AveragePercentageNegativTrades = 0
        idworst = None
        worstTrade = 0
        print("/!\\ There is no Bad Trades in your BackTest, maybe a problem...")
    else:
        totalBadTrades = badTrades['date'].nunique()
        AveragePercentageNegativTrades = round(badTrades['tradeResult'].sum() / badTrades['tradeResult'].count(), 2)
        idworst = badTrades['tradeResult'].idxmin()
        worstTrade = round(badTrades['tradeResult'].min(), 2)

    totalTrades = totalBadTrades + totalGoodTrades

    longStats = _side_summary(dt, 'LONG')
    if longStats['idBest'] is None:
        print("/!\\ There is no LONG Trades in your BackTest, maybe a problem...")
    shortStats = _side_summary(dt, 'SHORT')
    if shortStats['idBest'] is None:
        print("/!\\ There is no SHORT Trades in your BackTest, maybe a problem...")

    # Number of good / bad trades per side, with the same warnings as before when a count is zero
    sideCounts = {}
    for side in ('LONG', 'SHORT'):
        for label in ('Good', 'Bad'):
            count = int(((dt['position'] == side) & (dt['tradeIs'] == label)).sum())
            if count == 0:
                print("/!\\ There is no " + label.lower() + " " + side + " Trades in your BackTest, maybe a problem...")
            sideCounts[(side, label)] = count
    totalGoodLongTrade = sideCounts[('LONG', 'Good')]
    totalBadLongTrade = sideCounts[('LONG', 'Bad')]
    totalGoodShortTrade = sideCounts[('SHORT', 'Good')]
    totalBadShortTrade = sideCounts[('SHORT', 'Bad')]

    # 'timeSince' on a Close row is the time spent in the trade; on an Open row it is the
    # time spent without a position
    if 'timeSince' in dt.columns:
        elapsed = pd.to_numeric(dt['timeSince'])
        dt['timeDeltaTrade'] = elapsed.where(dt['openOrClose'] != 'Open')
        dt['timeDeltaNoTrade'] = elapsed.where(dt['openOrClose'] != 'Close')
    else:
        print("/!\\ Error in time delta")
        dt['timeDeltaTrade'] = pd.Series(0.0, index=dt.index)
        dt['timeDeltaNoTrade'] = pd.Series(0.0, index=dt.index)

    winRateRatio = _percentage(totalGoodTrades, totalTrades)

    drawdowns = dt['drawdown'].dropna()
    if drawdowns.empty:
        maxDrawdown, maxDrawdownId = 0.0, None
    else:
        maxDrawdown, maxDrawdownId = drawdowns.min(), drawdowns.idxmin()

    # Header values fall back to the notebook globals when not supplied by the caller
    if pair_name is None:
        pair_name = ticker_1 + "/" + ticker_2
    if period is None:
        period = (df.iloc[0]['Date'], df.iloc[-1]['Date'])

    print("Pair Strategy on : " + pair_name)
    print("Period : [" + str(period[0]) + "] -> [" + str(period[1]) + "]")

    print("\n----- General Informations -----")
    print("Performance vs US Dollar :", round(strategyFinalResult*100, 2), "%")
    print("Best trade : +"+bestTrade, "%, the", _trade_date(dt, idbest))
    print("Worst trade :", worstTrade, "%, the", _trade_date(dt, idworst))
    print("Max DrawDown :", str(round(100*maxDrawdown, 2)), "%, the", _trade_date(dt, maxDrawdownId))

    print("\n----- Trades Informations -----")
    print("Total trades on period :",totalTrades)
    print("Number of positive trades :", totalGoodTrades)
    print("Number of negative trades : ", totalBadTrades)
    print("Trades win rate ratio :", round(winRateRatio, 2), '%')
    print("Average trades performance :",tradesPerformance,"%")
    print("Average positive trades :", AveragePercentagePositivTrades, "%")
    print("Average negative trades :", AveragePercentageNegativTrades, "%")

    print("\n----- LONG Trades Informations -----")
    print("Number of LONG trades :",longStats['total'])
    print("Average LONG trades performance :",longStats['average'], "%")
    print("Best  LONG trade +"+longStats['best'], "%, the ", _trade_date(dt, longStats['idBest']))
    print("Worst LONG trade", longStats['worst'], "%, the ", _trade_date(dt, longStats['idWorst']))
    print("Number of positive LONG trades :",totalGoodLongTrade)
    print("Number of negative LONG trades :",totalBadLongTrade)
    print("LONG trade win rate ratio :", round(_percentage(totalGoodLongTrade, longStats['total']), 2), '%')

    print("\n----- SHORT Trades Informations -----")
    print("Number of SHORT trades :",shortStats['total'])
    print("Average SHORT trades performance :",shortStats['average'], "%")
    print("Best  SHORT trade +"+shortStats['best'], "%, the ", _trade_date(dt, shortStats['idBest']))
    print("Worst SHORT trade", shortStats['worst'], "%, the ", _trade_date(dt, shortStats['idWorst']))
    print("Number of positive SHORT trades :",totalGoodShortTrade)
    print("Number of negative SHORT trades :",totalBadShortTrade)
    print("SHORT trade win rate ratio :", round(_percentage(totalGoodShortTrade, shortStats['total']), 2), '%')

    print("\n----- Time Informations -----")
    print("Average time duration for a trade :", round(
        dt['timeDeltaTrade'].mean(skipna=True), 2), "periods")
    print("Maximum time duration for a trade :",
          dt['timeDeltaTrade'].max(skipna=True), "periods")
    print("Minimum time duration for a trade :",
          dt['timeDeltaTrade'].min(skipna=True), "periods")
    print("Average time duration between two trades :", round(
        dt['timeDeltaNoTrade'].mean(skipna=True), 2), "periods")
    print("Maximum time duration between two trades :",
          dt['timeDeltaNoTrade'].max(skipna=True), "periods")
    print("Minimum time duration between two trades :",
          dt['timeDeltaNoTrade'].min(skipna=True), "periods")

    print("\n----- Trades Reasons -----")
    if 'reason' in dt.columns:
        # Number of distinct dates on which each open/close reason occurred
        reasonCounts = dt.groupby('reason')['date'].nunique()
        for r in dt['reason'].unique():
            print(r+" number :", reasonCounts[r])
        
def plot_wallet_evolution(dfTrades):
    """Plot the 'wallet' column of the trade log against its 'date' column parsed as datetimes."""
    wallet_by_date = dfTrades['wallet'].copy()
    # The input frame itself is left untouched; only this copy of the column is re-indexed
    wallet_by_date.index = pd.Index(pd.to_datetime(dfTrades['date']))
    wallet_by_date.plot(figsize=(20, 10))
    print("\n----- Plot -----")

In [None]:
# -- Trade log (one row per opened / closed order) and working copy of the prediction frame --
dt = pd.DataFrame(columns=['date', 'openOrClose', 'position', 'price1', 'price2', 'tradeResult', 'wallet'])
dfTest = df.copy()

# -- Backtest state --
initialWallet = 1000
wallet = initialWallet
orderInProgress = ''              # '' when flat, otherwise 'LONG' or 'SHORT'
previousRow = dfTest.iloc[0]
lastPrice1 = lastPrice2 = 0       # entry prices of the position in progress
currentTimeOpen = timeSince = 0   # row counters used for the time limit and for the report

# -- Hyper parameters --
stopLossActivation, timeLimitActivation = True, True
slPct = 0.05       # a trade result below -slPct triggers the stop loss
timeLimit = 10     # a position whose currentTimeOpen exceeds this is closed
# Only referenced by the commented-out spread-threshold entry rules in the condition functions
spreadBuy = 0.017
speadSell = -0.017


# -- Backtest functions --
def openLongCondition(row, previousRow):
    """Return True when the prediction has just become 1.

    That is: `row['predicted']` equals 1 while `previousRow['predicted']` is below 1.
    """
    justTurnedLong = row['predicted'] == 1 and previousRow['predicted'] < 1
    return bool(justTurnedLong)
    
def closeLongCondition(row, previousRow):
    """Return True when a LONG position should be closed on `row`.

    The strategy keeps a long position as long as the model keeps predicting class 1
    (future normalized spread above 1.5), so the position is closed as soon as
    `row['predicted']` is anything else. The previous test, `predicted > -2`, was true for
    every possible prediction (-1, 0, 1): every position was closed on the very next row and
    the time-limit exit could never be reached.

    `previousRow` is unused; it is kept so all condition functions share one signature.
    """
    return bool(row['predicted'] != 1)
    
def openShortCondition(row, previousRow):
    """Return True when the prediction has just become -1.

    That is: `row['predicted']` equals -1 while `previousRow['predicted']` is above -1.
    """
    justTurnedShort = row['predicted'] == -1 and previousRow['predicted'] > -1
    return bool(justTurnedShort)
    
def closeShortCondition(row, previousRow):
    """Return True when a SHORT position should be closed on `row`.

    The strategy keeps a short position as long as the model keeps predicting class -1
    (future normalized spread below -1.5), so the position is closed as soon as
    `row['predicted']` is anything else. The previous test, `row['Spread'] < 4`, compared a
    difference of log10 returns with 4 and was therefore true on practically every row:
    every position was closed on the very next row and the time-limit exit could never be
    reached.

    `previousRow` is unused; it is kept so all condition functions share one signature.
    """
    return bool(row['predicted'] != -1)

# -- Utility fonction --
def getTradeResult(position, price1, price2, lastPrice1, lastPrice2):
    """Return the relative result of a pair trade since its entry prices.

    Each leg's change is (price - lastPrice) / lastPrice. A 'LONG' position earns the
    change of asset 1 minus the change of asset 2; any other position value earns the
    opposite.
    """
    evolPrice1 = (price1 - lastPrice1) / lastPrice1
    evolPrice2 = (price2 - lastPrice2) / lastPrice2
    return evolPrice1 - evolPrice2 if position == 'LONG' else evolPrice2 - evolPrice1

# -- Backtesting --
# Trades are collected in a plain list and turned into the `dt` trade log once, after the
# loop: DataFrame.append was removed in pandas 2.0, and growing a frame row by row is quadratic.
tradeRecords = []

for index, row in dfTest.iterrows():
    timeSince += 1
    if currentTimeOpen > 0:
        currentTimeOpen += 1

    # -- If there is an order in progress: check whether this row closes it --
    if orderInProgress in ('LONG', 'SHORT'):
        side = orderInProgress
        sideLabel = 'Long' if side == 'LONG' else 'Short'
        closeCondition = closeLongCondition if side == 'LONG' else closeShortCondition
        tradeResult = getTradeResult(side, row['Price_1'], row['Price_2'], lastPrice1, lastPrice2)

        # Exit rules, by priority: stop loss, then time limit, then the model-driven close
        if tradeResult < -slPct and stopLossActivation:
            closeReason = 'Stop Loss'
        elif currentTimeOpen > timeLimit:
            closeReason = 'Time Limit'
        elif closeCondition(row, previousRow) == True:
            closeReason = 'Market'
        else:
            closeReason = None

        if closeReason is not None:
            orderInProgress = ''
            wallet = wallet + tradeResult * wallet
            # -- Add the trade to the log to analyse it later --
            tradeRecords.append({
                'date': row['Date'],
                'openOrClose': 'Close',
                'position': side,
                'price1': row['Price_1'],
                'price2': row['Price_2'],
                'tradeResult': tradeResult,
                'wallet': wallet,
                'reason': 'Close ' + sideLabel + ' ' + closeReason,
                'timeSince': timeSince
            })
            timeSince = 0

    # -- If there is NO order in progress: check whether this row opens one --
    elif orderInProgress == '':
        for side, sideLabel, openCondition in (('LONG', 'Long', openLongCondition),
                                               ('SHORT', 'Short', openShortCondition)):
            if openCondition(row, previousRow) == True:
                orderInProgress = side
                # Entry prices, used by getTradeResult on the following rows
                lastPrice1 = row['Price_1']
                lastPrice2 = row['Price_2']
                if timeLimitActivation:
                    currentTimeOpen = 1
                # An 'Open' row carries no tradeResult (it stays missing in the log)
                tradeRecords.append({
                    'date': row['Date'],
                    'openOrClose': 'Open',
                    'position': side,
                    'price1': row['Price_1'],
                    'price2': row['Price_2'],
                    'wallet': wallet,
                    'reason': 'Open ' + sideLabel + ' Market',
                    'timeSince': timeSince
                })
                timeSince = 0
    previousRow = row

# Trade log: same columns, in the same order, as the frame previously built by appending
dt = pd.DataFrame(tradeRecords, columns=['date', 'openOrClose', 'position', 'price1', 'price2',
                                         'tradeResult', 'wallet', 'reason', 'timeSince'])

    
# print('Final wallet',round(wallet,2),'$')
# Quick win-rate figures computed straight from the trade log
profitableTrades = dt['tradeResult'] > 0
closedTrades = dt['openOrClose'] == 'Close'
nbGoodTrade = dt.loc[profitableTrades, 'date'].count()
nbTotalTrade = dt.loc[closedTrades, 'date'].count()
winRateRatio = nbGoodTrade / nbTotalTrade
# print('Win rate ratio :', round(winRateRatio*100,2), '%')

# Full text report, then the wallet curve
completeBacktest(dt)
plot_wallet_evolution(dt)
# dt