In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import requests
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt

In [32]:
# Kraken EUR markets used in this notebook, in column order.
# Key: short ticker used to build the request and the column names.
# Value: key under which the OHLC response stores that market's candles.
_KRAKEN_EUR_MARKETS = {
    "XRP": "XXRPZEUR",
    "LINK": "LINKEUR",
    "ADA": "ADAEUR",
    "ETH": "XETHZEUR",
    "DOT": "DOTEUR",
    "BTC": "XXBTZEUR",
    "TRX": "TRXEUR",
    "KAVA": "KAVAEUR",
    "AAVE": "AAVEEUR",
    "UNI": "UNIEUR",
    "GRT": "GRTEUR",
    "COMP": "COMPEUR",
    "WAVES": "WAVESEUR",
    "KSM": "KSMEUR",
    "YFI": "YFIEUR",
    "ATOM": "ATOMEUR",
    "EOS": "EOSEUR",
    "BAT": "BATEUR",
    "DOGE": "XDGEUR",
    "DASH": "DASHEUR",
    "ZEC": "XZECZEUR",
    "ETC": "XETCZEUR",
    "FIL": "FILEUR",
    "ALGO": "ALGOEUR",
}

# Parallel lists (same order) consumed by the rest of the notebook.
pairs = list(_KRAKEN_EUR_MARKETS)
long_pairs = list(_KRAKEN_EUR_MARKETS.values())




def fetch_daily_ohlc(pair, long_pair, timeout=30):
  """Download the daily EUR candles of one coin from Kraken.

  Parameters
  ----------
  pair : str
      Short ticker (e.g. "BTC"); "EUR" is appended to form the requested market.
  long_pair : str
      Key under which the response's "result" object stores the candles.
  timeout : float
      Seconds to wait for the HTTP request before giving up.

  Returns
  -------
  pandas.DataFrame
      Float columns "<pair>_close" and "<pair>_volume", indexed by the candle
      timestamp truncated to the hour.

  Raises
  ------
  requests.HTTPError
      If the server answers with an HTTP error status.
  RuntimeError
      If the response body carries a non-empty "error" list.
  """
  response = requests.get(
      "https://api.kraken.com/0/public/OHLC",
      params={"pair": f"{pair}EUR", "since": 0, "interval": 1440},
      timeout=timeout,
  )
  response.raise_for_status()
  payload = response.json()
  if payload.get("error"):
    raise RuntimeError(f"Kraken OHLC request for {pair}EUR failed: {payload['error']}")

  # NOTE(review): Kraken documents the last OHLC entry as the current,
  # not-yet-committed candle -- confirm whether it should be dropped.
  candles = payload["result"][long_pair]

  # Each candle starts with an epoch timestamp in seconds; floor it to the
  # hour (same effect as the former '%Y-%m-%d %H' string round-trip).
  index = pd.to_datetime([int(candle[0]) // 3600 * 3600 for candle in candles], unit="s")

  # Positions 4 and 6 of each candle are used as close and volume.
  frame = pd.DataFrame(
      {
          f"{pair}_close": [candle[4] for candle in candles],
          f"{pair}_volume": [candle[6] for candle in candles],
      },
      index=index,
  )
  # Values are converted to float here so every column is numeric.
  return frame.astype(float)


# One close/volume frame per coin, in the same order as `pairs`.
dfs = [fetch_daily_ohlc(pair, long_pair) for pair, long_pair in zip(pairs, long_pairs)]


# Join every coin's close/volume columns side by side with a single concat
# (instead of growing the frame one coin at a time), keep only the dates on
# which every coin has data, and sort chronologically so the row-to-row return
# calculation that follows compares consecutive dates.
df_main = pd.concat(dfs, axis=1).dropna(axis=0, how="any").sort_index()

# Simple return of each coin: percentage change of the close between
# consecutive rows (the first row has no predecessor, so it is NaN).
for pair in pairs:
  df_main[f"{pair}_return"] = df_main[f"{pair}_close"].pct_change(1)

# Returns-only frame, one column per coin in `pairs` order. Selected in one
# step and copied so later edits to dfs_returns never touch df_main.
dfs_returns = df_main[[f"{pair}_return" for pair in pairs]].copy()


# Close prices only, one column per coin. The column names are derived from
# `pairs` so this frame cannot drift out of sync with the list of coins.
df_close = df_main[[f"{pair}_close" for pair in pairs]].copy()

In [4]:
# Returns are used instead of prices because prices are not stationary:
# even after normalizing the data, future prices can fall outside the
# training range, whereas returns are closer to stationary.

In [29]:
# Rows with missing values are dropped. Because some of the coins are "new"
# (short price history), the returns DataFrame becomes small, but every
# remaining row has real returns for all coins.
# **Pre-Processing**

In [33]:
# (rows, columns) of the returns frame; there is one *_return column per coin in `pairs`.
dfs_returns.shape

(66, 24)

In [34]:
# Preview the first rows. The first row is all NaN because pct_change has no
# previous close to compare against.
dfs_returns.head()

Unnamed: 0,XRP_return,LINK_return,ADA_return,ETH_return,DOT_return,BTC_return,TRX_return,KAVA_return,AAVE_return,UNI_return,GRT_return,COMP_return,WAVES_return,KSM_return,YFI_return,ATOM_return,EOS_return,BAT_return,DOGE_return,DASH_return,ZEC_return,ETC_return,FIL_return,ALGO_return
2020-12-18,,,,,,,,,,,,,,,,,,,,,,,,
2020-12-19,-0.014004,-0.002685,-0.007701,0.002755,-0.008459,0.026978,-0.000944,0.008001,-0.016642,0.046738,0.795455,-0.034754,-0.017386,-0.035349,-0.016626,-0.009075,-0.004965,-0.019472,0.018373,0.007486,-0.006038,0.005,-0.012282,0.00445
2020-12-20,-0.032755,-0.026811,-0.010927,-0.02585,-0.021813,-0.010599,0.003976,-0.032275,-0.029408,-0.047132,0.537823,-0.00728,-0.057167,-0.028698,-0.044729,-0.045185,0.030741,-0.051803,0.171704,0.009858,0.010325,0.013012,0.007417,-0.044341
2020-12-21,-0.072315,-0.055987,-0.053957,-0.046183,-0.059826,-0.029537,-0.060064,-0.048209,-0.05045,-0.064107,-0.361995,-0.074532,-0.072481,-0.078636,-0.057311,0.00224,-0.06117,-0.025175,0.036669,-0.043808,-0.079767,-0.051757,-0.111823,-0.038719
2020-12-22,-0.128693,0.040076,0.026082,0.049968,0.059321,0.049424,-0.013264,0.002602,0.048465,0.061892,0.15438,0.024978,0.067131,0.094968,0.010033,-0.016346,-0.011227,-0.027289,-0.053415,0.01815,-0.005058,-0.003386,-0.018919,0.036511


In [35]:
# The coin to predict is bitcoin. Its return is shifted back by one step so
# that each row pairs the other coins' returns with the *next* row's BTC
# return (input and target sit on the same row).
# The shifted series is rebuilt from df_main, which keeps the unshifted
# BTC_return, so re-running this cell does not shift the target a second time.

dfs_returns["BTC_return"] = df_main["BTC_return"].shift(-1)
dfs_returns["BTC_return"].tail()

2021-02-17   -0.012758
2021-02-18    0.078444
2021-02-19    0.001939
2021-02-20    0.032576
2021-02-21         NaN
Freq: D, Name: BTC_return, dtype: float64

In [36]:
# Chronological train/test split.
# The first row is left out (its features are NaN) and so is the last row
# (its shifted BTC target is NaN); the final Ntest rows minus that last one
# form the test set, everything before them the training set.

Ntest = 10
_train_rows = slice(1, -Ntest)
_test_rows = slice(-Ntest, -1)

train = dfs_returns.iloc[_train_rows]
test = dfs_returns.iloc[_test_rows]

In [37]:
# List the return columns available as candidate features / target.
dfs_returns.columns

Index(['XRP_return', 'LINK_return', 'ADA_return', 'ETH_return', 'DOT_return',
       'BTC_return', 'TRX_return', 'KAVA_return', 'AAVE_return', 'UNI_return',
       'GRT_return', 'COMP_return', 'WAVES_return', 'KSM_return', 'YFI_return',
       'ATOM_return', 'EOS_return', 'BAT_return', 'DOGE_return', 'DASH_return',
       'ZEC_return', 'ETC_return', 'FIL_return', 'ALGO_return'],
      dtype='object')

In [39]:
# Feature columns: the return of every coin except BTC, whose (shifted)
# return is the prediction target.

_feature_coins = (
    "XRP", "LINK", "ADA", "ETH", "DOT",
    "TRX", "KAVA", "AAVE", "UNI",
    "GRT", "COMP", "WAVES", "KSM", "YFI",
    "ATOM", "EOS", "BAT", "DOGE", "DASH",
    "ZEC", "ETC", "FIL", "ALGO",
)
x_cols = [f"{coin}_return" for coin in _feature_coins]

x_cols

['XRP_return',
 'LINK_return',
 'ADA_return',
 'ETH_return',
 'DOT_return',
 'TRX_return',
 'KAVA_return',
 'AAVE_return',
 'UNI_return',
 'GRT_return',
 'COMP_return',
 'WAVES_return',
 'KSM_return',
 'YFI_return',
 'ATOM_return',
 'EOS_return',
 'BAT_return',
 'DOGE_return',
 'DASH_return',
 'ZEC_return',
 'ETC_return',
 'FIL_return',
 'ALGO_return']

In [40]:
# Split each partition into the feature matrix (x_cols) and the target
# series (the shifted BTC return).
_target_col = "BTC_return"
Xtrain, Ytrain = train[x_cols], train[_target_col]
Xtest, Ytest = test[x_cols], test[_target_col]

In [41]:
# First rows of the training feature matrix (BTC_return is not part of x_cols).
Xtrain.head()

Unnamed: 0,XRP_return,LINK_return,ADA_return,ETH_return,DOT_return,TRX_return,KAVA_return,AAVE_return,UNI_return,GRT_return,COMP_return,WAVES_return,KSM_return,YFI_return,ATOM_return,EOS_return,BAT_return,DOGE_return,DASH_return,ZEC_return,ETC_return,FIL_return,ALGO_return
2020-12-19,-0.014004,-0.002685,-0.007701,0.002755,-0.008459,-0.000944,0.008001,-0.016642,0.046738,0.795455,-0.034754,-0.017386,-0.035349,-0.016626,-0.009075,-0.004965,-0.019472,0.018373,0.007486,-0.006038,0.005,-0.012282,0.00445
2020-12-20,-0.032755,-0.026811,-0.010927,-0.02585,-0.021813,0.003976,-0.032275,-0.029408,-0.047132,0.537823,-0.00728,-0.057167,-0.028698,-0.044729,-0.045185,0.030741,-0.051803,0.171704,0.009858,0.010325,0.013012,0.007417,-0.044341
2020-12-21,-0.072315,-0.055987,-0.053957,-0.046183,-0.059826,-0.060064,-0.048209,-0.05045,-0.064107,-0.361995,-0.074532,-0.072481,-0.078636,-0.057311,0.00224,-0.06117,-0.025175,0.036669,-0.043808,-0.079767,-0.051757,-0.111823,-0.038719
2020-12-22,-0.128693,0.040076,0.026082,0.049968,0.059321,-0.013264,0.002602,0.048465,0.061892,0.15438,0.024978,0.067131,0.094968,0.010033,-0.016346,-0.011227,-0.027289,-0.053415,0.01815,-0.005058,-0.003386,-0.018919,0.036511
2020-12-23,-0.416835,-0.136053,-0.12425,-0.07901,-0.0813,-0.140768,-0.147584,-0.112547,-0.105435,-0.252872,-0.100756,-0.175706,-0.113539,-0.179641,-0.107141,-0.201388,-0.115232,-0.169906,-0.132331,-0.175275,-0.1671,-0.085785,-0.137046


In [42]:
# First rows of the training target: BTC_return after the one-step shift,
# i.e. the BTC return of the following row.
Ytrain.head()

2020-12-19   -0.010599
2020-12-20   -0.029537
2020-12-21    0.049424
2020-12-22   -0.021303
2020-12-23    0.019667
Freq: D, Name: BTC_return, dtype: float64

# **Linear Regression**

In [44]:
# Fit an ordinary least-squares model and report its score on the train and
# test rows. Reading the score: 1 is a perfect prediction, 0 is no better
# than a naive approach.

from sklearn.linear_model import LinearRegression
model = LinearRegression().fit(Xtrain, Ytrain)
model.score(Xtrain, Ytrain), model.score(Xtest, Ytest)

(0.3628395517986307, -0.9297928675403409)

In [45]:
# Only the direction of the prediction matters (up or down), not its size:
# measure how often the predicted sign matches the sign of the actual return.

Ptrain, Ptest = model.predict(Xtrain), model.predict(Xtest)

_train_hits = np.sign(Ptrain) == np.sign(Ytrain)
_test_hits = np.sign(Ptest) == np.sign(Ytest)
_train_hits.mean(), _test_hits.mean()

(0.6363636363636364, 0.5555555555555556)

In [49]:
# Distinct signs present in the predictions (a sign of 0 would mean "hold").
{*np.sign(Ptrain)}, {*np.sign(Ptest)}

({-1.0, 1.0}, {-1.0, 1.0})

In [50]:
# Per-date check on the test rows: True where the predicted and the actual
# return have the same sign (both increase or both decrease).

np.sign(Ptest)==np.sign(Ytest)

2021-02-12    False
2021-02-13    False
2021-02-14     True
2021-02-15     True
2021-02-16     True
2021-02-17    False
2021-02-18     True
2021-02-19     True
2021-02-20    False
Freq: D, Name: BTC_return, dtype: bool

In [51]:
# Trading position per row: 1 = hold BTC over the next step, 0 = stay out.
# Rows are addressed by the index labels of the frames the predictions were
# made from. Positional slices such as .loc[1:-Ntest] on a DatetimeIndex only
# worked through a deprecated fallback and raise TypeError in pandas >= 2.0.
# The boolean signals are cast to int so the column keeps a numeric dtype.
dfs_returns["Position"] = 0
dfs_returns.loc[Xtrain.index, "Position"] = (np.asarray(Ptrain) > 0).astype(int)
dfs_returns.loc[Xtest.index, "Position"] = (np.asarray(Ptest) > 0).astype(int)

In [53]:
# Strategy return per row: the (shifted) BTC return when a position is held,
# zero otherwise.
dfs_returns["AlgoReturn"] = dfs_returns["Position"].mul(dfs_returns["BTC_return"])

In [54]:
# Sum of the strategy's per-row returns over the training rows.
# The underlying returns come from pct_change (simple returns), so this is a
# sum of simple returns rather than a log return.
dfs_returns.iloc[1:-Ntest]["AlgoReturn"].sum()

0.8681398167241403

In [55]:
# Sum of the strategy's per-row returns over the test rows.
# The underlying returns come from pct_change (simple returns), so this is a
# sum of simple returns rather than a log return.
dfs_returns.iloc[-Ntest:-1]["AlgoReturn"].sum()

0.15798102646958556

In [64]:
# Buy-and-hold benchmark: sum of the (shifted) BTC returns over the train and
# test rows, i.e. what always holding a position would have summed to.
Ytrain.sum(), Ytest.sum()

(0.7769591724312581, 0.20411629949364696)

# **Logistic Regression**

In [58]:
# C controls the regularization penalty that keeps the weights from growing
# large (in scikit-learn it is the inverse of the regularization strength, so
# a smaller C means a stronger penalty).
# The classifier needs a binary target, so the returns are converted to
# up/down flags: True when the (shifted) BTC return is positive.

from sklearn.linear_model import LogisticRegression
Ctrain = Ytrain > 0
Ctest = Ytest > 0
model = LogisticRegression(C=10).fit(Xtrain, Ctrain)
model.score(Xtrain,Ctrain), model.score(Xtest, Ctest)

(0.7636363636363637, 0.6666666666666666)

In [59]:
# Predicted up/down flags for both partitions, followed by the distinct
# values that actually occur in each.
Ptrain, Ptest = model.predict(Xtrain), model.predict(Xtest)

{*Ptrain}, {*Ptest}

({False, True}, {False, True})

In [60]:
# Store the classifier's signal as the trading position (1 = hold BTC over
# the next step, 0 = stay out) and recompute the strategy's return.
# Rows are addressed by the index labels of the frames the predictions were
# made from. Positional slices such as .loc[1:-Ntest] on a DatetimeIndex only
# worked through a deprecated fallback and raise TypeError in pandas >= 2.0.
# The column is reset first so this cell does not depend on leftovers from a
# previous model, and the boolean predictions are cast to int so the column
# keeps a numeric dtype.

dfs_returns["Position"] = 0
dfs_returns.loc[Xtrain.index, "Position"] = np.asarray(Ptrain).astype(int)
dfs_returns.loc[Xtest.index, "Position"] = np.asarray(Ptest).astype(int)
dfs_returns["AlgoReturn"] = dfs_returns["Position"] * dfs_returns["BTC_return"]

In [61]:
# Sum of the strategy's per-row returns over the training rows.
# The underlying returns come from pct_change (simple returns), so this is a
# sum of simple returns rather than a log return.
dfs_returns.iloc[1:-Ntest]["AlgoReturn"].sum()

0.9660067026018117

In [62]:
# Sum of the strategy's per-row returns over the test rows.
# The underlying returns come from pct_change (simple returns), so this is a
# sum of simple returns rather than a log return.
dfs_returns.iloc[-Ntest:-1]["AlgoReturn"].sum()

0.16143718649490513

In [63]:
# Buy-and-hold benchmark: sum of the (shifted) BTC returns over the train and
# test rows, i.e. what always holding a position would have summed to.
Ytrain.sum(), Ytest.sum()

(0.7769591724312581, 0.20411629949364696)

# **Random Forest classifier**

In [84]:
# Fit a random forest on the same up/down target and report the accuracy on
# the train and test rows. random_state is fixed so the fit is repeatable.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=3).fit(Xtrain, Ctrain)
model.score(Xtrain, Ctrain), model.score(Xtest, Ctest)

(1.0, 0.7777777777777778)

In [85]:
# Predicted up/down flags of the current model for both partitions.
Ptrain, Ptest = model.predict(Xtrain), model.predict(Xtest)

In [86]:
# Store the classifier's signal as the trading position (1 = hold BTC over
# the next step, 0 = stay out) and recompute the strategy's return.
# Rows are addressed by the index labels of the frames the predictions were
# made from. Positional slices such as .loc[1:-Ntest] on a DatetimeIndex only
# worked through a deprecated fallback and raise TypeError in pandas >= 2.0.
# The column is reset first so this cell does not depend on leftovers from a
# previous model, and the boolean predictions are cast to int so the column
# keeps a numeric dtype.
dfs_returns["Position"] = 0
dfs_returns.loc[Xtrain.index, "Position"] = np.asarray(Ptrain).astype(int)
dfs_returns.loc[Xtest.index, "Position"] = np.asarray(Ptest).astype(int)
dfs_returns["AlgoReturn"] = dfs_returns["Position"] * dfs_returns["BTC_return"]

In [87]:
# Sum of the strategy's per-row returns over the training rows.
# The underlying returns come from pct_change (simple returns), so this is a
# sum of simple returns rather than a log return.
dfs_returns.iloc[1:-Ntest]["AlgoReturn"].sum()

1.4508469032384759

In [88]:
# Sum of the strategy's per-row returns over the test rows.
# The underlying returns come from pct_change (simple returns), so this is a
# sum of simple returns rather than a log return.
dfs_returns.iloc[-Ntest:-1]["AlgoReturn"].sum()

0.18980374009639345

In [89]:
# Buy-and-hold benchmark: sum of the (shifted) BTC returns over the train and
# test rows, i.e. what always holding a position would have summed to.
Ytrain.sum(), Ytest.sum()

(0.7769591724312581, 0.20411629949364696)