# __PROJECT 1: PREDICTABILITY OF CRYPTOCURRENCY__

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils import read_data, series_to_supervised
from prediction import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the returns table (26 conventional currencies and 11 cryptocurrencies),
# discard the cryptocurrencies that mostly have no values before 2015, and
# restrict the sample to the study window (2015 onwards).
late_listed = ['USDBUSD', 'USDBCH', 'USDADA', 'USDLINK', 'USDBNB']
returns = (
    read_data(name='returns', binary=True)
    .drop(columns=late_listed)
    .loc[lambda frame: frame.index >= '2015-01-01']
)

In [3]:
# Baseline model: a decision tree with default hyperparameters and a fixed
# seed, run through binary_task for BTC.
clf = DecisionTreeClassifier(random_state=0)
# The six cryptocurrency columns passed to binary_task.
cryptos = [
    'USDBTC',
    'USDDOGE',
    'USDETH',
    'USDLTC',
    'USDXRP',
    'USDUSDT',
]
# NOTE(review): binary_task comes from the star-import of `prediction`;
# presumably `real` is the ground truth and the two `predicted_*` arrays are
# predictions from all features vs. a subset -- confirm against that module.
predicted_all, predicted_part, real = binary_task(returns, 'BTC', cryptos, clf)

Trained successfully!


In [24]:
# Exhaustive grid search over decision-tree hyperparameters.
# For every combination the tree is run through binary_task and the
# confusion-matrix counts of both prediction sets ('part' and 'all') are logged.
criterions = ['gini', 'entropy']
max_depths = list(range(5, 26, 5))
min_sample_splits = list(range(20, 101, 20))
min_sample_leafs = list(range(20, 101, 20))

# Column order of the resulting log table.
log_columns = ['type', 'criterion', 'max_depth',
               'min_sample_split', 'min_sample_leaf',
               'TP', 'TN', 'FP', 'FN']

# Collect plain dict records and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, which made the original loop quadratic in the number of rows.
log_records = []
for criterion in criterions:
    for max_depth in max_depths:
        for min_sample_split in min_sample_splits:
            for min_sample_leaf in min_sample_leafs:
                clf = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth,
                                             min_samples_split=min_sample_split,
                                             min_samples_leaf=min_sample_leaf,
                                             random_state=0)
                predicted_all, predicted_part, real = binary_task(returns, 'BTC', cryptos, clf)
                # Log 'part' first, then 'all', matching the original row order.
                for kind, predicted in (('part', predicted_part), ('all', predicted_all)):
                    # Rows of the confusion matrix are true labels, columns are predictions.
                    confusion = metrics.confusion_matrix(real, predicted)
                    log_records.append({'type': kind, 'criterion': criterion, 'max_depth': max_depth,
                                        'min_sample_split': min_sample_split,
                                        'min_sample_leaf': min_sample_leaf,
                                        'TP': confusion[1, 1], 'TN': confusion[0, 0],
                                        'FP': confusion[0, 1], 'FN': confusion[1, 0]})
                print(f'Finish tree({criterion}, {max_depth}, {min_sample_split}, {min_sample_leaf}).')

# One row per (tree, prediction set); later cells read this as `log`.
log = pd.DataFrame(log_records, columns=log_columns)

Trained successfully!
Finish tree(gini, 5, 20, 20).
Trained successfully!
Finish tree(gini, 5, 20, 40).
Trained successfully!
Finish tree(gini, 5, 20, 60).
Trained successfully!
Finish tree(gini, 5, 20, 80).
Trained successfully!
Finish tree(gini, 5, 20, 100).
Trained successfully!
Finish tree(gini, 5, 40, 20).
Trained successfully!
Finish tree(gini, 5, 40, 40).
Trained successfully!
Finish tree(gini, 5, 40, 60).
Trained successfully!
Finish tree(gini, 5, 40, 80).
Trained successfully!
Finish tree(gini, 5, 40, 100).
Trained successfully!
Finish tree(gini, 5, 60, 20).
Trained successfully!
Finish tree(gini, 5, 60, 40).
Trained successfully!
Finish tree(gini, 5, 60, 60).
Trained successfully!
Finish tree(gini, 5, 60, 80).
Trained successfully!
Finish tree(gini, 5, 60, 100).
Trained successfully!
Finish tree(gini, 5, 80, 20).
Trained successfully!
Finish tree(gini, 5, 80, 40).
Trained successfully!
Finish tree(gini, 5, 80, 60).
Trained successfully!
Finish tree(gini, 5, 80, 80).
Trained s

In [None]:
# test GRUs


In [25]:
# Display the hyperparameter-search log: for each tree configuration there is
# one 'part' row and one 'all' row holding the TP/TN/FP/FN counts.
log

Unnamed: 0,type,criterion,max_depth,min_sample_split,min_sample_leaf,TP,TN,FP,FN
0,part,gini,5,20,20,135,248,140,202
1,all,gini,5,20,20,125,252,136,212
2,part,gini,5,20,40,136,254,134,201
3,all,gini,5,20,40,130,241,147,207
4,part,gini,5,20,60,133,267,121,204
...,...,...,...,...,...,...,...,...,...
495,all,entropy,25,100,60,126,238,150,211
496,part,entropy,25,100,80,112,284,104,225
497,all,entropy,25,100,80,126,246,142,211
498,part,entropy,25,100,100,99,289,99,238


In [7]:
# Precision of the 'part' predictions.
# NOTE(review): `real` and `predicted_part` are overwritten by every
# binary_task call (baseline cell and each grid-search iteration), so this
# value depends on which of those cells ran most recently.
metrics.precision_score(y_true=real, y_pred=predicted_part)

0.4727272727272727

In [None]:
# Plot the series extracted from `test_y` (black) against `predicted_all` (red).
# NOTE(review): `test_y` is not defined in any cell visible in this notebook;
# presumably it is left over from an earlier session -- define it (or replace
# it) before relying on Restart & Run All.

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so use whichever one is installed.
style_name = "seaborn-v0_8-white" if "seaborn-v0_8-white" in plt.style.available else "seaborn-white"
plt.style.use(style_name)

# Extract the plotted values once instead of rebuilding the list three times.
actual = [list(item[0])[0] for item in test_y]
# NOTE(review): 1827 is a hard-coded x-axis offset; presumably the row index of
# the first test observation -- confirm and move to a named constant.
x_positions = np.arange(1827, 1827 + len(actual))

plt.figure(figsize=(10, 8))
plt.plot(x_positions, actual, color='black')
# plt.plot(np.arange(366,1087), 1/BTC_part_, color='blue')
plt.plot(x_positions, predicted_all, color='red')
plt.show()