# Test signals

### Import

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path 
import datatable as dt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

## 1. Data

### Get signals

In [2]:
def display_info(df):
    print(f'Shape : {df.shape}')
    print(f'Start date : {df.index.min()}')
    print(f'End date : {df.index.max()}')
    display(df.tail(3))
    
def get_data(filepath, resample=False):
    df = dt.fread(filepath).to_pandas() 
    df = df.set_index('date')
    if resample:
        df = df.resample('H').ffill()
    df.index = pd.to_datetime(df.index).strftime('%Y-%m-%d %H')
    df = df[~df.index.duplicated(keep='first')]
    display_info(df)
    return df

In [3]:
filepath = Path('../csv/signals_crypto_fear&greed_index.csv')
df_index = get_data(filepath, resample=True)

filepath = Path('../csv/signals_btc_strat.csv')
df_ta = get_data(filepath, resample=False)

filepath = Path('../csv/signals_glassnode.csv')
df_glassnode = get_data(filepath, resample=True)

filepath = Path('../csv/signals_wavelets.csv')
df_wavelet = get_data(filepath, resample=False)

Shape : (36601, 2)
Start date : 2018-02-01 00
End date : 2022-04-06 00


Unnamed: 0_level_0,fng_value,fng_classification
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-04-05 22,53,0
2022-04-05 23,53,0
2022-04-06 00,48,0


Shape : (17130, 6)
Start date : 2020-04-09 20
End date : 2022-04-07 00


Unnamed: 0_level_0,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-06 19,True,True,0,-1,False,False
2022-04-06 20,True,True,0,-1,False,False
2022-04-07 00,True,True,0,-1,False,False


Shape : (19801, 5)
Start date : 2020-01-01 00
End date : 2022-04-05 00


Unnamed: 0_level_0,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-04 22,6.099674,6.561471,4.737371,0.0,14.345889
2022-04-04 23,6.099674,6.561471,4.737371,0.0,14.345889
2022-04-05 00,6.792104,2.989913,4.85569,0.0,10.875053


Shape : (17099, 2)
Start date : 2020-04-11 04
End date : 2022-04-07 00


Unnamed: 0_level_0,wavelet_smoothing,wavelet_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-04-06 19,41793.751592,True
2022-04-06 20,41799.037093,True
2022-04-07 00,41798.827121,True


### Get target

#### Close price

In [26]:
filepath = Path('../../data/csv/crypto_market_data.csv')
# Get crypto data
df_crypto = dt.fread(filepath).to_pandas() 
df_crypto = df_crypto.T.set_index([0, 1]).T
df_crypto = df_crypto.set_index('')
df_crypto = df_crypto.astype(float)
df_crypto.index.names = ['date']

# Get the daily return (on the close price of the BTC)
df_close = df_crypto.T.swaplevel(0, 1).T['Close']['BTC-USD']
df_close.index = pd.to_datetime(df_close.index)
#df_close = df_close[df_close.index.hour == 23]
#df_close.index = df_close.index.date
df_close.index = pd.to_datetime(df_close.index).strftime('%Y-%m-%d %H')
df_close = pd.DataFrame(df_close, index=df_close.index, columns=['BTC-USD'])
display(df_close.tail(3))

Unnamed: 0_level_0,BTC-USD
date,Unnamed: 1_level_1
2022-04-07 09,43502.988281
2022-04-07 09,43502.988281
2022-04-07 09,43482.363281


#### Binairy classification

In [27]:
df_return = df_close.pct_change().rename(columns={'BTC-USD':'return'})
df_return = df_return.dropna()
df_return['return_clf'] = (df_return > 0)*1
display(df_return.head(3))

Unnamed: 0_level_0,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-07 10,0.005891,1
2020-04-07 11,-0.006678,0
2020-04-07 12,-0.002894,0


### Data engineering

In [28]:
df = df_wavelet.join(df_index).join(df_glassnode).join(df_ta).join(df_return).dropna(axis=0)*1
df.index = pd.to_datetime(df.index)
#df = df[1:]
display(df.shape, df.index.min(), df.index.max())
df.head(3)

(17054, 17)

Timestamp('2020-04-11 04:00:00')

Timestamp('2022-04-05 00:00:00')

Unnamed: 0_level_0,wavelet_smoothing,wavelet_clf,fng_value,fng_classification,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-04-11 04:00:00,7113.095232,0,15.0,-2.0,5.429081,11.700374,13.823167,0.0,11.085744,1,0,0,1,0,0,-0.000923,0
2020-04-11 05:00:00,7113.466363,0,15.0,-2.0,5.429081,11.700374,13.823167,0.0,11.085744,0,0,0,1,0,0,0.002531,1
2020-04-11 06:00:00,7112.908368,0,15.0,-2.0,5.429081,11.700374,13.823167,0.0,11.085744,0,0,0,1,0,0,-0.000737,0


#### Split

In [29]:
df_train = df[df.index.year < 2022]
df_test = df[df.index.year >= 2022]

n_train, n_test = len(df_train), len(df_test)
print(f'Train : {n_train} ({int(n_train/(n_train+n_test)*100)}%)')
print(f'Test : {n_test} ({int(n_test/(n_train+n_test)*100)}%)')

Train : 14810 (86%)
Test : 2244 (13%)


#### Normalize

In [30]:
def nomalize(df_train, df_test, features):  
    df_train_nomalize = df_train[features]
    df_test_nomalize = df_test[features]

    scaler = StandardScaler().fit(df_train_nomalize)
    df_train_nomalize = pd.DataFrame(scaler.transform(df_train_nomalize),
                               columns=df_train_nomalize.columns,index=df_train_nomalize.index)
    df_test_nomalize = pd.DataFrame(scaler.transform(df_test_nomalize),
                               columns=df_test_nomalize.columns,index=df_test_nomalize.index)
    
    df_train = df_train_nomalize.join(df_train.drop(columns=features))
    df_test = df_test_nomalize.join(df_test.drop(columns=features))
    
    return df_train, df_test

In [31]:
features = ['wavelet_smoothing', 'fng_value', 'glassnode_f1', 'glassnode_f2', 'glassnode_f3', 'glassnode_f4', 'glassnode_f5']
df_train, df_test = nomalize(df_train, df_test, features)

display(df_train.head(2))
display(df_train.head(2))

Unnamed: 0_level_0,wavelet_smoothing,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,wavelet_clf,fng_classification,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-04-11 04:00:00,-1.314619,-1.728126,0.189735,1.703855,3.391564,0.0,-0.866183,0,-2.0,1,0,0,1,0,0,-0.000923,0
2020-04-11 05:00:00,-1.314599,-1.728126,0.189735,1.703855,3.391564,0.0,-0.866183,0,-2.0,0,0,0,1,0,0,0.002531,1


Unnamed: 0_level_0,wavelet_smoothing,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,wavelet_clf,fng_classification,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-04-11 04:00:00,-1.314619,-1.728126,0.189735,1.703855,3.391564,0.0,-0.866183,0,-2.0,1,0,0,1,0,0,-0.000923,0
2020-04-11 05:00:00,-1.314599,-1.728126,0.189735,1.703855,3.391564,0.0,-0.866183,0,-2.0,0,0,0,1,0,0,0.002531,1


## 2. Test signal 

In [32]:
#df['test'] = df.apply(lambda row: row.fng_value * row.fng_classification, axis=1)

In [33]:
signals = df_train.drop(columns=['return','return_clf']).columns

In [34]:
results = {}
for signal in signals:
    # Get data ready for training
    X_train, y_train = df_train[signal].values.reshape(-1, 1), df_train.return_clf
    X_test, y_test = df_test[signal].values.reshape(-1, 1), df_test.return_clf
    
    accuracy = []
    for i in range(25):
        # Random Forest model
        rf = RandomForestClassifier()
        rf.fit(X_train, y_train)

        rf_pred = rf.predict(X_test)
        #cm = confusion_matrix(y_test, rf_pred)
        accuracy.append(accuracy_score(y_test, rf_pred))
        
    results[signal] = round(np.mean(accuracy), 2)

In [35]:
scores_signals = pd.DataFrame(results.values(), index=results.keys(), columns=['Accuracy']).T
scores_signals

Unnamed: 0,wavelet_smoothing,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,wavelet_clf,fng_classification,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond
Accuracy,0.51,0.5,0.49,0.5,0.51,0.52,0.49,0.51,0.51,0.52,0.51,0.56,0.52,0.52,0.52


#### Filter meaningful signals

In [36]:
counts = df_train.return_clf.value_counts().sort_index()
baseline = counts.argmax()

acc_baseline = round(counts.loc[baseline]/(counts.sum()),2)
print(f'Accuracy baseline : {acc_baseline}')

Accuracy baseline : 0.51


In [37]:
selected_features = scores_signals.T[scores_signals.T['Accuracy'] > acc_baseline]
selected_features

Unnamed: 0,Accuracy
glassnode_f4,0.52
condition_BBW,0.52
strat_BB,0.56
strat_MACD,0.52
strat_HH_LL,0.52
combine_strat_cond,0.52


In [38]:
df_signals = df_test[selected_features.index]

### Save signals

In [40]:
filepath = Path('../csv/rl_signals.csv')
df_signals.to_csv(filepath)