# Test signals

### Import

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path 
import datatable as dt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

## 1. Data

### Get signals

In [2]:
def reshape_date(date):
    day, month, year = date.split('/')
    day, month, year = int(day), int(month), int(year)+2000
    return pd.Timestamp(year, month, day)

def display_info(df):
    print(f'Shape : {df.shape}')
    print(f'Start date : {df.index.min()}')
    print(f'End date : {df.index.max()}')
    display(df.tail(3))
    
def get_data(filepath, resample=False, reshapedate=False):
    df = dt.fread(filepath).to_pandas() 
    if reshapedate:
        df['date'] = df.apply(lambda row: reshape_date(row.date), axis=1)
    df = df.set_index('date')
    if resample:
        df = df.resample('H').ffill()
    df.index = pd.to_datetime(df.index).strftime('%Y-%m-%d %H')
    df = df[~df.index.duplicated(keep='first')]
    display_info(df)
    return df

#### Market data

In [3]:
filepath = Path('../csv/signals_btc_strat.csv')
df_ta = get_data(filepath, resample=False)

filepath = Path('../csv/signals_wavelets.csv')
df_wavelets = get_data(filepath, resample=False)
df_wavelets['wavelet_smoothing'] = df_wavelets['wavelet_smoothing'].pct_change()

filepath = Path('../csv/signals_HMM_regime_detection.csv')
df_hmm = get_data(filepath, resample=False)

filepath = Path('../csv/signals_OpenInterest.csv')
df_openinterest = get_data(filepath, resample=True)

Shape : (17130, 6)
Start date : 2020-04-09 20
End date : 2022-04-07 00


Unnamed: 0_level_0,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-06 19,True,True,0,-1,False,False
2022-04-06 20,True,True,0,-1,False,False
2022-04-07 00,True,True,0,-1,False,False


Shape : (17099, 2)
Start date : 2020-04-11 04
End date : 2022-04-07 00


Unnamed: 0_level_0,wavelet_smoothing,wavelet_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-04-06 19,41793.751592,True
2022-04-06 20,41799.037093,True
2022-04-07 00,41798.827121,True


Shape : (17887, 1)
Start date : 2020-04-01 00
End date : 2022-04-17 00


Unnamed: 0_level_0,HMM_detected_regime
date,Unnamed: 1_level_1
2022-04-16 22,False
2022-04-16 23,False
2022-04-17 00,False


Shape : (18721, 1)
Start date : 2020-02-28 00
End date : 2022-04-18 00


Unnamed: 0_level_0,OpenInterest
date,Unnamed: 1_level_1
2022-04-17 22,17508900000.0
2022-04-17 23,17508900000.0
2022-04-18 00,17618810000.0


In [4]:
df_wavelets.head(3)

Unnamed: 0_level_0,wavelet_smoothing,wavelet_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-11 04,,False
2020-04-11 05,5.2e-05,False
2020-04-11 06,-7.8e-05,False


#### Alternative data

In [5]:
filepath = Path('../csv/signals_glassnode.csv')
df_glassnode = get_data(filepath, resample=True)

filepath = Path('../csv/Twitter_Crypto_Signal.csv')
df_twitter = get_data(filepath, resample=True, reshapedate=True)

filepath = Path('../csv/signals_crypto_fear&greed_index.csv')
df_index = get_data(filepath, resample=True)

Shape : (19801, 5)
Start date : 2020-01-01 00
End date : 2022-04-05 00


Unnamed: 0_level_0,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-04 22,6.099674,6.561471,4.737371,0.0,14.345889
2022-04-04 23,6.099674,6.561471,4.737371,0.0,14.345889
2022-04-05 00,6.792104,2.989913,4.85569,0.0,10.875053


Shape : (36601, 1)
Start date : 2018-02-01 00
End date : 2022-04-06 00


Unnamed: 0_level_0,Twitter Sentiment score
date,Unnamed: 1_level_1
2022-04-05 22,0.43
2022-04-05 23,0.43
2022-04-06 00,0.31


Shape : (36601, 2)
Start date : 2018-02-01 00
End date : 2022-04-06 00


Unnamed: 0_level_0,fng_value,fng_classification
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-04-05 22,53,0
2022-04-05 23,53,0
2022-04-06 00,48,0


### Get target

#### Close price

In [6]:
filepath = Path('../../data/csv/crypto_market_data.csv')
# Get crypto data
df_crypto = dt.fread(filepath).to_pandas() 
df_crypto = df_crypto.T.set_index([0, 1]).T
df_crypto = df_crypto.set_index('')
df_crypto = df_crypto.astype(float)
df_crypto.index.names = ['date']

# Get the daily return (on the close price of the BTC)
df_close = df_crypto.T.swaplevel(0, 1).T['Close']['BTC-USD']
df_close.index = pd.to_datetime(df_close.index)
#df_close = df_close[df_close.index.hour == 23]
#df_close.index = df_close.index.date
df_close.index = pd.to_datetime(df_close.index).strftime('%Y-%m-%d %H')
df_close = pd.DataFrame(df_close, index=df_close.index, columns=['BTC-USD'])
display(df_close.tail(3))

Unnamed: 0_level_0,BTC-USD
date,Unnamed: 1_level_1
2022-04-07 09,43502.988281
2022-04-07 09,43502.988281
2022-04-07 09,43482.363281


#### Binairy classification

In [7]:
df_return = df_close.pct_change().rename(columns={'BTC-USD':'return'})
df_return = df_return.dropna()
df_return['return_clf'] = (df_return > 0)*1
display(df_return.head(3))

Unnamed: 0_level_0,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-07 10,0.005891,1
2020-04-07 11,-0.006678,0
2020-04-07 12,-0.002894,0


### Data engineering

In [8]:
#pd.concat([df_ta, df_wavelet, df_hmm, df_openinterest, df_glassnode, df_twitter, df_index, df_return]).dropna()

In [9]:
df = df_wavelets.join(df_ta).join(df_hmm).join(df_openinterest).join(df_glassnode).join(df_twitter).join(df_index).join(df_return).dropna(axis=0)*1
df.index = pd.to_datetime(df.index)
display(df.shape, df.index.min(), df.index.max())
df.head(3)

(17035, 20)

Timestamp('2020-04-11 05:00:00')

Timestamp('2022-04-05 00:00:00')

Unnamed: 0_level_0,wavelet_smoothing,wavelet_clf,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,HMM_detected_regime,OpenInterest,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,Twitter Sentiment score,fng_value,fng_classification,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-11 05:00:00,5.2e-05,0,0,0,0,1,0,0,0,1915603000.0,5.429081,11.700374,13.823167,0.0,11.085744,0.0,15.0,-2.0,0.002531,1
2020-04-11 06:00:00,-7.8e-05,0,0,0,0,1,0,0,0,1915603000.0,5.429081,11.700374,13.823167,0.0,11.085744,0.0,15.0,-2.0,-0.000737,0
2020-04-11 07:00:00,0.000191,0,0,0,0,1,0,0,0,1915603000.0,5.429081,11.700374,13.823167,0.0,11.085744,0.0,15.0,-2.0,-0.005555,0


#### Split

In [10]:
df_train = df[df.index < '20210101']
df_test = df[(df.index >= '20210101')&(df.index < '20210401')]
df_rl = df[df.index >= '20210401']

n_train, n_test = len(df_train), len(df_test)
print(f'Train : {n_train} ({int(n_train/(n_train+n_test)*100)}%)')
print(f'Test : {n_test} ({int(n_test/(n_train+n_test)*100)}%)')

Train : 6160 (74%)
Test : 2155 (25%)


#### Standarazation

In [11]:
def Standarazation(df_train, df_test, features):  
    df_train_nomalize = df_train[features]
    df_test_nomalize = df_test[features]

    scaler = StandardScaler().fit(df_train_nomalize)
    df_train_nomalize = pd.DataFrame(scaler.transform(df_train_nomalize),
                               columns=df_train_nomalize.columns,index=df_train_nomalize.index)
    df_test_nomalize = pd.DataFrame(scaler.transform(df_test_nomalize),
                               columns=df_test_nomalize.columns,index=df_test_nomalize.index)
    
    df_train = df_train_nomalize.join(df_train.drop(columns=features))
    df_test = df_test_nomalize.join(df_test.drop(columns=features))
    
    return scaler, df_train, df_test

In [12]:
features = ['wavelet_smoothing', 'OpenInterest', 'fng_value', 'glassnode_f1', 'glassnode_f2', 'glassnode_f3', 'glassnode_f4', 'glassnode_f5', 'Twitter Sentiment score']
scaler, df_train, df_test = Standarazation(df_train, df_test, features)

display(df_train.head(2))
display(df_train.head(2))

Unnamed: 0_level_0,wavelet_smoothing,OpenInterest,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,Twitter Sentiment score,wavelet_clf,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,HMM_detected_regime,fng_classification,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-11 05:00:00,-0.10848,-1.553528,-1.952925,1.749049,2.168705,2.902719,0.0,-1.864898,-0.283404,0,0,0,0,1,0,0,0,-2.0,0.002531,1
2020-04-11 06:00:00,-0.202685,-1.553528,-1.952925,1.749049,2.168705,2.902719,0.0,-1.864898,-0.283404,0,0,0,0,1,0,0,0,-2.0,-0.000737,0


Unnamed: 0_level_0,wavelet_smoothing,OpenInterest,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,Twitter Sentiment score,wavelet_clf,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,HMM_detected_regime,fng_classification,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-11 05:00:00,-0.10848,-1.553528,-1.952925,1.749049,2.168705,2.902719,0.0,-1.864898,-0.283404,0,0,0,0,1,0,0,0,-2.0,0.002531,1
2020-04-11 06:00:00,-0.202685,-1.553528,-1.952925,1.749049,2.168705,2.902719,0.0,-1.864898,-0.283404,0,0,0,0,1,0,0,0,-2.0,-0.000737,0


In [13]:
df_rl_nomalize = df_rl[features]
df_rl_nomalize = pd.DataFrame(scaler.transform(df_rl_nomalize),
                               columns=df_rl_nomalize.columns,index=df_rl_nomalize.index)
df_rl = df_rl_nomalize.join(df_rl.drop(columns=features))

In [14]:
display(df_rl.head(2))

Unnamed: 0_level_0,wavelet_smoothing,OpenInterest,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,Twitter Sentiment score,wavelet_clf,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,HMM_detected_regime,fng_classification,return,return_clf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-04-01 00:00:00,0.138314,10.533728,0.656748,0.978479,0.200756,-0.482215,0.0,0.906367,-0.283404,1,1,1,0,1,0,0,0,1.0,0.008986,1
2021-04-01 01:00:00,0.18952,10.533728,0.656748,0.978479,0.200756,-0.482215,0.0,0.906367,-0.283404,1,1,1,0,1,0,0,0,1.0,-0.000161,0


## 2. Test signal 

In [15]:
#df['test'] = df.apply(lambda row: row.fng_value * row.fng_classification, axis=1)

In [16]:
signals = df_train.drop(columns=['return','return_clf']).columns

In [17]:
results = {}
for signal in signals:
    # Get data ready for training
    X_train, y_train = df_train[signal].values.reshape(-1, 1), df_train.return_clf
    X_test, y_test = df_test[signal].values.reshape(-1, 1), df_test.return_clf
    
    # Random Forest model
    rf = RandomForestClassifier()
    rf.fit(X_train, y_train)
    rf_pred = rf.predict(X_test)
    accuracy = accuracy_score(y_test, rf_pred)
        
    results[signal] = round(accuracy, 2)

In [52]:
#market_data = ['wavelet_smoothing','wavelet_clf','open_interest', 'condition_BBW', 'condition_Vol', 'strat_BB', 'strat_MACD', 'strat_HH_LL','combine_strat_cond', 'regime_detection', 'image_CNN']
#alternative_data = ['fng_classification', 'fng_value', 'glassnode_f1','glassnode_f2', 'glassnode_f3', 'glassnode_f4', 'glassnode_f5','twitter_sentiment']

In [53]:
#display(pd.DataFrame(alternative_data, columns=['Alternative data signals']))
#display(pd.DataFrame(market_data, columns=['Market data signals']))

In [18]:
scores_signals = pd.DataFrame(results.values(), index=results.keys(), columns=['Accuracy']).T
scores_signals

Unnamed: 0,wavelet_smoothing,OpenInterest,fng_value,glassnode_f1,glassnode_f2,glassnode_f3,glassnode_f4,glassnode_f5,Twitter Sentiment score,wavelet_clf,condition_BBW,condition_Vol,strat_BB,strat_MACD,strat_HH_LL,combine_strat_cond,HMM_detected_regime,fng_classification
Accuracy,0.5,0.53,0.5,0.5,0.52,0.52,0.53,0.51,0.52,0.53,0.53,0.53,0.57,0.54,0.53,0.53,0.53,0.53


#### Filter meaningful signals

In [32]:
counts_train = df_train.return_clf.value_counts().sort_index()
baseline = counts_train.argmax()

acc_baseline = round(counts_train.loc[baseline]/(counts_train.sum()),2)
print(f'Accuracy baseline : {acc_baseline}')

Accuracy baseline : 0.52


In [48]:
selected_features = scores_signals.T[scores_signals.T['Accuracy'] > acc_baseline]
display(selected_features)
#display(selected_features.tail(5))

Unnamed: 0,Accuracy
OpenInterest,0.53
glassnode_f4,0.53
wavelet_clf,0.53
condition_BBW,0.53
condition_Vol,0.53
strat_BB,0.57
strat_MACD,0.54
strat_HH_LL,0.53
combine_strat_cond,0.53
HMM_detected_regime,0.53


In [54]:
#market_data_selected = ['wavelet_clf','open_interest','condition_BBW','condition_Vol','strat_BB','strat_MACD','strat_HH_LL','combine_strat_cond','regime_detection']
#values = [0.53, 0.53, 0.53,0.53, 0.57,0.54,0.53,0.53, 0.53]
#display(pd.DataFrame(values, index=market_data_selected, columns=['Accuracy']))

#alternative_data_selected = ['glassnode_f4', 'fng_clf']
#values = [0.53, 0.53]
#display(pd.DataFrame(values, index=alternative_data_selected, columns=['Accuracy']))

In [55]:
df_signals = df_rl
df_signals_selected = df_signals[selected_features.index]

### Save signals

In [56]:
filepath = Path('../csv/rl_signals_all.csv')
df_signals_selected.to_csv(filepath)

In [57]:
filepath = Path('../csv/rl_signals_selected.csv')
df_signals_selected.to_csv(filepath)