# Crypto Fear & Greed Index

### Import

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path 
import datatable as dt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

### Get signals

In [2]:
# Relative location of the Fear & Greed signal CSV read in the next cell.
filepath = Path('../csv/signals_crypto_fear&greed_index.csv')

In [3]:
# Read the signal CSV with datatable, hand it over to pandas and
# use the 'date' column as the row index.
df_index = (
    dt.fread(filepath)
    .to_pandas()
    .set_index('date')
)
df_index.head(3)

Unnamed: 0_level_0,fng_value,fng_classification
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-04-06,48,0
2022-04-05,53,0
2022-04-04,52,0


### Get target

In [4]:
# Relative location of the crypto market-data CSV.
# NOTE: this rebinds `filepath`, which previously pointed at the signal CSV.
filepath = Path('../../data/csv/crypto_market_data.csv')

In [5]:
# Get crypto data
# Read the market-data CSV with datatable and convert it to a pandas DataFrame.
df_crypto = dt.fread(filepath).to_pandas() 
# Turn the rows labelled 0 and 1 into a two-level column header:
# transpose, index by those two (now) columns, then transpose back.
df_crypto = df_crypto.T.set_index([0, 1]).T
# Use the column whose header is the empty string as the row index
# (presumably the timestamp column -- TODO confirm against the CSV layout).
df_crypto = df_crypto.set_index('')
# Cast every remaining column to float.
df_crypto = df_crypto.astype(float)
df_crypto.index.names = ['date']

# Get the daily return (on the close price of the BTC)
# Swap the two column levels so that 'Close' can be selected first,
# then pick the 'BTC-USD' series out of it.
df_close = df_crypto.T.swaplevel(0, 1).T['Close']['BTC-USD']
df_close.index = pd.to_datetime(df_close.index)
# Keep only the rows stamped at hour 23
# (presumably the last bar of each day -- TODO confirm the data frequency).
df_close = df_close[df_close.index.hour == 23]
# Replace the timestamps by their calendar date.
df_close.index = df_close.index.date
# Relative change versus the previous *remaining* row.
# NOTE(review): the dates displayed below are not consecutive
# (2020-04-08, 2020-04-10, 2020-04-15), so some of these "daily" returns
# appear to span several days -- confirm this is intended.
# The name `df_retrun` (sic) is reused by later cells.
df_retrun = df_close.pct_change()
# The first row has no predecessor, so its return is NaN and is dropped.
df_retrun = df_retrun.dropna()

df_retrun.head(3)

2020-04-08    0.025418
2020-04-10   -0.063996
2020-04-15   -0.032432
Name: BTC-USD, dtype: float64

#### Binary classification

In [6]:
# Encode the return as a class label: 1 when strictly positive, 0 otherwise.
df_retrun = df_retrun.gt(0).mul(1)

In [7]:
# Right join: keep every row of the label series and attach the signal
# columns to it; the label column 'BTC-USD' is renamed to 'target'.
df = (
    df_index
    .join(df_retrun, how='right')
    .rename(columns={'BTC-USD': 'target'})
)
df.head(3)

Unnamed: 0,fng_value,fng_classification,target
2020-04-08,21,-2,1
2020-04-10,15,-2,0
2020-04-15,18,-2,0


In [8]:
# Sanity check: size of the joined frame and the first / last index value.
display(
    df.shape,
    df.index.min(),
    df.index.max(),
)

(457, 3)

Timestamp('2020-04-08 00:00:00')

Timestamp('2022-04-06 00:00:00')

### Test signal 

In [9]:
# Interaction feature: index value times its classification code.
# Column-wise arithmetic gives the same values as a row-wise `apply`
# without calling a Python lambda once per row.
df['test'] = df['fng_value'] * df['fng_classification']

In [10]:
# Columns of `df` that are used as model features.
signal = [
    'fng_value',
    'fng_classification',
    'test',
]

#### 1. Get data ready for model

In [11]:
def split(df, normalize=False, test_start_year=2022):
    """Split `df` chronologically into a train and a test frame.

    Rows whose index year is strictly before `test_start_year` form the
    train set; rows from `test_start_year` onwards form the test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index exposes a `.year` attribute (e.g. a DatetimeIndex).
        A column named 'target', if present, is treated as the label and is
        never scaled.
    normalize : bool, default False
        When True, standardise every non-'target' column with a
        `StandardScaler` fitted on the train rows only, so that no
        statistic of the test period leaks into the features.
    test_start_year : int, default 2022
        First calendar year that belongs to the test set.

    Returns
    -------
    (df_train, df_test) : tuple of pandas.DataFrame
        Same columns and column order as `df`.
    """
    df_train = df[df.index.year < test_start_year]
    df_test = df[df.index.year >= test_start_year]

    if normalize:
        # Scale the features only; the label column keeps its original values.
        feature_cols = [col for col in df.columns if col != 'target']

        if feature_cols:
            scaler = StandardScaler().fit(df_train[feature_cols])

            df_train = df_train.copy()
            df_train[feature_cols] = scaler.transform(df_train[feature_cols])

            # transform() rejects an empty array, so an empty test period is
            # returned unscaled instead of raising.
            if len(df_test):
                df_test = df_test.copy()
                df_test[feature_cols] = scaler.transform(df_test[feature_cols])

    return df_train, df_test

In [12]:
# Split data (features standardised with statistics of the train period)
df_train, df_test = split(df, normalize=True)

# Get data ready for training.
# Selecting with a *list* of column names always yields a 2-D array, so a
# single feature needs no special reshape; a bare column name is wrapped in
# a list so that it cannot silently produce a 1-D array.
feature_cols = [signal] if isinstance(signal, str) else list(signal)

X_train, y_train = df_train[feature_cols].values, df_train.target
X_test, y_test = df_test[feature_cols].values, df_test.target

In [13]:
# Show the (rows, features) shape of the train and test feature matrices.
X_train.shape, X_test.shape

((364, 3), (93, 3))

#### 2. Random Forest Classifier model

In [14]:
# Fit one random forest on the train period and score it on the test period.
# A fixed seed makes the printed numbers reproducible on Restart & Run All.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

rf_pred = rf.predict(X_test)

# Rows of the matrix are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, rf_pred)
accuracy = accuracy_score(y_test, rf_pred)

print("Confusion Matrix: \n", cm)
print("\nAccuracy:", round(accuracy,2))

Confusion Matrix: 
 [[20 24]
 [29 20]]

Accuracy: 0.43


In [15]:
# Average the test accuracy over ten forests to smooth out the randomness of
# a single fit. Each run uses its loop index as seed, so the ten forests
# differ from one another while the reported mean stays reproducible.
acc = []
for i in range(10):
    rf = RandomForestClassifier(random_state=i)
    rf.fit(X_train, y_train)

    rf_pred = rf.predict(X_test)

    accuracy = accuracy_score(y_test, rf_pred)
    acc.append(accuracy)
print("\nAccuracy:", round(np.mean(acc),2))


Accuracy: 0.45
