In [12]:
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

In [2]:
btc_trading_signals_df = pd.read_csv(Path("Resources/BitcoinTradingSignals.csv"), parse_dates=True, index_col='time')
eth_trading_signals_df = pd.read_csv(Path("Resources/EthereumTradingSignals.csv"), parse_dates=True, index_col='time')
ada_trading_signals_df = pd.read_csv(Path("Resources/CardanoTradingSignals.csv"), parse_dates=True, index_col='time')
usdt_trading_signals_df = pd.read_csv(Path("Resources/TetherTradingSignals.csv"), parse_dates=True, index_col='time')
ltc_trading_signals_df = pd.read_csv(Path("Resources/LitecoinTradingSignals.csv"), parse_dates=True, index_col='time')
dgb_trading_signals_df = pd.read_csv(Path("Resources/DigiByteTradingSignals.csv"), parse_dates=True, index_col='time')
xrp_trading_signals_df = pd.read_csv(Path("Resources/RippleTradingSignals.csv"), parse_dates=True, index_col='time')
trx_trading_signals_df = pd.read_csv(Path("Resources/TronTradingSignals.csv"), parse_dates=True, index_col='time')
tusd_trading_signals_df = pd.read_csv(Path("Resources/TrueUSDTradingSignals.csv"), parse_dates=True, index_col='time')
bsv_trading_signals_df = pd.read_csv(Path("Resources/BitcoinSVTradingSignals.csv"), parse_dates=True, index_col='time')

In [5]:
# Set x variable list of features
x_var_list = ['crossover_signal', 'vol_trend_signal', 'bollinger_signal']

# Filter by x-variable list
btc_trading_signals_df[x_var_list]
eth_trading_signals_df[x_var_list]
ada_trading_signals_df[x_var_list]
usdt_trading_signals_df[x_var_list]
xrp_trading_signals_df[x_var_list]
ltc_trading_signals_df[x_var_list]
trx_trading_signals_df[x_var_list]
tusd_trading_signals_df[x_var_list]
dgb_trading_signals_df[x_var_list]
bsv_trading_signals_df[x_var_list]

Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-20 00:00:00+00:00,0.0,0.0,0.0
2019-03-21 00:00:00+00:00,-1.0,0.0,0.0
2019-03-22 00:00:00+00:00,1.0,1.0,0.0
2019-03-23 00:00:00+00:00,1.0,-1.0,0.0
2019-03-24 00:00:00+00:00,1.0,-1.0,0.0
2019-03-25 00:00:00+00:00,-1.0,-1.0,0.0
2019-03-26 00:00:00+00:00,-1.0,-1.0,0.0
2019-03-27 00:00:00+00:00,-1.0,1.0,0.0
2019-03-28 00:00:00+00:00,-1.0,1.0,0.0
2019-03-29 00:00:00+00:00,-1.0,1.0,0.0


In [6]:
# Shift DataFrame values by 1
btc_trading_signals_df[x_var_list] = btc_trading_signals_df[x_var_list].shift(1)
eth_trading_signals_df[x_var_list] = eth_trading_signals_df[x_var_list].shift(1)
ada_trading_signals_df[x_var_list] = ada_trading_signals_df[x_var_list].shift(1)
usdt_trading_signals_df[x_var_list] = usdt_trading_signals_df[x_var_list].shift(1)
xrp_trading_signals_df[x_var_list] = xrp_trading_signals_df[x_var_list].shift(1)
ltc_trading_signals_df[x_var_list] = ltc_trading_signals_df[x_var_list].shift(1)
trx_trading_signals_df[x_var_list] = trx_trading_signals_df[x_var_list].shift(1)
tusd_trading_signals_df[x_var_list] = tusd_trading_signals_df[x_var_list].shift(1)
dgb_trading_signals_df[x_var_list] = dgb_trading_signals_df[x_var_list].shift(1)
bsv_trading_signals_df[x_var_list] = bsv_trading_signals_df[x_var_list].shift(1)

In [9]:
# Drop NAs and replace positive/negative infinity values
btc_trading_signals_df.dropna(subset=x_var_list, inplace=True)
btc_trading_signals_df.dropna(subset=['BTC daily_return'], inplace=True)
btc_trading_signals_df = btc_trading_signals_df.replace([np.inf, -np.inf], np.nan)

eth_trading_signals_df.dropna(subset=x_var_list, inplace=True)
eth_trading_signals_df.dropna(subset=['ETH daily_return'], inplace=True)
eth_trading_signals_df = eth_trading_signals_df.replace([np.inf, -np.inf], np.nan)

ada_trading_signals_df.dropna(subset=x_var_list, inplace=True)
ada_trading_signals_df.dropna(subset=['ADA daily_return'], inplace=True)
ada_trading_signals_df = ada_trading_signals_df.replace([np.inf, -np.inf], np.nan)

usdt_trading_signals_df.dropna(subset=x_var_list, inplace=True)
usdt_trading_signals_df.dropna(subset=['USDT daily_return'], inplace=True)
usdt_trading_signals_df = usdt_trading_signals_df.replace([np.inf, -np.inf], np.nan)

xrp_trading_signals_df.dropna(subset=x_var_list, inplace=True)
xrp_trading_signals_df.dropna(subset=['XRP daily_return'], inplace=True)
xrp_trading_signals_df = xrp_trading_signals_df.replace([np.inf, -np.inf], np.nan)

ltc_trading_signals_df.dropna(subset=x_var_list, inplace=True)
ltc_trading_signals_df.dropna(subset=['LTC daily_return'], inplace=True)
ltc_trading_signals_df = ltc_trading_signals_df.replace([np.inf, -np.inf], np.nan)

trx_trading_signals_df.dropna(subset=x_var_list, inplace=True)
trx_trading_signals_df.dropna(subset=['TRX daily_return'], inplace=True)
trx_trading_signals_df = trx_trading_signals_df.replace([np.inf, -np.inf], np.nan)

tusd_trading_signals_df.dropna(subset=x_var_list, inplace=True)
tusd_trading_signals_df.dropna(subset=['TUSD daily_return'], inplace=True)
tusd_trading_signals_df = tusd_trading_signals_df.replace([np.inf, -np.inf], np.nan)

dgb_trading_signals_df.dropna(subset=x_var_list, inplace=True)
dgb_trading_signals_df.dropna(subset=['DGB daily_return'], inplace=True)
dgb_trading_signals_df = dgb_trading_signals_df.replace([np.inf, -np.inf], np.nan)

bsv_trading_signals_df.dropna(subset=x_var_list, inplace=True)
bsv_trading_signals_df.dropna(subset=['BSV daily_return'], inplace=True)
bsv_trading_signals_df = bsv_trading_signals_df.replace([np.inf, -np.inf], np.nan)


In [10]:
# Construct the dependent variable where if daily return is greater than 0, then 1, else, 0.
btc_trading_signals_df['Positive Return'] = np.where(btc_trading_signals_df['BTC daily_return'] > 0, 1.0, 0.0)

eth_trading_signals_df['Positive Return'] = np.where(eth_trading_signals_df['ETH daily_return'] > 0, 1.0, 0.0)

ada_trading_signals_df['Positive Return'] = np.where(ada_trading_signals_df['ADA daily_return'] > 0, 1.0, 0.0)

usdt_trading_signals_df['Positive Return'] = np.where(usdt_trading_signals_df['USDT daily_return'] > 0, 1.0, 0.0)

xrp_trading_signals_df['Positive Return'] = np.where(xrp_trading_signals_df['XRP daily_return'] > 0, 1.0, 0.0)

ltc_trading_signals_df['Positive Return'] = np.where(ltc_trading_signals_df['LTC daily_return'] > 0, 1.0, 0.0)

trx_trading_signals_df['Positive Return'] = np.where(trx_trading_signals_df['TRX daily_return'] > 0, 1.0, 0.0)

tusd_trading_signals_df['Positive Return'] = np.where(tusd_trading_signals_df['TUSD daily_return'] > 0, 1.0, 0.0)

dgb_trading_signals_df['Positive Return'] = np.where(dgb_trading_signals_df['DGB daily_return'] > 0, 1.0, 0.0)

bsv_trading_signals_df['Positive Return'] = np.where(bsv_trading_signals_df['BSV daily_return'] > 0, 1.0, 0.0)


In [22]:
# Construct training start and end dates
training_start = btc_trading_signals_df.index.min().strftime(format= '%Y-%m-%d')
training_end = '2021-03-20'

# Construct testing start and end dates
testing_start =  '2021-03-15'
testing_end = btc_trading_signals_df.index.max().strftime(format= '%Y-%m-%d')

In [23]:
# Construct the X_train and y_train datasets
btc_X_train = btc_trading_signals_df[x_var_list][training_start:training_end]
btc_y_train = btc_trading_signals_df['Positive Return'][training_start:training_end]

eth_X_train = eth_trading_signals_df[x_var_list][training_start:training_end]
eth_y_train = eth_trading_signals_df['Positive Return'][training_start:training_end]

ada_X_train = ada_trading_signals_df[x_var_list][training_start:training_end]
ada_y_train = ada_trading_signals_df['Positive Return'][training_start:training_end]

usdt_X_train = usdt_trading_signals_df[x_var_list][training_start:training_end]
usdt_y_train = usdt_trading_signals_df['Positive Return'][training_start:training_end]

xrp_X_train = xrp_trading_signals_df[x_var_list][training_start:training_end]
xrp_y_train = xrp_trading_signals_df['Positive Return'][training_start:training_end]

ltc_X_train = ltc_trading_signals_df[x_var_list][training_start:training_end]
ltc_y_train = ltc_trading_signals_df['Positive Return'][training_start:training_end]

trx_X_train = trx_trading_signals_df[x_var_list][training_start:training_end]
trx_y_train = trx_trading_signals_df['Positive Return'][training_start:training_end]

tusd_X_train = tusd_trading_signals_df[x_var_list][training_start:training_end]
tusd_y_train = tusd_trading_signals_df['Positive Return'][training_start:training_end]

dgb_X_train = dgb_trading_signals_df[x_var_list][training_start:training_end]
dgb_y_train = dgb_trading_signals_df['Positive Return'][training_start:training_end]

bsv_X_train = bsv_trading_signals_df[x_var_list][training_start:training_end]
bsv_y_train = bsv_trading_signals_df['Positive Return'][training_start:training_end]

In [24]:
# Construct the X test and y test datasets
btc_X_test = btc_trading_signals_df[x_var_list][training_start:training_end]
btc_y_test = btc_trading_signals_df['Positive Return'][training_start:training_end]

eth_X_test = eth_trading_signals_df[x_var_list][training_start:training_end]
eth_y_test = eth_trading_signals_df['Positive Return'][training_start:training_end]

ada_X_test = ada_trading_signals_df[x_var_list][training_start:training_end]
ada_y_test = ada_trading_signals_df['Positive Return'][training_start:training_end]

usdt_X_test = usdt_trading_signals_df[x_var_list][training_start:training_end]
usdt_y_test = usdt_trading_signals_df['Positive Return'][training_start:training_end]

xrp_X_test = xrp_trading_signals_df[x_var_list][training_start:training_end]
xrp_y_test = xrp_trading_signals_df['Positive Return'][training_start:training_end]

ltc_X_test = ltc_trading_signals_df[x_var_list][training_start:training_end]
ltc_y_test = ltc_trading_signals_df['Positive Return'][training_start:training_end]

trx_X_test = trx_trading_signals_df[x_var_list][training_start:training_end]
trx_y_test = trx_trading_signals_df['Positive Return'][training_start:training_end]

tusd_X_test = tusd_trading_signals_df[x_var_list][training_start:training_end]
tusd_y_test = tusd_trading_signals_df['Positive Return'][training_start:training_end]

dgb_X_test = dgb_trading_signals_df[x_var_list][training_start:training_end]
dgb_y_test = dgb_trading_signals_df['Positive Return'][training_start:training_end]

bsv_X_test = bsv_trading_signals_df[x_var_list][training_start:training_end]
bsv_y_test = bsv_trading_signals_df['Positive Return'][training_start:training_end]

In [25]:
# Fit a SKLearn linear regression using just the training set (X_train, Y_train):
btc_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
btc_model.fit(btc_X_train, btc_y_train)

eth_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
eth_model.fit(eth_X_train, eth_y_train)

ada_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
ada_model.fit(ada_X_train, ada_y_train)

usdt_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
usdt_model.fit(usdt_X_train, usdt_y_train)

xrp_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
xrp_model.fit(xrp_X_train, xrp_y_train)

ltc_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
ltc_model.fit(ltc_X_train, ltc_y_train)

trx_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
trx_model.fit(trx_X_train, trx_y_train)

tusd_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
tusd_model.fit(tusd_X_train, tusd_y_train)

dgb_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
dgb_model.fit(dgb_X_train, dgb_y_train)

bsv_model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
bsv_model.fit(bsv_X_train, bsv_y_train)

# Make a prediction of "y" values from the X_test dataset
btc_predictions = btc_model.predict(btc_X_test)

eth_predictions = eth_model.predict(eth_X_test)

ada_predictions = ada_model.predict(ada_X_test)

usdt_predictions = usdt_model.predict(usdt_X_test)

xrp_predictions = xrp_model.predict(xrp_X_test)

ltc_predictions = ltc_model.predict(ltc_X_test)

trx_predictions = trx_model.predict(trx_X_test)

tusd_predictions = tusd_model.predict(tusd_X_test)

dgb_predictions = dgb_model.predict(dgb_X_test)

bsv_predictions = bsv_model.predict(bsv_X_test)

# Assemble actual y data (Y_test) with predicted y data (from just above) into two columns in a dataframe:
btc_results = btc_y_test.to_frame()
btc_results["Predicted Value"] = btc_predictions

eth_results = eth_y_test.to_frame()
eth_results["Predicted Value"] = eth_predictions

ada_results = ada_y_test.to_frame()
ada_results["Predicted Value"] = ada_predictions

usdt_results = usdt_y_test.to_frame()
usdt_results["Predicted Value"] = usdt_predictions

xrp_results = xrp_y_test.to_frame()
xrp_results["Predicted Value"] = xrp_predictions

ltc_results = ltc_y_test.to_frame()
ltc_results["Predicted Value"] = ltc_predictions

trx_results = trx_y_test.to_frame()
trx_results["Predicted Value"] = trx_predictions

tusd_results = tusd_y_test.to_frame()
tusd_results["Predicted Value"] = tusd_predictions

dgb_results = dgb_y_test.to_frame()
dgb_results["Predicted Value"] = dgb_predictions

bsv_results = bsv_y_test.to_frame()
bsv_results["Predicted Value"] = bsv_predictions
ada_results

Unnamed: 0_level_0,Positive Return,Predicted Value
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-03-21 00:00:00+00:00,0.0,1.0
2019-03-22 00:00:00+00:00,1.0,1.0
2019-03-23 00:00:00+00:00,1.0,1.0
2019-03-24 00:00:00+00:00,0.0,1.0
2019-03-25 00:00:00+00:00,0.0,1.0
2019-03-26 00:00:00+00:00,1.0,1.0
2019-03-27 00:00:00+00:00,1.0,1.0
2019-03-28 00:00:00+00:00,0.0,1.0
2019-03-29 00:00:00+00:00,1.0,1.0
2019-03-30 00:00:00+00:00,1.0,1.0


In [26]:
# Save the pre-trained model
from joblib import dump, load
dump(btc_model, 'btc_random_forest_model.joblib')
dump(eth_model, 'eth_random_forest_model.joblib')
dump(ada_model, 'ada_random_forest_model.joblib')
dump(usdt_model, 'usdt_random_forest_model.joblib')
dump(xrp_model, 'xrp_random_forest_model.joblib')
dump(ltc_model, 'ltc_random_forest_model.joblib')
dump(trx_model, 'trx_random_forest_model.joblib')
dump(tusd_model, 'tusd_random_forest_model.joblib')
dump(dgb_model, 'dgb_random_forest_model.joblib')
dump(bsv_model, 'bsv_random_forest_model.joblib')

['bsv_random_forest_model.joblib']

In [None]:
#classification report
#Confusion matrix
