In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

In [2]:
PATH = "/Users/admin/PycharmProjects/Market_Prediction_Using_News_and_Indicators/Crypto_Currencies_Data/"

### MLP

In [3]:
from sklearn.neural_network import MLPClassifier

#### No indicators

In [20]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [21]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)

In [22]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

In [23]:
precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.48952095808383234
recall = 1.0
accuracy = 0.48952095808383234
f1-score = 0.657286432160804


In [27]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.49096031078182634
recall = 0.9771309771309771
accuracy = 0.4987611569181689
f1-score = 0.6534871111596805


#### With Indicators

In [28]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [41]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)

In [42]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4920886075949367
recall = 0.9510703363914373
accuracy = 0.49550898203592814
f1-score = 0.64859228362878


In [43]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4844068105233599
recall = 0.9454282827776803
accuracy = 0.4859172618977805
f1-score = 0.64015080194381


### Reduce dim

In [44]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [49]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)

In [50]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.48952095808383234
recall = 1.0
accuracy = 0.48952095808383234
f1-score = 0.657286432160804


In [55]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4839390225308171
recall = 1.0
accuracy = 0.4839390225308171
f1-score = 0.652190007976255


### With Normalization

In [56]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [57]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)



In [58]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8333333333333334
recall = 0.7951070336391437
accuracy = 0.8218562874251497
f1-score = 0.8137715179968701


In [61]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")



precision = 0.7792127411753813
recall = 0.7211841127503777
accuracy = 0.7661875496025928
f1-score = 0.7490626583347408




### Random Forest

In [62]:
from sklearn.ensemble import RandomForestClassifier

### No indicators

In [63]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [64]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [66]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6265060240963856
recall = 0.636085626911315
accuracy = 0.6362275449101796
f1-score = 0.6312594840667678


In [142]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6327795056855157
recall = 0.6026018085939311
accuracy = 0.6381579269038015
f1-score = 0.6168112808493198


#### With Indicators

In [69]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [70]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [71]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8955696202531646
recall = 0.8654434250764526
accuracy = 0.8847305389221557
f1-score = 0.8802488335925349


In [72]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8704830682678784
recall = 0.8377355576150757
accuracy = 0.8610946583627528
f1-score = 0.8537696167072117


### Reduce dim

In [73]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [74]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [75]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.887459807073955
recall = 0.8440366972477065
accuracy = 0.8712574850299402
f1-score = 0.865203761755486


In [76]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8362892078681552
recall = 0.8285219881605423
accuracy = 0.838353213466102
f1-score = 0.8323852743261408


### With normalization

In [77]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [78]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [79]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8924050632911392
recall = 0.8623853211009175
accuracy = 0.8817365269461078
f1-score = 0.8771384136858477


In [80]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.863677811550152
recall = 0.8347944793727926
accuracy = 0.8561480825543527
f1-score = 0.8489647771609696


### SVM

In [81]:
from sklearn import svm

### No indicators

In [94]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [106]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [107]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.2857142857142857
recall = 0.0061162079510703364
accuracy = 0.5059880239520959
f1-score = 0.011976047904191617


In [108]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5086938663745892
recall = 0.2714475365077775
accuracy = 0.521002666322625
f1-score = 0.34259245867021504


### With indicators

In [129]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [117]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [118]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5649717514124294
recall = 0.3058103975535168
accuracy = 0.5449101796407185
f1-score = 0.39682539682539686


In [137]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5834457939721098
recall = 0.3725379689235111
accuracy = 0.5580770614153403
f1-score = 0.42820308057730505


### Reduce dim

In [114]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [115]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [116]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5649717514124294
recall = 0.3058103975535168
accuracy = 0.5449101796407185
f1-score = 0.39682539682539686


In [119]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6264803204458377
recall = 0.3618945637017926
accuracy = 0.582291923231802
f1-score = 0.44067471629472765


### With Normalization

In [122]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [123]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [124]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7408637873754153
recall = 0.6819571865443425
accuracy = 0.7275449101796407
f1-score = 0.7101910828025478


In [127]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7409862916286982
recall = 0.7242958764489825
accuracy = 0.7439408385282972
f1-score = 0.7321668038779057


### Boosting

In [138]:
from lightgbm import LGBMClassifier

### No indicators

In [139]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [140]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [141]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6079545454545454
recall = 0.654434250764526
accuracy = 0.624251497005988
f1-score = 0.6303387334315169


In [144]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.622616709973965
recall = 0.6135207071322175
accuracy = 0.631740618506295
f1-score = 0.6167506966021358


### With indicators

In [145]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [146]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [147]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9025974025974026
recall = 0.8501529051987767
accuracy = 0.8817365269461078
f1-score = 0.8755905511811023


In [148]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8739227268638311
recall = 0.8387366625083489
accuracy = 0.8630631952084098
f1-score = 0.8557720136364807


### Reduce dim

In [149]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [150]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [151]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8485804416403786
recall = 0.8226299694189603
accuracy = 0.8413173652694611
f1-score = 0.8354037267080746


In [152]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8378148176908307
recall = 0.8375720550620267
accuracy = 0.8423053416452756
f1-score = 0.8373023130304308


### With Normalization

In [153]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [154]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [155]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9025974025974026
recall = 0.8501529051987767
accuracy = 0.8817365269461078
f1-score = 0.8755905511811023


In [156]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8825064777992961
recall = 0.8437372561927962
accuracy = 0.8699951106221734
f1-score = 0.8625137742449482
