In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

In [2]:
PATH = "/Users/admin/PycharmProjects/Market_Prediction_Using_News_and_Indicators/Crypto_Currencies_Data/"

### MLP

In [3]:
from sklearn.neural_network import MLPClassifier

#### No indicators

In [157]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [170]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)

In [171]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5119760479041916
recall = 1.0
accuracy = 0.5119760479041916
f1-score = 0.6772277227722773


In [172]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5022294288518001
recall = 1.0
accuracy = 0.5022294288518001
f1-score = 0.668584843142945


#### With Indicators

In [173]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [184]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)

In [185]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5122324159021406
recall = 0.97953216374269
accuracy = 0.5119760479041916
f1-score = 0.6726907630522089


In [186]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5023052638940229
recall = 0.9892659440852212
accuracy = 0.5022294288518001
f1-score = 0.6662009921109252


### Reduce dim

In [192]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [195]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)

In [196]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5119760479041916
recall = 1.0
accuracy = 0.5119760479041916
f1-score = 0.6772277227722773


In [203]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5022294288518001
recall = 1.0
accuracy = 0.5022294288518001
f1-score = 0.668584843142945


### With Normalization

In [204]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [205]:
mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_eth.fit(X_eth_train, y_eth_train)



In [206]:
y_predict_eth = mlp_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.839541547277937
recall = 0.8567251461988304
accuracy = 0.842814371257485
f1-score = 0.8480463096960926


In [207]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    mlp_model_eth = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = mlp_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")



precision = 0.7616531165311653
recall = 0.7746197143787505
accuracy = 0.764695073558446
f1-score = 0.7672408859383049




### Random Forest

In [62]:
from sklearn.ensemble import RandomForestClassifier

### No indicators

In [208]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [209]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [210]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6468926553672316
recall = 0.6695906432748538
accuracy = 0.6437125748502994
f1-score = 0.6580459770114941


In [211]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.648989009981278
recall = 0.6253515880776421
accuracy = 0.6426219288595527
f1-score = 0.6367693976165198


#### With Indicators

In [212]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [213]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [214]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9142011834319527
recall = 0.9035087719298246
accuracy = 0.907185628742515
f1-score = 0.9088235294117647


In [215]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8764093329157532
recall = 0.8726566497650835
accuracy = 0.8739424629471075
f1-score = 0.8742053501716346


### Reduce dim

In [216]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [217]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [218]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9009009009009009
recall = 0.8771929824561403
accuracy = 0.8877245508982036
f1-score = 0.8888888888888888


In [219]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8456360078277887
recall = 0.8626552542215193
accuracy = 0.8517009535426514
f1-score = 0.8538255687696109


### With normalization

In [220]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [221]:
rf_model_eth = RandomForestClassifier(n_estimators=1000)
rf_model_eth.fit(X_eth_train, y_eth_train)

In [222]:
y_predict_eth = rf_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9196428571428571
recall = 0.9035087719298246
accuracy = 0.9101796407185628
f1-score = 0.911504424778761


In [223]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    rf_model_eth = RandomForestClassifier(n_estimators=1000)
    rf_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = rf_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8704383719268981
recall = 0.8726178846660775
accuracy = 0.8704834762278963
f1-score = 0.871189773844641


### SVM

In [81]:
from sklearn import svm

### No indicators

In [224]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [227]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [228]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5537974683544303
recall = 0.5116959064327485
accuracy = 0.5389221556886228
f1-score = 0.5319148936170213


In [229]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5275728414862337
recall = 0.5696996480129011
accuracy = 0.5274304781787688
f1-score = 0.5469940393952553


### With indicators

In [242]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [245]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [246]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5338645418326693
recall = 0.391812865497076
accuracy = 0.5134730538922155
f1-score = 0.45193929173693076


In [235]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5199677121771218
recall = 0.541490285466189
accuracy = 0.5071691629232591
f1-score = 0.4929373648842619


### Reduce dim

In [236]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [239]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [240]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.528125
recall = 0.49415204678362573
accuracy = 0.5149700598802395
f1-score = 0.5105740181268881


In [241]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5650129480714189
recall = 0.6071428571428571
accuracy = 0.5136043052118398
f1-score = 0.49020594878076784


### With Normalization

In [247]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [248]:
svm_model_eth = svm.SVC(kernel='rbf')

svm_model_eth.fit(X_eth_train, y_eth_train)

In [249]:
y_predict_eth = svm_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7887323943661971
recall = 0.8187134502923976
accuracy = 0.7949101796407185
f1-score = 0.8034433285509325


In [250]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    svm_model_eth = svm.SVC(kernel='rbf')
    svm_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = svm_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7769562325912818
recall = 0.7953560622986432
accuracy = 0.7824728028358392
f1-score = 0.7858977849456178


### Boosting

In [138]:
from lightgbm import LGBMClassifier

### No indicators

In [251]:
X_eth = pd.read_csv(PATH + "/ETH.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [252]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [253]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6396648044692738
recall = 0.6695906432748538
accuracy = 0.6377245508982036
f1-score = 0.6542857142857144


In [254]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_eth)):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6472578437806048
recall = 0.6107021557698205
accuracy = 0.6376677667766777
f1-score = 0.6277006119858168


### With indicators

In [255]:
X_eth = pd.read_csv(PATH + "/ETH_indicators.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [256]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [257]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8905775075987842
recall = 0.8567251461988304
accuracy = 0.8727544910179641
f1-score = 0.8733233979135618


In [258]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in tqdm(enumerate(folds.split(X_eth))):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

0it [00:00, ?it/s]

precision = 0.893513217430888
recall = 0.8726547015590136
accuracy = 0.8833259992665934
f1-score = 0.8825434217273298


### Reduce dim

In [259]:
X_eth = pd.read_csv(PATH + "/ETH_reduce.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [260]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [261]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8693009118541033
recall = 0.8362573099415205
accuracy = 0.8517964071856288
f1-score = 0.8524590163934427


In [262]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in tqdm(enumerate(folds.split(X_eth))):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

0it [00:00, ?it/s]

precision = 0.8409054772159917
recall = 0.8647644998708801
accuracy = 0.8492213665811026
f1-score = 0.8520667746930096


### With Normalization

In [263]:
X_eth = pd.read_csv(PATH + "/ETH_normalization.csv", index_col='time')
y_eth = pd.read_csv(PATH + "/ETH_target.csv", index_col='time')
y_eth = y_eth.values.ravel()

X_eth_train, X_eth_test, y_eth_train, y_eth_test = train_test_split(X_eth, y_eth, test_size=0.33, random_state=42)

In [264]:
boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_eth.fit(X_eth_train, y_eth_train)

In [265]:
y_predict_eth = boosting_model_eth.predict(X_eth_test)

precision = precision_score(y_eth_test, y_predict_eth)
recall = recall_score(y_eth_test, y_predict_eth)
accuracy = accuracy_score(y_eth_test, y_predict_eth)
f1 = f1_score(y_eth_test, y_predict_eth)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8858858858858859
recall = 0.8625730994152047
accuracy = 0.8727544910179641
f1-score = 0.874074074074074


In [266]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in tqdm(enumerate(folds.split(X_eth))):
    boosting_model_eth = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_eth.fit(X_eth.iloc[train_index], y_eth[train_index])
    y_predict_btc = boosting_model_eth.predict(X_eth.iloc[test_index])

    precision_list.append(precision_score(y_eth[test_index], y_predict_btc))
    recall_list.append(recall_score(y_eth[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_eth[test_index], y_predict_btc))
    f1_list.append(f1_score(y_eth[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

0it [00:00, ?it/s]

precision = 0.8968865223889761
recall = 0.8615599198392918
accuracy = 0.8803654809925436
f1-score = 0.8784489530271037
