## Build Models

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

In [40]:
PATH = "/Users/admin/PycharmProjects/Market_Prediction_Using_News_and_Indicators/Crypto_Currencies_Data/"

### MLP

In [39]:
from sklearn.neural_network import MLPClassifier

#### No indicators

In [54]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [56]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [57]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

In [58]:
precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5311284046692607
recall = 0.8348623853211009
accuracy = 0.5583832335329342
f1-score = 0.6492271105826397


In [65]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4525803480407061
recall = 0.5953815261044176
accuracy = 0.4760260650629635
f1-score = 0.4542980827542905


#### With Indicators

In [66]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [77]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [78]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5
recall = 0.8501529051987767
accuracy = 0.5104790419161677
f1-score = 0.6296715741789354


In [82]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5407979330649523
recall = 0.6928399669363525
accuracy = 0.5392759683012553
f1-score = 0.5706195773273837


### Reduce dim

In [83]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [87]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [88]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.48952095808383234
recall = 1.0
accuracy = 0.48952095808383234
f1-score = 0.657286432160804


In [86]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4839390225308171
recall = 1.0
accuracy = 0.4839390225308171
f1-score = 0.652190007976255


### With Normalization

In [89]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [92]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)



In [93]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.87248322147651
recall = 0.7951070336391437
accuracy = 0.842814371257485
f1-score = 0.832


In [95]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")



precision = 0.7781876968993533
recall = 0.7423999532433267
accuracy = 0.7726050988533248
f1-score = 0.7590561866614136




### Random Forest

In [96]:
from sklearn.ensemble import RandomForestClassifier

### No indicators

In [97]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [98]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [99]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5827814569536424
recall = 0.5382262996941896
accuracy = 0.5853293413173652
f1-score = 0.559618441971383


In [100]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5849836048643205
recall = 0.5764847331112392
accuracy = 0.5966385569017487
f1-score = 0.5801829268292683


### With indicators

In [101]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [102]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [103]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9056603773584906
recall = 0.8807339449541285
accuracy = 0.8967065868263473
f1-score = 0.8930232558139536


In [105]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8962086042783509
recall = 0.8816440264716127
accuracy = 0.893228559915009
f1-score = 0.8887951541149824


### Reduce dim

In [106]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [107]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [108]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8971061093247589
recall = 0.8532110091743119
accuracy = 0.8802395209580839
f1-score = 0.8746081504702194


In [109]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8813018511944675
recall = 0.8756530825496341
accuracy = 0.8823541048466864
f1-score = 0.8780519735238063


### With normalization

In [110]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [113]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [114]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9050632911392406
recall = 0.8746177370030581
accuracy = 0.8937125748502994
f1-score = 0.8895800933125972


In [115]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8933474653568255
recall = 0.8805990943921979
accuracy = 0.8912510532292925
f1-score = 0.8868289701098805


### SVM

In [116]:
from sklearn import svm

### No indicators

In [117]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [118]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [119]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5502008032128514
recall = 0.41896024464831805
accuracy = 0.5479041916167665
f1-score = 0.4756944444444445


In [120]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5284210526315789
recall = 0.43263699287795676
accuracy = 0.5387965580198841
f1-score = 0.4706628817800519


### With indicators

In [121]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [122]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [123]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6577777777777778
recall = 0.4525993883792049
accuracy = 0.6167664670658682
f1-score = 0.5362318840579711


In [124]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5986610476901739
recall = 0.477062094531974
accuracy = 0.5931605110582017
f1-score = 0.530172805761751


### Reduce dim

In [125]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [128]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [129]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6486486486486487
recall = 0.22018348623853212
accuracy = 0.5598802395209581
f1-score = 0.3287671232876712


In [133]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7263360451566694
recall = 0.17676767676767677
accuracy = 0.5650049455984175
f1-score = 0.2791667742144672


### With Normalization

In [134]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [135]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [136]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7573770491803279
recall = 0.7064220183486238
accuracy = 0.7455089820359282
f1-score = 0.7310126582278481


In [137]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7566194069719464
recall = 0.7174503657262278
accuracy = 0.7513616880975932
f1-score = 0.7361516411971385


### Boosting

In [138]:
from lightgbm import LGBMClassifier

### No indicators

In [139]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [140]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [141]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6107142857142858
recall = 0.5229357798165137
accuracy = 0.6032934131736527
f1-score = 0.5634266886326195


In [142]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6125130895692524
recall = 0.5150988176774505
accuracy = 0.607015034836817
f1-score = 0.5578539158429426


### With indicators

In [143]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [144]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [145]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9058441558441559
recall = 0.8532110091743119
accuracy = 0.8847305389221557
f1-score = 0.878740157480315


In [147]:
folds = KFold(n_splits=4, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8871857069423352
recall = 0.8443326748457752
accuracy = 0.8724592024419833
f1-score = 0.8649698279686729


### Reduce dim

In [148]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [149]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [150]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.85
recall = 0.7798165137614679
accuracy = 0.8248502994011976
f1-score = 0.8133971291866028


In [151]:
folds = KFold(n_splits=4, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8629408801920923
recall = 0.827242640408313
accuracy = 0.85269439987477
f1-score = 0.8443315712355558


### With normalization

In [152]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [153]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [154]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9046052631578947
recall = 0.8409785932721713
accuracy = 0.8787425149700598
f1-score = 0.8716323296354992


In [155]:
folds = KFold(n_splits=4, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8931290214356842
recall = 0.8637526690301214
accuracy = 0.8838316440339685
f1-score = 0.8778859524448188
