## Build Models

In [2]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

In [3]:
PATH = "/Users/admin/PycharmProjects/Market_Prediction_Using_News_and_Indicators/Crypto_Currencies_Data/"

### MLP

In [4]:
from sklearn.neural_network import MLPClassifier

#### No indicators

In [85]:
X_ltc = pd.read_csv(PATH + "/LTC.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [87]:
mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_ltc.fit(X_ltc_train, y_ltc_train)

In [11]:
y_predict_ltc = mlp_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.48952095808383234
recall = 1.0
accuracy = 0.48952095808383234
f1-score = 0.657286432160804


In [92]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = mlp_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4849271648233073
recall = 1.0
accuracy = 0.4849271648233073
f1-score = 0.6530968037926289


#### With Indicators

In [93]:
X_ltc = pd.read_csv(PATH + "/LTC_indicators.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [96]:
mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_ltc.fit(X_ltc_train, y_ltc_train)

In [97]:
y_predict_ltc = mlp_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5304518664047151
recall = 0.8256880733944955
accuracy = 0.5568862275449101
f1-score = 0.6459330143540669


In [107]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = mlp_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5415901223105687
recall = 0.6829928409289332
accuracy = 0.5408031417256034
f1-score = 0.5800131459197088


### Reduce dim

In [108]:
X_ltc = pd.read_csv(PATH + "/LTC_reduce.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [111]:
mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_ltc.fit(X_ltc_train, y_ltc_train)

In [112]:
y_predict_ltc = mlp_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.48952095808383234
recall = 1.0
accuracy = 0.48952095808383234
f1-score = 0.657286432160804


In [115]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = mlp_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4849271648233073
recall = 1.0
accuracy = 0.4849271648233073
f1-score = 0.6530968037926289


### With Normalization

In [119]:
X_ltc = pd.read_csv(PATH + "/LTC_normalization.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [120]:
mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_ltc.fit(X_ltc_train, y_ltc_train)



In [121]:
y_predict_ltc = mlp_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8918918918918919
recall = 0.8073394495412844
accuracy = 0.8577844311377245
f1-score = 0.8475120385232745


In [122]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    mlp_model_ltc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = mlp_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")



precision = 0.8320473644003055
recall = 0.77653471026965
accuracy = 0.8156205650883757
f1-score = 0.803217345456406




### Random Forest

In [123]:
from sklearn.ensemble import RandomForestClassifier

### No indicators

In [124]:
X_ltc = pd.read_csv(PATH + "/LTC.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [125]:
rf_model_ltc = RandomForestClassifier(n_estimators=1000)
rf_model_ltc.fit(X_ltc_train, y_ltc_train)

In [126]:
y_predict_ltc = rf_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6317460317460317
recall = 0.6085626911314985
accuracy = 0.6347305389221557
f1-score = 0.6199376947040498


In [127]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    rf_model_ltc = RandomForestClassifier(n_estimators=1000)
    rf_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = rf_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6298668722316032
recall = 0.6359537528997979
accuracy = 0.6421116727851343
f1-score = 0.6328186758893282


### With indicators

In [128]:
X_ltc = pd.read_csv(PATH + "/LTC_indicators.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [129]:
rf_model_ltc = RandomForestClassifier(n_estimators=1000)
rf_model_ltc.fit(X_ltc_train, y_ltc_train)

In [130]:
y_predict_ltc = rf_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9238095238095239
recall = 0.8899082568807339
accuracy = 0.9101796407185628
f1-score = 0.9065420560747663


In [131]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    rf_model_ltc = RandomForestClassifier(n_estimators=1000)
    rf_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = rf_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8968395007142371
recall = 0.8868081019731098
accuracy = 0.8957021185927133
f1-score = 0.8917771074840514


### Reduce dim

In [132]:
X_ltc = pd.read_csv(PATH + "/LTC_reduce.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [133]:
rf_model_ltc = RandomForestClassifier(n_estimators=1000)
rf_model_ltc.fit(X_ltc_train, y_ltc_train)

In [134]:
y_predict_ltc = rf_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9142857142857143
recall = 0.8807339449541285
accuracy = 0.9011976047904192
f1-score = 0.8971962616822431


In [135]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    rf_model_ltc = RandomForestClassifier(n_estimators=1000)
    rf_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = rf_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8595349623333977
recall = 0.8735999900222006
accuracy = 0.8695021756723473
f1-score = 0.8664875294088777


### With normalization

In [136]:
X_ltc = pd.read_csv(PATH + "/LTC_normalization.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [137]:
rf_model_ltc = RandomForestClassifier(n_estimators=1000)
rf_model_ltc.fit(X_ltc_train, y_ltc_train)

In [138]:
y_predict_ltc = rf_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9242902208201893
recall = 0.8960244648318043
accuracy = 0.9131736526946108
f1-score = 0.9099378881987578


In [139]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    rf_model_ltc = RandomForestClassifier(n_estimators=1000)
    rf_model_ltc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = rf_model_ltc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9008788896083979
recall = 0.8899136920352216
accuracy = 0.8991606166164288
f1-score = 0.895348152138234


### SVM

In [140]:
from sklearn import svm

### No indicators

In [180]:
X_ltc = pd.read_csv(PATH + "/LTC.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [181]:
svm_model_ltc = svm.SVC(kernel='rbf')

svm_model_ltc.fit(X_ltc_train, y_ltc_train)

In [182]:
y_predict_ltc = svm_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4482758620689655
recall = 0.039755351681957186
accuracy = 0.5059880239520959
f1-score = 0.07303370786516854


In [184]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = svm_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.49449401070276783
recall = 0.3176531047543604
accuracy = 0.5106197286395308
f1-score = 0.3660439528321849


### With indicators

In [185]:
X_ltc = pd.read_csv(PATH + "/LTC_indicators.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [186]:
svm_model_ltc = svm.SVC(kernel='rbf')

svm_model_ltc.fit(X_ltc_train, y_ltc_train)

In [187]:
y_predict_ltc = svm_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6301369863013698
recall = 0.5626911314984709
accuracy = 0.624251497005988
f1-score = 0.5945072697899838


In [188]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = svm_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6194865452263257
recall = 0.6197866662680983
accuracy = 0.6312370125901479
f1-score = 0.6191606210697372


### Reduce dim

In [189]:
X_ltc = pd.read_csv(PATH + "/LTC_reduce.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [190]:
svm_model_ltc = svm.SVC(kernel='rbf')

svm_model_ltc.fit(X_ltc_train, y_ltc_train)

In [191]:
y_predict_ltc = svm_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7586206896551724
recall = 0.2691131498470948
accuracy = 0.6002994011976048
f1-score = 0.39729119638826177


In [192]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = svm_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.751211345161057
recall = 0.32891319740905633
accuracy = 0.6213421342134213
f1-score = 0.45464911917291406


### With Normalization

In [193]:
X_ltc = pd.read_csv(PATH + "/LTC_normalization.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [194]:
svm_model_ltc = svm.SVC(kernel='rbf')

svm_model_ltc.fit(X_ltc_train, y_ltc_train)

In [195]:
y_predict_ltc = svm_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8275862068965517
recall = 0.7339449541284404
accuracy = 0.7949101796407185
f1-score = 0.7779578606158833


In [196]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = svm_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8445138358550869
recall = 0.7804865274542004
accuracy = 0.8235130179684635
f1-score = 0.8108925642650127


### Boosting

In [197]:
from lightgbm import LGBMClassifier

### No indicators

In [198]:
X_ltc = pd.read_csv(PATH + "/LTC.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [199]:
boosting_model_ltc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_ltc.fit(X_ltc_train, y_ltc_train)

In [200]:
y_predict_ltc = boosting_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6542372881355932
recall = 0.5902140672782875
accuracy = 0.6467065868263473
f1-score = 0.6205787781350482


In [201]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6683486821187594
recall = 0.6291749447453429
accuracy = 0.668318054027625
f1-score = 0.6470981141899255


### With indicators

In [202]:
X_ltc = pd.read_csv(PATH + "/LTC_indicators.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [203]:
boosting_model_ltc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_ltc.fit(X_ltc_train, y_ltc_train)

In [204]:
y_predict_ltc = boosting_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9238095238095239
recall = 0.8899082568807339
accuracy = 0.9101796407185628
f1-score = 0.9065420560747663


In [205]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9150949375470784
recall = 0.8998376830222943
accuracy = 0.9105219410829972
f1-score = 0.9070015333906574


### Reduce dim

In [206]:
X_ltc = pd.read_csv(PATH + "/LTC_reduce.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [207]:
boosting_model_ltc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_ltc.fit(X_ltc_train, y_ltc_train)

In [208]:
y_predict_ltc = boosting_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8793650793650793
recall = 0.8470948012232415
accuracy = 0.8682634730538922
f1-score = 0.8629283489096573


In [209]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8685915797400569
recall = 0.8891740230559361
accuracy = 0.8808519740862975
f1-score = 0.8784168111508558


### With normalization

In [210]:
X_ltc = pd.read_csv(PATH + "/LTC_normalization.csv", index_col='time')
y_ltc = pd.read_csv(PATH + "/LTC_target.csv", index_col='time')
y_ltc = y_ltc.values.ravel()

X_ltc_train, X_ltc_test, y_ltc_train, y_ltc_test = train_test_split(X_ltc, y_ltc, test_size=0.33, random_state=42)

In [211]:
boosting_model_ltc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_ltc.fit(X_ltc_train, y_ltc_train)

In [212]:
y_predict_ltc = boosting_model_ltc.predict(X_ltc_test)

precision = precision_score(y_ltc_test, y_predict_ltc)
recall = recall_score(y_ltc_test, y_predict_ltc)
accuracy = accuracy_score(y_ltc_test, y_predict_ltc)
f1 = f1_score(y_ltc_test, y_predict_ltc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9348534201954397
recall = 0.8776758409785933
accuracy = 0.9101796407185628
f1-score = 0.9053627760252365


In [213]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_ltc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_ltc.iloc[train_index], y_ltc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_ltc.iloc[test_index])

    precision_list.append(precision_score(y_ltc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_ltc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_ltc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_ltc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9168936476152971
recall = 0.8989494484498491
accuracy = 0.9110145458990344
f1-score = 0.9074465215134087
