## Build Models

In [442]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from Core.train_k_folds import ml_class_train_template

In [440]:
# Directory holding the CSV inputs read by the cells of this notebook.
# Set the CRYPTO_DATA_DIR environment variable to run on another machine;
# when it is unset, the original local path is used. Kept as a plain string
# because the loading cells build file names with PATH + "/<file>.csv".
PATH = os.environ.get(
    "CRYPTO_DATA_DIR",
    "/Users/admin/PycharmProjects/Market_Prediction_Using_News_and_Indicators/Crypto_Currencies_Data/",
)

### MLP

In [3]:
from sklearn.neural_network import MLPClassifier

### Twitter news

In [237]:
# Read the target and the twitter_btc.csv features, both keyed by 'time'.
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')

# Inner-join on the index so only timestamps present in both files remain.
df = y_btc.merge(X_btc, left_index=True, right_index=True)

# 'Close' is the label; features are every column from 'neg' through the last one.
y = df['Close']
X = df.loc[:, 'neg':]

# Hold out 33% of the rows with a fixed seed.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [153]:
# Single-hidden-layer MLP (71 units) trained with Adam and alpha=0.0002.
# random_state pins the weight initialisation and batch shuffling so that
# re-running this cell reproduces the same model; 42 matches the split seed.
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,), random_state=42)

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [154]:
# Predict the held-out split with the fitted MLP, then score it.
y_predict_btc = mlp_model_btc.predict(X_btc_test)

# Evaluate the four metrics in the same order as they are reported.
precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.5102040816326531
recall = 1.0
accuracy = 0.5102040816326531
f1-score = 0.6756756756756758


In [261]:
# Run the same MLP configuration through the project helper with k_folds=5.
# NOTE(review): ml_class_train_template lives in Core.train_k_folds and is not
# shown here; 'args' is presumably forwarded to the estimator constructor and
# method='proba' presumably selects probability-based prediction — confirm.
mlp_kwargs = {
    'solver': 'adam',
    'alpha': 0.0002,
    'hidden_layer_sizes': (71,),
}
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    MLPClassifier,
    k_folds=5,
    args=mlp_kwargs,
    method='proba',
)

metrics

{'precision': 0.515625,
 'recall': 0.99,
 'accuracy': 0.5204081632653061,
 'f1_score': 0.678082191780822}

#### No indicators

In [248]:
# Features come from BTC.csv, labels from BTC_target.csv; both are indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so features and labels are paired purely by row
# position from here on. Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): train_test_split shuffles by default; if rows are consecutive time
# steps, a chronological split (shuffle=False) may be the fairer evaluation — confirm.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [249]:
# Single-hidden-layer MLP (71 units) trained with Adam and alpha=0.0002.
# random_state makes the fit repeatable across re-runs; 42 matches the split seed.
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,), random_state=42)

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [250]:
# Predict labels for the held-out split with the fitted MLP.
y_predict_btc = mlp_model_btc.predict(X_btc_test)

In [251]:
# Score the existing predictions against the held-out labels,
# evaluating the metrics in the order they are printed.
precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.4811320754716981
recall = 1.0
accuracy = 0.4811320754716981
f1-score = 0.6496815286624203


In [264]:
# Run the MLP configuration through the project helper with k_folds=5.
# NOTE(review): the helper's handling of 'args' and method='proba' is defined
# in Core.train_k_folds, which is not visible here — confirm before relying on it.
mlp_kwargs = {
    'solver': 'adam',
    'alpha': 0.0002,
    'hidden_layer_sizes': (71,),
}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    MLPClassifier,
    k_folds=5,
    args=mlp_kwargs,
    method='proba',
)

metrics

{'precision': 0.47547169811320755,
 'recall': 1.0,
 'accuracy': 0.47547169811320755,
 'f1_score': 0.6445012787723786}

#### With Indicators

In [265]:
# Features come from BTC_indicators.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_indicators.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [25]:
# Single-hidden-layer MLP (71 units) trained with Adam and alpha=0.0002.
# random_state makes the fit repeatable across re-runs; 42 matches the split seed.
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,), random_state=42)

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [26]:
# Predict the held-out split with the fitted MLP, then score it.
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.5572916666666666
recall = 0.8392156862745098
accuracy = 0.6018867924528302
f1-score = 0.6697965571205008


In [267]:
# Run the MLP configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
mlp_kwargs = {
    'solver': 'adam',
    'alpha': 0.0002,
    'hidden_layer_sizes': (71,),
}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    MLPClassifier,
    k_folds=5,
    args=mlp_kwargs,
    method='proba',
)

metrics

{'precision': 0.6642599277978339,
 'recall': 0.7301587301587301,
 'accuracy': 0.6962264150943396,
 'f1_score': 0.6956521739130435}

### Reduce dim

In [268]:
# Features come from BTC_reduce.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_reduce.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [34]:
# Single-hidden-layer MLP (71 units) trained with Adam and alpha=0.0002.
# random_state makes the fit repeatable across re-runs; 42 matches the split seed.
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,), random_state=42)

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [35]:
# Predict the held-out split with the fitted MLP, then score it.
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.4816955684007707
recall = 0.9803921568627451
accuracy = 0.4830188679245283
f1-score = 0.6459948320413437


In [269]:
# Run the MLP configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
mlp_kwargs = {
    'solver': 'adam',
    'alpha': 0.0002,
    'hidden_layer_sizes': (71,),
}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    MLPClassifier,
    k_folds=5,
    args=mlp_kwargs,
    method='proba',
)

metrics

{'precision': 0.4798464491362764,
 'recall': 0.9920634920634921,
 'accuracy': 0.4849056603773585,
 'f1_score': 0.6468305304010349}

### With Normalization

In [270]:
# Features come from BTC_normalization.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_normalization.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [38]:
# Single-hidden-layer MLP (71 units) trained with Adam and alpha=0.0002.
# random_state makes the fit repeatable across re-runs; 42 matches the split seed.
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,), random_state=42)

mlp_model_btc.fit(X_btc_train, y_btc_train)



In [39]:
# Predict the held-out split with the fitted MLP, then score it.
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.9094827586206896
recall = 0.8274509803921568
accuracy = 0.8773584905660378
f1-score = 0.8665297741273099


In [274]:
# Run the MLP configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
mlp_kwargs = {
    'solver': 'adam',
    'alpha': 0.0002,
    'hidden_layer_sizes': (71,),
}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    MLPClassifier,
    k_folds=5,
    args=mlp_kwargs,
    method='proba',
)

metrics



{'precision': 0.8464566929133859,
 'recall': 0.8531746031746031,
 'accuracy': 0.8566037735849057,
 'f1_score': 0.8498023715415021}

### Twitter + Indicators (with Normalization)

In [275]:
# Three inputs keyed by 'time': twitter_btc.csv, BTC_normalization.csv and the target.
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# Inner-join the two feature tables on the index, then attach the target the same way.
df1 = Z_btc.merge(X_btc, left_index=True, right_index=True)
df2 = y_btc.merge(df1, left_index=True, right_index=True)

# 'Close' is the label; features run from the 'UpperBB' column through the last one.
y = df2['Close']
X = df2.loc[:, 'UpperBB':]

# Hold out 33% of the rows with a fixed seed.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [115]:
# Single-hidden-layer MLP (71 units) trained with Adam and alpha=0.0002.
# random_state makes the fit repeatable across re-runs; 42 matches the split seed.
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,), random_state=42)

mlp_model_btc.fit(X_btc_train, y_btc_train)



In [116]:
# Predict the held-out split with the fitted MLP, then score it.
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.900990099009901
recall = 0.91
accuracy = 0.9030612244897959
f1-score = 0.9054726368159204


In [280]:
# Run the MLP configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
mlp_kwargs = {
    'solver': 'adam',
    'alpha': 0.0002,
    'hidden_layer_sizes': (71,),
}
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    MLPClassifier,
    k_folds=5,
    args=mlp_kwargs,
    method='proba',
)

metrics



{'precision': 0.89,
 'recall': 0.89,
 'accuracy': 0.8877551020408163,
 'f1_score': 0.89}

### Random Forest

In [41]:
from sklearn.ensemble import RandomForestClassifier

### Twitter news

In [281]:
# Read the target and the twitter_btc.csv features, both keyed by 'time'.
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')

# Inner-join on the index so only timestamps present in both files remain.
df = y_btc.merge(X_btc, left_index=True, right_index=True)

# 'Close' is the label; features are every column from 'neg' through the last one.
y = df['Close']
X = df.loc[:, 'neg':]

# Hold out 33% of the rows with a fixed seed.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [147]:
# 1000-tree random forest. random_state pins the bootstrap sampling and the
# per-split feature sampling so re-running this cell reproduces the same
# forest; 42 matches the seed used for the train/test split.
rf_model_btc = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [148]:
# Predict the held-out split with the fitted forest, then score it.
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.5579710144927537
recall = 0.77
accuracy = 0.5714285714285714
f1-score = 0.6470588235294118


In [296]:
# Run the random-forest configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
rf_kwargs = {'n_estimators': 1000}
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    RandomForestClassifier,
    k_folds=5,
    args=rf_kwargs,
    method='proba',
)

metrics

{'precision': 0.5342465753424658,
 'recall': 0.78,
 'accuracy': 0.5408163265306123,
 'f1_score': 0.6341463414634146}

### No indicators

In [297]:
# Features come from BTC.csv, labels from BTC_target.csv; both are indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [43]:
# 1000-tree random forest. random_state makes the fit repeatable across
# re-runs; 42 matches the seed used for the train/test split.
rf_model_btc = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [44]:
# Predict the held-out split with the fitted forest, then score it.
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.6425339366515838
recall = 0.5568627450980392
accuracy = 0.6377358490566037
f1-score = 0.5966386554621849


In [299]:
# Run the random-forest configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
rf_kwargs = {'n_estimators': 1000}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    RandomForestClassifier,
    k_folds=5,
    args=rf_kwargs,
    method='proba',
)

metrics

{'precision': 0.6909090909090909,
 'recall': 0.6031746031746031,
 'accuracy': 0.6830188679245283,
 'f1_score': 0.6440677966101694}

### With indicators

In [300]:
# Features come from BTC_indicators.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_indicators.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [55]:
# 1000-tree random forest. random_state makes the fit repeatable across
# re-runs; 42 matches the seed used for the train/test split.
rf_model_btc = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [56]:
# Predict the held-out split with the fitted forest, then score it.
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.9421487603305785
recall = 0.8941176470588236
accuracy = 0.9226415094339623
f1-score = 0.9175050301810866


In [301]:
# Run the random-forest configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
rf_kwargs = {'n_estimators': 1000}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    RandomForestClassifier,
    k_folds=5,
    args=rf_kwargs,
    method='proba',
)

metrics

{'precision': 0.9698275862068966,
 'recall': 0.8928571428571429,
 'accuracy': 0.9358490566037736,
 'f1_score': 0.9297520661157024}

### Reduce dim

In [302]:
# Features come from BTC_reduce.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_reduce.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [58]:
# 1000-tree random forest. random_state makes the fit repeatable across
# re-runs; 42 matches the seed used for the train/test split.
rf_model_btc = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [59]:
# Predict the held-out split with the fitted forest, then score it.
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.9399141630901288
recall = 0.8588235294117647
accuracy = 0.9056603773584906
f1-score = 0.8975409836065574


In [305]:
# Run the random-forest configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
rf_kwargs = {'n_estimators': 1000}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    RandomForestClassifier,
    k_folds=5,
    args=rf_kwargs,
    method='proba',
)

metrics

{'precision': 0.896,
 'recall': 0.8888888888888888,
 'accuracy': 0.8981132075471698,
 'f1_score': 0.8924302788844621}

### With normalization

In [306]:
# Features come from BTC_normalization.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_normalization.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [61]:
# 1000-tree random forest. random_state makes the fit repeatable across
# re-runs; 42 matches the seed used for the train/test split.
rf_model_btc = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [62]:
# Predict the held-out split with the fitted forest, then score it.
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.9458333333333333
recall = 0.8901960784313725
accuracy = 0.9226415094339623
f1-score = 0.9171717171717171


In [324]:
# Run the random-forest configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
rf_kwargs = {'n_estimators': 1000}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    RandomForestClassifier,
    k_folds=5,
    args=rf_kwargs,
    method='proba',
)

metrics

{'precision': 0.9344262295081968,
 'recall': 0.9047619047619048,
 'accuracy': 0.9245283018867925,
 'f1_score': 0.9193548387096775}

### Twitter + Indicators (with Normalization)

In [325]:
# Three inputs keyed by 'time': twitter_btc.csv, BTC_normalization.csv and the target.
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# Inner-join the two feature tables on the index, then attach the target the same way.
df1 = Z_btc.merge(X_btc, left_index=True, right_index=True)
df2 = y_btc.merge(df1, left_index=True, right_index=True)

# 'Close' is the label; features run from the 'UpperBB' column through the last one.
y = df2['Close']
X = df2.loc[:, 'UpperBB':]

# Hold out 33% of the rows with a fixed seed.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [150]:
# 1000-tree random forest. random_state makes the fit repeatable across
# re-runs; 42 matches the seed used for the train/test split.
rf_model_btc = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [151]:
# Predict the held-out split with the fitted forest, then score it.
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.93
recall = 0.93
accuracy = 0.9285714285714286
f1-score = 0.93


In [328]:
# Run the random-forest configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='proba') are defined in
# Core.train_k_folds, not visible here — confirm.
rf_kwargs = {'n_estimators': 1000}
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    RandomForestClassifier,
    k_folds=5,
    args=rf_kwargs,
    method='proba',
)

metrics

{'precision': 0.9489795918367347,
 'recall': 0.93,
 'accuracy': 0.9387755102040817,
 'f1_score': 0.9393939393939393}

### SVM

In [329]:
from sklearn import svm

### Twitter news

In [330]:
# Read the target and the twitter_btc.csv features, both keyed by 'time'.
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')

# Inner-join on the index so only timestamps present in both files remain.
df = y_btc.merge(X_btc, left_index=True, right_index=True)

# 'Close' is the label; features are every column from 'neg' through the last one.
y = df['Close']
X = df.loc[:, 'neg':]

# Hold out 33% of the rows with a fixed seed.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [124]:
# RBF-kernel support-vector classifier with otherwise default settings,
# fitted on the training split. fit() returns the estimator itself, so the
# final expression displays the same object as before.
svm_model_btc = svm.SVC(kernel='rbf').fit(X_btc_train, y_btc_train)

svm_model_btc

In [125]:
# Predict the held-out split with the fitted SVM, then score it.
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.5102040816326531
recall = 1.0
accuracy = 0.5102040816326531
f1-score = 0.6756756756756758


In [336]:
# Run the RBF-SVM configuration through the project helper with k_folds=5.
# NOTE(review): this call uses method='choice' where the MLP/forest calls use
# 'proba'; what each mode does is defined in Core.train_k_folds, which is not
# visible here — confirm.
svc_kwargs = {'kernel': 'rbf'}
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    svm.SVC,
    k_folds=5,
    args=svc_kwargs,
    method='choice',
)

metrics

{'precision': 0.5139664804469274,
 'recall': 0.92,
 'accuracy': 0.5153061224489796,
 'f1_score': 0.6594982078853047}

### No indicators

In [337]:
# Features come from BTC.csv, labels from BTC_target.csv; both are indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [65]:
# RBF-kernel support-vector classifier with otherwise default settings,
# fitted on the training split. fit() returns the estimator itself.
svm_model_btc = svm.SVC(kernel='rbf').fit(X_btc_train, y_btc_train)

svm_model_btc

In [66]:
# Predict the held-out split with the fitted SVM, then score it.
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.539568345323741
recall = 0.29411764705882354
accuracy = 0.539622641509434
f1-score = 0.3807106598984772


In [341]:
# Run the RBF-SVM configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='choice') are defined in
# Core.train_k_folds, not visible here — confirm.
svc_kwargs = {'kernel': 'rbf'}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    svm.SVC,
    k_folds=5,
    args=svc_kwargs,
    method='choice',
)

metrics

{'precision': 0.5339805825242718,
 'recall': 0.4365079365079365,
 'accuracy': 0.5509433962264151,
 'f1_score': 0.48034934497816595}

### With indicators

In [342]:
# Features come from BTC_indicators.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_indicators.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [68]:
# RBF-kernel support-vector classifier with otherwise default settings,
# fitted on the training split. fit() returns the estimator itself.
svm_model_btc = svm.SVC(kernel='rbf').fit(X_btc_train, y_btc_train)

svm_model_btc

In [69]:
# Predict the held-out split with the fitted SVM, then score it.
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.632
recall = 0.6196078431372549
accuracy = 0.6433962264150943
f1-score = 0.6257425742574257


In [360]:
# Run the RBF-SVM configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='choice') are defined in
# Core.train_k_folds, not visible here — confirm.
svc_kwargs = {'kernel': 'rbf'}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    svm.SVC,
    k_folds=5,
    args=svc_kwargs,
    method='choice',
)

metrics

{'precision': 0.620817843866171,
 'recall': 0.6626984126984127,
 'accuracy': 0.6471698113207547,
 'f1_score': 0.6410748560460653}

### Reduce dim

In [361]:
# Features come from BTC_reduce.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_reduce.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [71]:
# RBF-kernel support-vector classifier with otherwise default settings,
# fitted on the training split. fit() returns the estimator itself.
svm_model_btc = svm.SVC(kernel='rbf').fit(X_btc_train, y_btc_train)

svm_model_btc

In [72]:
# Predict the held-out split with the fitted SVM, then score it.
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.6794871794871795
recall = 0.20784313725490197
accuracy = 0.5716981132075472
f1-score = 0.31831831831831836


In [371]:
# Run the RBF-SVM configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='choice') are defined in
# Core.train_k_folds, not visible here — confirm.
svc_kwargs = {'kernel': 'rbf'}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    svm.SVC,
    k_folds=5,
    args=svc_kwargs,
    method='choice',
)

metrics

{'precision': 0.6597938144329897,
 'recall': 0.25396825396825395,
 'accuracy': 0.5830188679245283,
 'f1_score': 0.3667621776504298}

### With Normalization

In [372]:
# Features come from BTC_normalization.csv, labels from BTC_target.csv; both indexed by 'time'.
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# .ravel() discards the index, so rows are paired by position from here on.
# Fail loudly if the two files are not aligned on 'time'.
assert X_btc.index.equals(y_btc.index), "BTC_normalization.csv and BTC_target.csv are not aligned on 'time'"
y_btc = y_btc.values.ravel()

# Hold out 33% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the split shuffles rows; for time-ordered data consider shuffle=False.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [74]:
# RBF-kernel support-vector classifier with otherwise default settings,
# fitted on the training split. fit() returns the estimator itself.
svm_model_btc = svm.SVC(kernel='rbf').fit(X_btc_train, y_btc_train)

svm_model_btc

In [75]:
# Predict the held-out split with the fitted SVM, then score it.
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.8669950738916257
recall = 0.6901960784313725
accuracy = 0.8
f1-score = 0.7685589519650654


In [376]:
# Run the RBF-SVM configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='choice') are defined in
# Core.train_k_folds, not visible here — confirm.
svc_kwargs = {'kernel': 'rbf'}
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    svm.SVC,
    k_folds=5,
    args=svc_kwargs,
    method='choice',
)

metrics

{'precision': 0.834061135371179,
 'recall': 0.7579365079365079,
 'accuracy': 0.8132075471698114,
 'f1_score': 0.7941787941787942}

### Twitter + Indicators (with Normalization)

In [377]:
# Three inputs keyed by 'time': twitter_btc.csv, BTC_normalization.csv and the target.
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# Inner-join the two feature tables on the index, then attach the target the same way.
df1 = Z_btc.merge(X_btc, left_index=True, right_index=True)
df2 = y_btc.merge(df1, left_index=True, right_index=True)

# 'Close' is the label; features run from the 'UpperBB' column through the last one.
y = df2['Close']
X = df2.loc[:, 'UpperBB':]

# Hold out 33% of the rows with a fixed seed.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [138]:
# RBF-kernel support-vector classifier with otherwise default settings,
# fitted on the training split. fit() returns the estimator itself.
svm_model_btc = svm.SVC(kernel='rbf').fit(X_btc_train, y_btc_train)

svm_model_btc

In [139]:
# Predict the held-out split with the fitted SVM, then score it.
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision, recall, accuracy, f1 = (
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
)

print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.8404255319148937
recall = 0.79
accuracy = 0.8163265306122449
f1-score = 0.8144329896907218


In [386]:
# Run the RBF-SVM configuration through the project helper with k_folds=5.
# NOTE(review): helper semantics ('args', method='choice') are defined in
# Core.train_k_folds, not visible here — confirm.
svc_kwargs = {'kernel': 'rbf'}
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    svm.SVC,
    k_folds=5,
    args=svc_kwargs,
    method='choice',
)

metrics

{'precision': 0.8541666666666666,
 'recall': 0.82,
 'accuracy': 0.8367346938775511,
 'f1_score': 0.836734693877551}

### Boosting

In [45]:
from lightgbm import LGBMClassifier

### Twitter news

In [404]:
# Twitter-only dataset for the boosting section: features from twitter_btc.csv
# joined to the target on the shared 'time' index.
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# Index-on-index merge (pandas default how='inner'): only timestamps present
# in both files are kept.
df = y_btc.merge(X_btc, left_index=True, right_index=True)

# 'Close' is taken as the label; columns from 'neg' rightwards are features
# (relies on the merged column order).
y = df['Close']
X = df.loc[:, 'neg':]

# Fixed-seed 67/33 holdout split.
# NOTE(review): train_test_split shuffles by default; on a time-indexed series
# that places later rows in the training set — confirm this is intended.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [130]:
# LightGBM holdout model for the Twitter-only features: 10000 trees with a
# very small learning rate (3e-4), depth capped at 8.
# NOTE(review): per the LightGBM docs, subsample only takes effect when
# subsample_freq > 0, which is not set here — confirm whether bagging was intended.
boosting_model_btc = LGBMClassifier(
    n_estimators=10000, learning_rate=3e-4,
    num_leaves=34, max_depth=8,
    colsample_bytree=0.9, subsample=0.8,
    reg_alpha=0.1, reg_lambda=0.1,
    verbose=-1,
)
boosting_model_btc.fit(X=X_btc_train, y=y_btc_train)

In [131]:
# Predict on the held-out rows with the fitted LightGBM model.
y_predict_btc = boosting_model_btc.predict(X_btc_test)

# Compute the four scores in the order they are reported below.
precision, recall, accuracy, f1 = [
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
]

# One print call, one metric per line (same text as four separate prints).
print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.5255474452554745
recall = 0.72
accuracy = 0.5255102040816326
f1-score = 0.6075949367088608


In [406]:
# Run the project's training template with k_folds=5 on the Twitter-only X / y,
# passing the same LightGBM hyper-parameters as the holdout model.
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    LGBMClassifier,
    k_folds=5,
    args={
        'n_estimators': 10000,
        'learning_rate': 3e-4,
        'num_leaves': 34,
        'colsample_bytree': 0.9,
        'subsample': 0.8,
        'max_depth': 8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
    },
    method='choice',
)

# Display the metrics dict returned by the template.
metrics

{'precision': 0.539568345323741,
 'recall': 0.75,
 'accuracy': 0.5459183673469388,
 'f1_score': 0.6276150627615062}

### No indicators

In [395]:
# "No indicators" variant: features come from BTC.csv; the target file is
# flattened straight into a 1-D array.
# NOTE(review): X and y are read from separate files and paired by row position
# only — assumes both CSVs hold the same timestamps in the same order; confirm.
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time').values.ravel()

# Fixed-seed 67/33 holdout split.
# NOTE(review): train_test_split shuffles by default; on a time-indexed series
# that places later rows in the training set — confirm this is intended.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X_btc,
    y_btc,
    test_size=0.33,
    random_state=42,
)

In [47]:
# LightGBM holdout model for the "No indicators" features: 10000 trees with a
# very small learning rate (3e-4), depth capped at 8.
# NOTE(review): per the LightGBM docs, subsample only takes effect when
# subsample_freq > 0, which is not set here — confirm whether bagging was intended.
boosting_model_btc = LGBMClassifier(
    n_estimators=10000, learning_rate=3e-4,
    num_leaves=34, max_depth=8,
    colsample_bytree=0.9, subsample=0.8,
    reg_alpha=0.1, reg_lambda=0.1,
    verbose=-1,
)
boosting_model_btc.fit(X=X_btc_train, y=y_btc_train)

In [48]:
# Predict on the held-out rows with the fitted LightGBM model.
y_predict_btc = boosting_model_btc.predict(X_btc_test)

# Compute the four scores in the order they are reported below.
precision, recall, accuracy, f1 = [
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
]

# One print call, one metric per line (same text as four separate prints).
print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.6878306878306878
recall = 0.5098039215686274
accuracy = 0.6528301886792452
f1-score = 0.5855855855855855


In [398]:
# Run the project's training template with k_folds=5 on the "No indicators"
# data, passing the same LightGBM hyper-parameters as the holdout model.
# NOTE(review): this call passes method='proba' while most sibling LightGBM
# sections pass 'choice'; the template's handling of the two is not visible
# here — confirm the resulting metrics are comparable.
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    LGBMClassifier,
    k_folds=5,
    args={
        'n_estimators': 10000,
        'learning_rate': 3e-4,
        'num_leaves': 34,
        'colsample_bytree': 0.9,
        'subsample': 0.8,
        'max_depth': 8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
    },
    method='proba',
)

# Display the metrics dict returned by the template.
metrics

{'precision': 0.723404255319149,
 'recall': 0.5396825396825397,
 'accuracy': 0.6830188679245283,
 'f1_score': 0.6181818181818182}

### With indicators

In [407]:
# "With indicators" variant: features come from BTC_indicators.csv; the target
# file is flattened straight into a 1-D array.
# NOTE(review): X and y are read from separate files and paired by row position
# only — assumes both CSVs hold the same timestamps in the same order; confirm.
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time').values.ravel()

# Fixed-seed 67/33 holdout split.
# NOTE(review): train_test_split shuffles by default; on a time-indexed series
# that places later rows in the training set — confirm this is intended.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X_btc,
    y_btc,
    test_size=0.33,
    random_state=42,
)

In [77]:
# LightGBM holdout model for the "With indicators" features: 10000 trees with
# a very small learning rate (3e-4), depth capped at 8.
# NOTE(review): per the LightGBM docs, subsample only takes effect when
# subsample_freq > 0, which is not set here — confirm whether bagging was intended.
boosting_model_btc = LGBMClassifier(
    n_estimators=10000, learning_rate=3e-4,
    num_leaves=34, max_depth=8,
    colsample_bytree=0.9, subsample=0.8,
    reg_alpha=0.1, reg_lambda=0.1,
    verbose=-1,
)
boosting_model_btc.fit(X=X_btc_train, y=y_btc_train)

In [78]:
# Predict on the held-out rows with the fitted LightGBM model.
y_predict_btc = boosting_model_btc.predict(X_btc_test)

# Compute the four scores in the order they are reported below.
precision, recall, accuracy, f1 = [
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
]

# One print call, one metric per line (same text as four separate prints).
print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.9149797570850202
recall = 0.8862745098039215
accuracy = 0.9056603773584906
f1-score = 0.900398406374502


In [408]:
# Run the project's training template with k_folds=5 on the "With indicators"
# data, passing the same LightGBM hyper-parameters as the holdout model.
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    LGBMClassifier,
    k_folds=5,
    args={
        'n_estimators': 10000,
        'learning_rate': 3e-4,
        'num_leaves': 34,
        'colsample_bytree': 0.9,
        'subsample': 0.8,
        'max_depth': 8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
    },
    method='choice',
)

# Display the metrics dict returned by the template.
metrics

{'precision': 0.9098360655737705,
 'recall': 0.8809523809523809,
 'accuracy': 0.9018867924528302,
 'f1_score': 0.8951612903225806}

### Reduce dim

In [409]:
# "Reduce dim" variant: features come from BTC_reduce.csv; the target file is
# flattened straight into a 1-D array.
# NOTE(review): X and y are read from separate files and paired by row position
# only — assumes both CSVs hold the same timestamps in the same order; confirm.
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time').values.ravel()

# Fixed-seed 67/33 holdout split.
# NOTE(review): train_test_split shuffles by default; on a time-indexed series
# that places later rows in the training set — confirm this is intended.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X_btc,
    y_btc,
    test_size=0.33,
    random_state=42,
)

In [80]:
# LightGBM holdout model for the "Reduce dim" features: 10000 trees with a
# very small learning rate (3e-4), depth capped at 8.
# NOTE(review): per the LightGBM docs, subsample only takes effect when
# subsample_freq > 0, which is not set here — confirm whether bagging was intended.
boosting_model_btc = LGBMClassifier(
    n_estimators=10000, learning_rate=3e-4,
    num_leaves=34, max_depth=8,
    colsample_bytree=0.9, subsample=0.8,
    reg_alpha=0.1, reg_lambda=0.1,
    verbose=-1,
)
boosting_model_btc.fit(X=X_btc_train, y=y_btc_train)

In [150]:
# Predict on the held-out rows with the fitted LightGBM model.
# NOTE(review): the output saved under this cell looks stale — its accuracy
# reduces to 551/668, whereas the sibling LightGBM sections reduce to
# denominators of 265, and the execution counts around this cell are out of
# order. Re-run from a fresh kernel before quoting these numbers.
y_predict_btc = boosting_model_btc.predict(X_btc_test)

# Compute the four scores in the order they are reported below.
precision, recall, accuracy, f1 = [
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
]

# One print call, one metric per line (same text as four separate prints).
print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.85
recall = 0.7798165137614679
accuracy = 0.8248502994011976
f1-score = 0.8133971291866028


In [410]:
# Run the project's training template with k_folds=5 on the "Reduce dim" data,
# passing the same LightGBM hyper-parameters as the holdout model.
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    LGBMClassifier,
    k_folds=5,
    args={
        'n_estimators': 10000,
        'learning_rate': 3e-4,
        'num_leaves': 34,
        'colsample_bytree': 0.9,
        'subsample': 0.8,
        'max_depth': 8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
    },
    method='choice',
)

# Display the metrics dict returned by the template.
metrics

{'precision': 0.9122807017543859,
 'recall': 0.8253968253968254,
 'accuracy': 0.879245283018868,
 'f1_score': 0.8666666666666667}

### With normalization

In [417]:
# "With normalization" variant: features come from BTC_normalization.csv; the
# target file is flattened straight into a 1-D array.
# NOTE(review): X and y are read from separate files and paired by row position
# only — assumes both CSVs hold the same timestamps in the same order; confirm.
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time').values.ravel()

# Fixed-seed 67/33 holdout split.
# NOTE(review): train_test_split shuffles by default; on a time-indexed series
# that places later rows in the training set — confirm this is intended.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X_btc,
    y_btc,
    test_size=0.33,
    random_state=42,
)

In [82]:
# LightGBM holdout model for the "With normalization" features: 10000 trees
# with a very small learning rate (3e-4), depth capped at 8.
# NOTE(review): per the LightGBM docs, subsample only takes effect when
# subsample_freq > 0, which is not set here — confirm whether bagging was intended.
boosting_model_btc = LGBMClassifier(
    n_estimators=10000, learning_rate=3e-4,
    num_leaves=34, max_depth=8,
    colsample_bytree=0.9, subsample=0.8,
    reg_alpha=0.1, reg_lambda=0.1,
    verbose=-1,
)
boosting_model_btc.fit(X=X_btc_train, y=y_btc_train)

In [83]:
# Predict on the held-out rows with the fitted LightGBM model.
y_predict_btc = boosting_model_btc.predict(X_btc_test)

# Compute the four scores in the order they are reported below.
precision, recall, accuracy, f1 = [
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
]

# One print call, one metric per line (same text as four separate prints).
print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.927710843373494
recall = 0.9058823529411765
accuracy = 0.9207547169811321
f1-score = 0.9166666666666667


In [433]:
# Run the project's training template with k_folds=5 on the "With
# normalization" data, passing the same LightGBM hyper-parameters as the
# holdout model.
metrics, model = ml_class_train_template(
    X_btc.to_numpy(),
    y_btc,
    LGBMClassifier,
    k_folds=5,
    args={
        'n_estimators': 10000,
        'learning_rate': 3e-4,
        'num_leaves': 34,
        'colsample_bytree': 0.9,
        'subsample': 0.8,
        'max_depth': 8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
    },
    method='choice',
)

# Display the metrics dict returned by the template.
metrics

{'precision': 0.9547325102880658,
 'recall': 0.9206349206349206,
 'accuracy': 0.9415094339622642,
 'f1_score': 0.9373737373737374}

### Twitter + Indicators (with Normalization)

In [434]:
# Assemble the combined dataset for the boosting section: Twitter-derived
# features, the normalized BTC indicator table and the target, all indexed by
# 'time'.
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

# Index-on-index merges (pandas default how='inner'), so only timestamps
# present in all three frames survive.
df1 = Z_btc.merge(X_btc, left_index=True, right_index=True)
df2 = y_btc.merge(df1, left_index=True, right_index=True)

# The 'Close' column is taken as the label; every column from 'UpperBB'
# rightwards is used as a feature (relies on the merged column order).
y = df2['Close']
X = df2.loc[:, 'UpperBB':]

# Fixed-seed 67/33 holdout split.
# NOTE(review): train_test_split shuffles by default; on a time-indexed series
# that places later rows in the training set — confirm this is intended.
X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [143]:
# LightGBM holdout model for the combined Twitter + normalized-indicator
# features: 10000 trees with a very small learning rate (3e-4), depth capped at 8.
# NOTE(review): per the LightGBM docs, subsample only takes effect when
# subsample_freq > 0, which is not set here — confirm whether bagging was intended.
boosting_model_btc = LGBMClassifier(
    n_estimators=10000, learning_rate=3e-4,
    num_leaves=34, max_depth=8,
    colsample_bytree=0.9, subsample=0.8,
    reg_alpha=0.1, reg_lambda=0.1,
    verbose=-1,
)
boosting_model_btc.fit(X=X_btc_train, y=y_btc_train)

In [145]:
# Predict on the held-out rows with the fitted LightGBM model.
y_predict_btc = boosting_model_btc.predict(X_btc_test)

# Compute the four scores in the order they are reported below.
precision, recall, accuracy, f1 = [
    score_fn(y_btc_test, y_predict_btc)
    for score_fn in (precision_score, recall_score, accuracy_score, f1_score)
]

# One print call, one metric per line (same text as four separate prints).
print(
    f"precision = {precision}",
    f"recall = {recall}",
    f"accuracy = {accuracy}",
    f"f1-score = {f1}",
    sep="\n",
)

precision = 0.9019607843137255
recall = 0.92
accuracy = 0.9081632653061225
f1-score = 0.9108910891089109


In [436]:
# Run the project's training template with k_folds=5 on the combined
# Twitter + normalized-indicator X / y, passing the same LightGBM
# hyper-parameters as the holdout model.
# NOTE(review): this call passes method='proba' while most sibling LightGBM
# sections pass 'choice'; the template's handling of the two is not visible
# here — confirm the resulting metrics are comparable.
metrics, model = ml_class_train_template(
    X.to_numpy(),
    y.to_numpy(),
    LGBMClassifier,
    k_folds=5,
    args={
        'n_estimators': 10000,
        'learning_rate': 3e-4,
        'num_leaves': 34,
        'colsample_bytree': 0.9,
        'subsample': 0.8,
        'max_depth': 8,
        'reg_alpha': 0.1,
        'reg_lambda': 0.1,
        'verbose': -1,
    },
    method='proba',
)

# Display the metrics dict returned by the template.
metrics

{'precision': 0.9591836734693877,
 'recall': 0.94,
 'accuracy': 0.9489795918367347,
 'f1_score': 0.9494949494949495}