## Build Models

In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

In [2]:
PATH = "/Users/admin/PycharmProjects/Market_Prediction_Using_News_and_Indicators/Crypto_Currencies_Data/"

### MLP

In [3]:
from sklearn.neural_network import MLPClassifier

### Twitter news

In [152]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df = pd.merge(y_btc, X_btc, left_index=True, right_index=True)
y = df['Close']
X = df.loc[:, 'neg':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [153]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [154]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5102040816326531
recall = 1.0
accuracy = 0.5102040816326531
f1-score = 0.6756756756756758


#### No indicators

In [4]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [17]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [18]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

In [19]:
precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4811320754716981
recall = 1.0
accuracy = 0.4811320754716981
f1-score = 0.6496815286624203


In [23]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4528211971025543
recall = 0.6866669856328517
accuracy = 0.46948941469489414
f1-score = 0.5244274049577102


#### With Indicators

In [24]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [25]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [26]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5572916666666666
recall = 0.8392156862745098
accuracy = 0.6018867924528302
f1-score = 0.6697965571205008


In [27]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5499719887955182
recall = 0.7438040818558365
accuracy = 0.5877957658779577
f1-score = 0.6323413207957171


### Reduce dim

In [28]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [34]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)

In [35]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.4816955684007707
recall = 0.9803921568627451
accuracy = 0.4830188679245283
f1-score = 0.6459948320413437


In [36]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.2361842105263158
recall = 0.46866840731070497
accuracy = 0.4975093399750934
f1-score = 0.31408573928258965


  _warn_prf(average, modifier, msg_start, len(result))


### With Normalization

In [37]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [38]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)



In [39]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9094827586206896
recall = 0.8274509803921568
accuracy = 0.8773584905660378
f1-score = 0.8665297741273099


In [40]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))
    mlp_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = mlp_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")



precision = 0.7368228077990611
recall = 0.7359711836835127
accuracy = 0.7490660024906601
f1-score = 0.7363813063132427




### Twitter + Indicators(with Normalization)

In [114]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df1 = pd.merge(Z_btc, X_btc, left_index=True, right_index=True)
df2 = pd.merge(y_btc, df1, left_index=True, right_index=True)

y = df2['Close']
X = df2.loc[:, 'UpperBB':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [115]:
mlp_model_btc = MLPClassifier(solver='adam', alpha=0.0002, hidden_layer_sizes=(71,))

mlp_model_btc.fit(X_btc_train, y_btc_train)



In [116]:
y_predict_btc = mlp_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.900990099009901
recall = 0.91
accuracy = 0.9030612244897959
f1-score = 0.9054726368159204


### Random Forest

In [41]:
from sklearn.ensemble import RandomForestClassifier

### Twitter news

In [146]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df = pd.merge(y_btc, X_btc, left_index=True, right_index=True)
y = df['Close']
X = df.loc[:, 'neg':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [147]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [148]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5579710144927537
recall = 0.77
accuracy = 0.5714285714285714
f1-score = 0.6470588235294118


### No indicators

In [42]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [43]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [44]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6425339366515838
recall = 0.5568627450980392
accuracy = 0.6377358490566037
f1-score = 0.5966386554621849


In [100]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5849836048643205
recall = 0.5764847331112392
accuracy = 0.5966385569017487
f1-score = 0.5801829268292683


### With indicators

In [54]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [55]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [56]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9421487603305785
recall = 0.8941176470588236
accuracy = 0.9226415094339623
f1-score = 0.9175050301810866


In [105]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8962086042783509
recall = 0.8816440264716127
accuracy = 0.893228559915009
f1-score = 0.8887951541149824


### Reduce dim

In [57]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [58]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [59]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9399141630901288
recall = 0.8588235294117647
accuracy = 0.9056603773584906
f1-score = 0.8975409836065574


In [109]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8813018511944675
recall = 0.8756530825496341
accuracy = 0.8823541048466864
f1-score = 0.8780519735238063


### With normalization

In [60]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [61]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [62]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9458333333333333
recall = 0.8901960784313725
accuracy = 0.9226415094339623
f1-score = 0.9171717171717171


In [115]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    rf_model_btc = RandomForestClassifier(n_estimators=1000)
    rf_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = rf_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8933474653568255
recall = 0.8805990943921979
accuracy = 0.8912510532292925
f1-score = 0.8868289701098805


### Twitter + Indicators(with Normalization)

In [149]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df1 = pd.merge(Z_btc, X_btc, left_index=True, right_index=True)
df2 = pd.merge(y_btc, df1, left_index=True, right_index=True)

y = df2['Close']
X = df2.loc[:, 'UpperBB':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [150]:
rf_model_btc = RandomForestClassifier(n_estimators=1000)
rf_model_btc.fit(X_btc_train, y_btc_train)

In [151]:
y_predict_btc = rf_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.93
recall = 0.93
accuracy = 0.9285714285714286
f1-score = 0.93


### SVM

In [63]:
from sklearn import svm

### Twitter news

In [123]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df = pd.merge(y_btc, X_btc, left_index=True, right_index=True)
y = df['Close']
X = df.loc[:, 'neg':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [124]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [125]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5102040816326531
recall = 1.0
accuracy = 0.5102040816326531
f1-score = 0.6756756756756758


### No indicators

In [64]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [65]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [66]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.539568345323741
recall = 0.29411764705882354
accuracy = 0.539622641509434
f1-score = 0.3807106598984772


In [120]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5284210526315789
recall = 0.43263699287795676
accuracy = 0.5387965580198841
f1-score = 0.4706628817800519


### With indicators

In [67]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [68]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [69]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.632
recall = 0.6196078431372549
accuracy = 0.6433962264150943
f1-score = 0.6257425742574257


In [124]:
folds = KFold(n_splits=2, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5986610476901739
recall = 0.477062094531974
accuracy = 0.5931605110582017
f1-score = 0.530172805761751


### Reduce dim

In [70]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [71]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [72]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6794871794871795
recall = 0.20784313725490197
accuracy = 0.5716981132075472
f1-score = 0.31831831831831836


In [133]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7263360451566694
recall = 0.17676767676767677
accuracy = 0.5650049455984175
f1-score = 0.2791667742144672


### With Normalization

In [73]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [74]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [75]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8669950738916257
recall = 0.6901960784313725
accuracy = 0.8
f1-score = 0.7685589519650654


In [137]:
folds = KFold(n_splits=3, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    svm_model_btc = svm.SVC(kernel='rbf')
    svm_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = svm_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7566194069719464
recall = 0.7174503657262278
accuracy = 0.7513616880975932
f1-score = 0.7361516411971385


### Twiter + Indicators (with Normalization)

In [137]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df1 = pd.merge(Z_btc, X_btc, left_index=True, right_index=True)
df2 = pd.merge(y_btc, df1, left_index=True, right_index=True)

y = df2['Close']
X = df2.loc[:, 'UpperBB':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [138]:
svm_model_btc = svm.SVC(kernel='rbf')

svm_model_btc.fit(X_btc_train, y_btc_train)

In [139]:
y_predict_btc = svm_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8404255319148937
recall = 0.79
accuracy = 0.8163265306122449
f1-score = 0.8144329896907218


### Boosting

In [45]:
from lightgbm import LGBMClassifier

### Twitter news

In [129]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df = pd.merge(y_btc, X_btc, left_index=True, right_index=True)
y = df['Close']
X = df.loc[:, 'neg':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [130]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [131]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.5255474452554745
recall = 0.72
accuracy = 0.5255102040816326
f1-score = 0.6075949367088608


### No indicators

In [46]:
X_btc = pd.read_csv(PATH + "/BTC.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [47]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [48]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.6878306878306878
recall = 0.5098039215686274
accuracy = 0.6528301886792452
f1-score = 0.5855855855855855


In [49]:
folds = KFold(n_splits=5, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.7101868649856647
recall = 0.5040732306619133
accuracy = 0.6650122869139528
f1-score = 0.5885795507475021


### With indicators

In [76]:
X_btc = pd.read_csv(PATH + "/BTC_indicators.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [77]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [78]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9149797570850202
recall = 0.8862745098039215
accuracy = 0.9056603773584906
f1-score = 0.900398406374502


In [147]:
folds = KFold(n_splits=4, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8871857069423352
recall = 0.8443326748457752
accuracy = 0.8724592024419833
f1-score = 0.8649698279686729


### Reduce dim

In [79]:
X_btc = pd.read_csv(PATH + "/BTC_reduce.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [80]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [150]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.85
recall = 0.7798165137614679
accuracy = 0.8248502994011976
f1-score = 0.8133971291866028


In [151]:
folds = KFold(n_splits=4, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8629408801920923
recall = 0.827242640408313
accuracy = 0.85269439987477
f1-score = 0.8443315712355558


### With normalization

In [81]:
X_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')
y_btc = y_btc.values.ravel()

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X_btc, y_btc, test_size=0.33, random_state=42)

In [82]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [83]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.927710843373494
recall = 0.9058823529411765
accuracy = 0.9207547169811321
f1-score = 0.9166666666666667


In [155]:
folds = KFold(n_splits=4, shuffle=True, random_state=42)

precision_list = []
recall_list = []
accuracy_list = []
f1_list = []

for i, (train_index, test_index) in enumerate(folds.split(X_btc)):
    boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )
    boosting_model_btc.fit(X_btc.iloc[train_index], y_btc[train_index])
    y_predict_btc = boosting_model_btc.predict(X_btc.iloc[test_index])

    precision_list.append(precision_score(y_btc[test_index], y_predict_btc))
    recall_list.append(recall_score(y_btc[test_index], y_predict_btc))
    accuracy_list.append(accuracy_score(y_btc[test_index], y_predict_btc))
    f1_list.append(f1_score(y_btc[test_index], y_predict_btc))

precision = sum(precision_list) / len(precision_list)
recall = sum(recall_list) / len(recall_list)
accuracy = sum(accuracy_list) / len(accuracy_list)
f1 = sum(f1_list) / len(f1_list)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.8931290214356842
recall = 0.8637526690301214
accuracy = 0.8838316440339685
f1-score = 0.8778859524448188


### Twitter + Indicators(with Normalization)

In [141]:
X_btc = pd.read_csv(PATH + "/twitter_btc.csv", index_col='time')
Z_btc = pd.read_csv(PATH + "/BTC_normalization.csv", index_col='time')
y_btc = pd.read_csv(PATH + "/BTC_target.csv", index_col='time')

df1 = pd.merge(Z_btc, X_btc, left_index=True, right_index=True)
df2 = pd.merge(y_btc, df1, left_index=True, right_index=True)

y = df2['Close']
X = df2.loc[:, 'UpperBB':]

X_btc_train, X_btc_test, y_btc_train, y_btc_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [143]:
boosting_model_btc = LGBMClassifier(
        n_estimators=10000,
        learning_rate=3e-4,
        num_leaves=34,
        colsample_bytree=0.9,
        subsample=0.8,
        max_depth=8,
        reg_alpha=.1,
        reg_lambda=.1,
        verbose=-1,
        )

boosting_model_btc.fit(X_btc_train, y_btc_train)

In [145]:
y_predict_btc = boosting_model_btc.predict(X_btc_test)

precision = precision_score(y_btc_test, y_predict_btc)
recall = recall_score(y_btc_test, y_predict_btc)
accuracy = accuracy_score(y_btc_test, y_predict_btc)
f1 = f1_score(y_btc_test, y_predict_btc)

print(f"precision = {precision}")
print(f"recall = {recall}")
print(f"accuracy = {accuracy}")
print(f"f1-score = {f1}")

precision = 0.9019607843137255
recall = 0.92
accuracy = 0.9081632653061225
f1-score = 0.9108910891089109
