In [1]:
import pandas as pd
import numpy as np
import os

# Input table on the mounted Google Drive. Later cells read its "datetime",
# "open"/"high"/"low"/"close"/"volume" and "targets" columns.
CHART_CSV_PATH = "/content/drive/MyDrive/chart_with_sentiment_scores.csv"

chart_df = pd.read_csv(CHART_CSV_PATH)

In [4]:
import pandas_ta as ta
from tqdm.auto import tqdm
# Feature engineering: turns the raw chart table into model inputs.
#   1. calendar features from "datetime"
#   2. pandas_ta indicators (via the DataFrame `.ta` accessor)
#   3. intra-bar price ratios
#   4. lagged change ratios for each OHLCV column
# The raw price/volume columns are dropped at the end, so this cell cannot be
# re-run without reloading chart_df first.

# --- 1. Calendar features ----------------------------------------------------
# Parse the whole column in one vectorized call rather than once per row.
# NOTE(review): assumes every row shares one timestamp format — confirm.
timestamps = pd.to_datetime(chart_df["datetime"])
chart_df["hours"] = timestamps.dt.hour
chart_df["days"] = timestamps.dt.day
chart_df["months"] = timestamps.dt.month

# --- 2. Technical indicators -------------------------------------------------
chart_df["ebsw"] = chart_df.ta.ebsw(lookahead=False)
chart_df["cmf"] = chart_df.ta.cmf(lookahead=False)
chart_df["bop"] = chart_df.ta.bop(lookahead=False)
chart_df["rsi/100"] = chart_df.ta.rsi(lookahead=False) / 100.0
chart_df["hwma"] = chart_df.ta.hwma(lookahead=False)
chart_df["linreg"] = chart_df.ta.linreg(lookahead=False)

# --- 3. Price ratios ---------------------------------------------------------
# hwma and linreg are only kept relative to the close; the absolute values are
# dropped below.
chart_df["hwma/close"] = chart_df["hwma"] / chart_df["close"]
chart_df["linreg/close"] = chart_df["linreg"] / chart_df["close"]
chart_df["high/low"] = chart_df["high"] / chart_df["low"]
chart_df["high/open"] = chart_df["high"] / chart_df["open"]
chart_df["low/open"] = chart_df["low"] / chart_df["open"]
chart_df["close/open"] = chart_df["close"] / chart_df["open"]
chart_df["high/close"] = chart_df["high"] / chart_df["close"]
chart_df["low/close"]  = chart_df["low"] / chart_df["close"]

# --- 4. Lagged change ratios -------------------------------------------------
# <col>_change_<lag> = value[i] / value[i - lag].
# The first `lag` rows have no predecessor and stay NaN (removed by the later
# dropna). Where the lagged value is exactly 0 the ratio is defined as 1, so a
# division by zero never reaches the features.
for lag in range(1, 6):
  for col in ["open", "high", "low", "close", "volume"]:
    current = chart_df[col]
    previous = current.shift(lag)
    ratio = current / previous
    chart_df[f"{col}_change_{lag}"] = ratio.where(previous != 0, 1.0)

# Drop the raw inputs; only the derived, scale-free features remain.
chart_df = chart_df.drop(
  columns=["datetime", "open", "high", "low", "close", "volume", "linreg", "hwma"]
)

  0%|          | 0/9335 [00:00<?, ?it/s]

In [5]:
# Remove, in place, every row that still contains a missing value
# (for example the first rows of the lagged-ratio columns).
chart_df.dropna(inplace=True)

# Every column except the label column is used as a model input.
train_columns = [name for name in chart_df.columns if name != "targets"]

X = chart_df[train_columns]
Y = chart_df["targets"]

# Train XGBoost

In [None]:
import xgboost as xgb

# Fixed hyperparameters for the XGBoost model (presumably the result of an
# earlier tuning run — the search itself is not part of this notebook).
xgboost_best_params = dict(
    subsample=0.7794564925617046,
    colsample_bytree=0.9953018636025351,
    max_depth=9,
    learning_rate=0.013138686044784876,
    min_child_weight=9.040062755238312,
    gamma=2.1660919252569797,
    n_estimators=1847,
    objective="multi:softmax",
    num_class=3,
    eval_metric='merror',
    tree_method='gpu_hist',
)

clf = xgb.XGBClassifier(**xgboost_best_params)

# The evaluation set is the training data itself, so the merror logged every
# 100 rounds is the training error, not a held-out estimate.
clf.fit(
    X,
    Y,
    eval_set=[(X, Y)],
    verbose=100,
)

# Written relative to the current working directory.
clf.save_model("xgboost_20230703")

[0]	validation_0-merror:0.39619
[100]	validation_0-merror:0.19213
[200]	validation_0-merror:0.15491
[300]	validation_0-merror:0.13554
[400]	validation_0-merror:0.12478
[500]	validation_0-merror:0.12059
[600]	validation_0-merror:0.11747
[700]	validation_0-merror:0.11510
[800]	validation_0-merror:0.11521
[900]	validation_0-merror:0.11252
[1000]	validation_0-merror:0.11284
[1100]	validation_0-merror:0.11231
[1200]	validation_0-merror:0.11177
[1300]	validation_0-merror:0.11112
[1400]	validation_0-merror:0.11015
[1500]	validation_0-merror:0.10951
[1600]	validation_0-merror:0.10983
[1700]	validation_0-merror:0.10929
[1800]	validation_0-merror:0.10811
[1846]	validation_0-merror:0.10768


# Train LightGBM

In [None]:
import lightgbm as lgb

# Fixed hyperparameters for the LightGBM model (presumably the result of an
# earlier tuning run — the search itself is not part of this notebook).
# NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
# (its default is 0) — confirm whether row subsampling was meant to be active.
lightgbm_best_params = {'num_leaves': 98,
                        'learning_rate': 0.010726258300226442,
                        'max_depth': 6,
                        'min_child_samples': 4,
                        'subsample': 0.6186710352439998,
                        'colsample_bytree': 0.5690642326572914,
                        'reg_alpha': 0.37843237624912734,
                        'reg_lambda': 0.14538913425870043,
                        'n_estimators': 255,
                        'objective': 'multiclass',
                        'num_class': 3,
                        'metric': 'multi_logloss',
                        'boosting_type': 'gbdt',
                        'random_state': 42}

lgb_clf = lgb.LGBMClassifier(**lightgbm_best_params)

# Log the metric every 100 iterations through a callback: the `verbose`
# argument of fit() was removed in LightGBM 4.0, while log_evaluation works on
# both 3.3.x and 4.x.
# The evaluation set is the training data itself, so the logged multi_logloss
# is the training loss.
lgb_clf.fit(X, Y, eval_set=[(X, Y)], callbacks=[lgb.log_evaluation(period=100)])

# Written relative to the current working directory.
lgb_clf.booster_.save_model("lightgbm_20230703")



[100]	training's multi_logloss: 0.902585
[200]	training's multi_logloss: 0.81345


<lightgbm.basic.Booster at 0x7f7c7c079cf0>

# Train CatBoost

In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2-cp310-cp310-manylinux2014_x86_64.whl (98.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.6/98.6 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2


In [None]:
import catboost as cb

# Fixed hyperparameters for the CatBoost model (presumably the result of an
# earlier tuning run — the search itself is not part of this notebook).
catboost_best_params = dict(
    iterations=1721,
    learning_rate=0.049627662077003816,
    depth=8,
    l2_leaf_reg=2.2763608031118214,
    border_count=253,
    task_type='GPU',
    loss_function="MultiClass",
    eval_metric="Accuracy",
    random_seed=42,
    verbose=False,
)

cb_clf = cb.CatBoostClassifier(**catboost_best_params)

# The evaluation set is the training data itself, so the "learn" and "test"
# columns of the log report the same accuracy.
cb_clf.fit(
    X,
    Y,
    eval_set=(X, Y),
    verbose=100,
)

# Written relative to the current working directory.
cb_clf.save_model("catboost_20230703")
print("done saving!")

0:	learn: 0.4949441	test: 0.4949441	best: 0.4949441 (0)	total: 10.5ms	remaining: 18.1s
100:	learn: 0.6983649	test: 0.6983649	best: 0.6983649 (100)	total: 834ms	remaining: 13.4s
200:	learn: 0.7806583	test: 0.7806583	best: 0.7806583 (200)	total: 1.64s	remaining: 12.4s
300:	learn: 0.8447719	test: 0.8447719	best: 0.8447719 (300)	total: 2.45s	remaining: 11.6s
400:	learn: 0.8841437	test: 0.8841437	best: 0.8841437 (400)	total: 3.24s	remaining: 10.7s
500:	learn: 0.9170611	test: 0.9170611	best: 0.9170611 (500)	total: 4.03s	remaining: 9.8s
600:	learn: 0.9430938	test: 0.9430938	best: 0.9430938 (598)	total: 4.83s	remaining: 8.99s
700:	learn: 0.9634251	test: 0.9634251	best: 0.9634251 (700)	total: 5.62s	remaining: 8.17s
800:	learn: 0.9757960	test: 0.9757960	best: 0.9757960 (798)	total: 6.43s	remaining: 7.39s
900:	learn: 0.9854776	test: 0.9854776	best: 0.9855852 (899)	total: 7.23s	remaining: 6.58s
1000:	learn: 0.9909639	test: 0.9909639	best: 0.9911790 (997)	total: 8.04s	remaining: 5.79s
1100:	learn: 

# Train TabNet

In [6]:
# Show the GPU attached to this runtime; the TabNet cell below trains on
# "cuda" when torch reports it as available.
!nvidia-smi

Mon Jul  3 03:02:09 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [7]:
from pytorch_tabnet.tab_model import TabNetClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Fixed architecture hyperparameters for TabNet (presumably the result of an
# earlier tuning run — the search itself is not part of this notebook).
best_tabnet_params = dict(
    n_d=16,
    n_a=16,
    n_steps=3,
    gamma=1.3776893571377133,
    mask_type='sparsemax',
)

# Train on the GPU when torch can see one, otherwise fall back to the CPU.
training_device = "cuda" if torch.cuda.is_available() else "cpu"

model = TabNetClassifier(
    optimizer_fn=optim.Adam,
    optimizer_params=dict(lr=1e-3),
    device_name=training_device,
    verbose=5,
    **best_tabnet_params,
)

# TabNet is given NumPy arrays (.values) here rather than the pandas objects.
# The evaluation set is the training data itself, so the reported balanced
# accuracy (and the patience-based early stopping) is measured on training data.
model.fit(
    X.values,
    Y.values,
    eval_set=[(X.values, Y.values)],
    eval_metric=["balanced_accuracy"],
    max_epochs=187,
    patience=100,
    batch_size=32,
    virtual_batch_size=32,
    num_workers=0,
    drop_last=False,
)



epoch 0  | loss: 1.53812 | val_0_balanced_accuracy: 0.33588 |  0:00:09s
epoch 5  | loss: 1.05661 | val_0_balanced_accuracy: 0.43924 |  0:00:51s
epoch 10 | loss: 1.0354  | val_0_balanced_accuracy: 0.459   |  0:01:33s
epoch 15 | loss: 1.0143  | val_0_balanced_accuracy: 0.47254 |  0:02:14s
epoch 20 | loss: 1.00296 | val_0_balanced_accuracy: 0.46627 |  0:02:56s
epoch 25 | loss: 0.99411 | val_0_balanced_accuracy: 0.48181 |  0:03:37s
epoch 30 | loss: 0.98613 | val_0_balanced_accuracy: 0.48019 |  0:04:18s
epoch 35 | loss: 0.97851 | val_0_balanced_accuracy: 0.48904 |  0:05:00s
epoch 40 | loss: 0.97572 | val_0_balanced_accuracy: 0.49412 |  0:05:41s
epoch 45 | loss: 0.96837 | val_0_balanced_accuracy: 0.49736 |  0:06:23s
epoch 50 | loss: 0.96577 | val_0_balanced_accuracy: 0.501   |  0:07:04s
epoch 55 | loss: 0.96194 | val_0_balanced_accuracy: 0.49709 |  0:07:45s
epoch 60 | loss: 0.95595 | val_0_balanced_accuracy: 0.51171 |  0:08:27s
epoch 65 | loss: 0.95594 | val_0_balanced_accuracy: 0.5106  |  0



In [8]:
# Destination on the mounted Google Drive; the recorded output shows the
# archive is written with a ".zip" suffix appended to this path.
TABNET_MODEL_PATH = "/content/drive/MyDrive/tabnet_20230703"

model.save_model(TABNET_MODEL_PATH)
print("done saving!")

Successfully saved model at /content/drive/MyDrive/tabnet_20230703.zip
done saving!
