In [1]:
import sys

# Make the parent directory importable so that `src.preprocessing` (imported in
# the next cell) resolves. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate ".." entry each time.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
from src.preprocessing import prepare_target, split_features_target
import pandas as pd
import numpy as np

In [7]:
from tuneta.tune_ta import TuneTA

In [27]:
# Create the TuneTA tuner that the surrounding cells fit, report on, prune and
# later use to transform X.
# NOTE(review): n_jobs=4 presumably sets the number of parallel workers and
# verbose=True presumably enables progress output — confirm against TuneTA docs.
tt = TuneTA(n_jobs=4, verbose=True)

In [29]:
# Tune the RSI, ATR and TSF indicators on X against the
# `target_position_change` column of y. Three ranges are supplied; the report
# output further down lists one tuned `timeperiod` per indicator per range
# (nine results in total).
# NOTE(review): X and y are only assigned further down the file (the
# `split_features_target` cells), so in file order this cell raises NameError
# on a fresh kernel. Move data loading/splitting above it.
# NOTE(review): the fit_times output below reports roughly 200-320 per study
# (units not shown, presumably seconds) — consider caching the fitted tuner so
# Restart & Run All stays feasible.
# NOTE(review): `trials` and `early_stop` are presumably the per-study trial
# budget and the no-improvement patience — confirm against TuneTA docs.
tt.fit(
    X,
    y["target_position_change"],
    indicators=["tta.RSI", "tta.ATR", "tta.TSF"],
    ranges=[(4, 30), (31, 100), (101, 150)],
    trials=300,
    early_stop=50,
)

In [32]:
# Prune the fitted indicators using TuneTA's default arguments (none are passed
# here, so what is kept or removed is not visible in this notebook — check the
# TuneTA docs).
# NOTE(review): this ran as In [32], after the report (In [30]) and fit_times
# (In [31]) cells below, so their captured outputs describe the un-pruned fit.
tt.prune()

In [79]:
# Rebind X to the output of TuneTA's transform.
# NOTE(review): assigning X from a function of itself makes this cell
# non-idempotent — re-running it feeds the already-transformed frame back into
# transform. Prefer a new name (e.g. X_features) once downstream cells are
# updated to match.
# NOTE(review): this ran as In [79], i.e. after X/y were re-split and indexed
# (In [76]-[78], further down the file) and just before the final model.fit
# (In [80]); file order does not reflect that.
X = tt.transform(X)

In [30]:
# Print TuneTA's report. The captured output shows each tuned indicator's
# correlation to the target, followed by the indicators' correlation to each
# other.
# NOTE(review): captured at In [30], before tt.prune() ran (In [32]).
tt.report()


Indicator Correlation to Target:

                          Correlation
----------------------  -------------
tta_RSI_timeperiod_42        0.533896
tta_RSI_timeperiod_27        0.515163
tta_RSI_timeperiod_104       0.454011
tta_ATR_timeperiod_14        0.276263
tta_ATR_timeperiod_35        0.267375
tta_ATR_timeperiod_103       0.239368
tta_TSF_timeperiod_7         0.141208
tta_TSF_timeperiod_35        0.14118
tta_TSF_timeperiod_103       0.140598

Indicator Correlation to Each Other:

                          tta_RSI_timeperiod_42    tta_RSI_timeperiod_27    tta_RSI_timeperiod_104    tta_ATR_timeperiod_14    tta_ATR_timeperiod_35    tta_ATR_timeperiod_103    tta_TSF_timeperiod_7    tta_TSF_timeperiod_35    tta_TSF_timeperiod_103
----------------------  -----------------------  -----------------------  ------------------------  -----------------------  -----------------------  ------------------------  ----------------------  -----------------------  ------------------------
tta_RSI_t

In [31]:
# Show how long each indicator study took to fit. The captured table has one
# row per study (9 rows) with columns `Indicator` and `Times`; the unit is not
# shown in the output (presumably seconds — confirm against TuneTA docs).
tt.fit_times()

    Indicator      Times
--  -----------  -------
 0  tta.RSI       317.02
 6  tta.TSF       249.99
 4  tta.ATR       227.81
 7  tta.TSF       224.19
 3  tta.ATR       223.45
 2  tta.RSI       208.55
 5  tta.ATR       205.52
 1  tta.RSI       205.26
 8  tta.TSF       204.78


In [72]:
# Load the raw CSV and discard the four columns the rest of the notebook does
# not keep, as one expression with no in-place mutation.
df = pd.read_csv("../data/data.csv").drop(
    columns=["symbol", "exchange", "turnover", "open_interest"]
)

In [15]:
# Add the target columns to df using a look-ahead period of 50.
# NOTE(review): this ran as In [15], when the only `prepare_target` in scope
# was the one imported from src.preprocessing; the notebook-local definition
# further down ran later (In [74]). The same call is repeated below (In [75]).
# NOTE(review): rebinding df to a function of itself is non-idempotent — a
# second run feeds the already-prepared frame back in.
df = prepare_target(df, 50)

Generating targets
Targets generated


In [35]:
# Split df into a feature frame X and a target frame y using the project helper
# from src.preprocessing. y is indexed by column name elsewhere in the notebook
# (e.g. y["target_position_change_quartile"]), so it carries several target
# columns.
# NOTE(review): identical cell is repeated further down (In [76]).
X, y = split_features_target(df)

In [77]:
# Index the feature frame by timestamp, built from its own `datetime` column.
# Rebinding (instead of inplace=True) yields the same frame.
X = X.set_index(pd.DatetimeIndex(X["datetime"]))

In [78]:
# Give the target frame the same timestamp index as X. The timestamps are taken
# from X's `datetime` column, which relies on X and y having the same length
# and row order (both come from the same split_features_target call).
y = y.set_index(pd.DatetimeIndex(X["datetime"]))

In [47]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split

In [41]:
# Gradient-boosting classifier with all-default hyperparameters; this single
# instance is reused for every fit/score/predict cell below.
# NOTE(review): no random_state is passed. If the estimator uses any internal
# randomness (e.g. a validation split for early stopping), the scores below are
# not exactly reproducible — consider fixing a seed.
model = HistGradientBoostingClassifier()

In [48]:
# 50/50 split with shuffle=False, so row order is preserved and the second half
# of the rows becomes the test set.
# NOTE(review): `x_test` is lower-case while `X_train` is upper-case; the later
# score/predict cells use the lower-case name, so rename them all together.
X_train, x_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.5)

In [64]:
# Fit the classifier on the first half, predicting the categorical
# `target_position_change_quartile` label.
# NOTE(review): this ran as In [64], after the two score cells below (In [51],
# In [61]); their captured outputs therefore come from an earlier fit that is
# no longer visible in the notebook.
model.fit(X_train, y_train["target_position_change_quartile"])

In [51]:
# Score on the held-out half (for scikit-learn classifiers, score() is mean
# accuracy).
# NOTE(review): the 0.994 below is not evidence of skill on its own — the
# prediction counts further down (In [68]) show 189388 of 189838 test rows
# predicted as "meh", which suggests a heavily imbalanced target. Report
# per-class metrics (confusion matrix, balanced accuracy) instead.
model.score(x_test, y_test["target_position_change_quartile"])

0.9942319240615682

In [61]:
# Same score on the training half, for comparison with the held-out score
# above (0.998 vs 0.994 in the captured outputs).
# NOTE(review): ran as In [61], before the fit cell above (In [64]).
model.score(X_train, y_train["target_position_change_quartile"])

0.9983248787117369

In [66]:
# Class probabilities for the held-out half. The captured array has three
# columns, i.e. the model fitted at this point knew three classes, and the
# displayed rows put ~0.9997-0.9999 on the first column.
# NOTE(review): this displays the raw array; summarise it (e.g. via a
# DataFrame with model.classes_ as columns and .describe()) for readability.
model.predict_proba(x_test)

array([[9.99929160e-01, 2.63245300e-06, 6.82071843e-05],
       [9.99929160e-01, 2.63245300e-06, 6.82071843e-05],
       [9.99912156e-01, 2.93329303e-06, 8.49105521e-05],
       ...,
       [9.99743216e-01, 4.92851526e-06, 2.51855855e-04],
       [9.99743216e-01, 4.92851526e-06, 2.51855855e-04],
       [9.99743216e-01, 4.92851526e-06, 2.51855855e-04]])

In [68]:
# Count how often each label is predicted on the held-out half. The captured
# output is 189388 "meh", 437 "strong_sell" and 13 "strong_buy": the model
# almost always predicts the same class.
pd.Series(model.predict(x_test)).value_counts()

meh            189388
strong_sell       437
strong_buy         13
dtype: int64

In [74]:
from sklearn.preprocessing import StandardScaler
from pandas_ta import log_return


def prepare_target(df, future_period):
    """Build the position-style target columns for a price frame.

    Works on a copy of ``df``; the caller's frame is not modified. Prints a
    message when it starts and when it finishes.

    Steps, in order:

    1. Compute ``log_return`` of ``df.close`` with ``length=future_period``
       and ``offset=-future_period`` (presumably the return realised over the
       next ``future_period`` rows -- confirm against pandas_ta), then
       standardise it with a ``StandardScaler`` fitted on the whole series.
    2. Drop every row that contains a NaN in any column.
    3. ``target_position_change``: the standardised return times 10,
       truncated to an int, minus the same quantity ``future_period`` rows
       earlier (0 where there is no earlier row).
    4. ``target_total_position``: running sum of ``target_position_change``.
    5. ``target_position_change_quartile``: ``pd.qcut`` of the change into 5
       quantile bins (five, despite the column name), labelled from
       ``strong_sell`` to ``strong_buy``.
    6. ``target_total_position_quartile``: ``pd.cut`` of the total into 7
       bins. Labels repeat, so bins 2-3 share ``short_hold`` and bins 5-6
       share ``long_hold``; the categorical is unordered.

    The temporary standardised-return column is removed before returning.

    Args:
        df: DataFrame with at least a ``close`` column.
        future_period: Number of rows used both as the return length and as
            the look-ahead / look-back shift.

    Returns:
        A new DataFrame: the input minus NaN rows, plus the four target
        columns described above.
    """
    print("Generating targets")
    ret_col = f"{future_period}m_ret"
    change_labels = ["strong_sell", "sell", "meh", "buy", "strong_buy"]
    total_labels = [
        "max_short_pos",
        "short_hold",
        "short_hold",
        "no_trade",
        "long_hold",
        "long_hold",
        "max_long_pos",
    ]

    df = df.copy()

    # Standardised forward log return, stored temporarily as a column so the
    # dropna below also removes rows where it is undefined.
    forward_returns = log_return(
        df.close, length=future_period, offset=-future_period
    )
    df[ret_col] = StandardScaler().fit_transform(
        forward_returns.values.reshape(-1, 1)
    )
    df.dropna(inplace=True)

    # Integer "position" implied by the scaled return, then its change
    # relative to the position `future_period` rows earlier.
    scaled_position = (df[ret_col] * 10).apply(int)
    earlier_position = scaled_position.shift(future_period).fillna(0)
    df["target_position_change"] = scaled_position - earlier_position
    df["target_total_position"] = df["target_position_change"].cumsum()

    # Categorical versions of both numeric targets.
    df["target_position_change_quartile"] = pd.qcut(
        df["target_position_change"], 5, labels=change_labels
    )
    df["target_total_position_quartile"] = pd.cut(
        df["target_total_position"], 7, labels=total_labels, ordered=False
    )

    # The helper column is not part of the result.
    del df[ret_col]
    print("Targets generated")

    return df

In [75]:
# Regenerate the target columns with a look-ahead period of 50, this time with
# the notebook-local `prepare_target` defined in the cell above (In [74]),
# which shadows the function of the same name imported from src.preprocessing.
# NOTE(review): keep a single definition (update the module and rely on the
# import) so the two versions cannot drift apart.
df = prepare_target(df, 50)

Generating targets
Targets generated


In [76]:
# Re-split the freshly prepared df into features X and targets y.
# NOTE(review): exact duplicate of the earlier split cell (In [35]); after
# reordering the notebook a single split should be enough.
X, y = split_features_target(df)

In [80]:
# Refit the existing `model` object on the full X and the five-label
# `target_position_change_quartile` target.
# NOTE(review): by execution order (In [79] -> In [80]) X here is the output of
# tt.transform(X), although that cell sits much higher up in the file.
# NOTE(review): no train/test split is used for this fit.
model.fit(X, y["target_position_change_quartile"])

In [81]:
# Score on the same data the model was just fitted on (in-sample). The
# captured value is 0.46.
# NOTE(review): an in-sample score says nothing about generalisation; evaluate
# on a held-out, time-ordered split as the earlier cells do.
model.score(X, y["target_position_change_quartile"])

0.4618871403173767

In [83]:
# In-sample label predictions for every row of X; the captured output shows
# 'strong_sell' at the start and 'sell' at the end of the array.
# NOTE(review): displays the raw array — summarise with
# pd.Series(...).value_counts() as done for the earlier model.
model.predict(X)

array(['strong_sell', 'strong_sell', 'strong_sell', ..., 'sell', 'sell',
       'sell'], dtype=object)