# Training Process

In [None]:
import numpy as np
import pandas as pd
from libs import (
    RollingTrain, 
    DNNModel, 
    LGBModel, 
    TabNetModel, 
    DoubleEnsembleModel,
    FusionModel,
    LinearModel,
)

## Data Preparation

In [None]:
# Load the three parquet inputs: the return table handed to every model,
# the normalized feature dataset, and the stock pool.
ret = pd.read_parquet('../data/intermediate/forward_return/1d_open_open.parquet')
dataset = pd.read_parquet('../data/intermediate/feature_info/normalized_dataset.parquet')
pool = pd.read_parquet('../data/stock_pool/stock_liquid_updown.parquet')

# Restrict the dataset to the (date, stock_code) pairs that appear in both
# the pool and the dataset index.
pool_index = pool.set_index(['date', 'stock_code']).index
pool = pool_index.intersection(dataset.index)
dataset = dataset.loc[pool]

# Label-based (inclusive) date windows for the single-split experiments below.
# NOTE(review): slicing by date label assumes the index is sorted by date
# after the intersection -- confirm, or sort explicitly.
train_window = slice("2018-01-01", "2018-06-30")
test_window = slice("2018-07-01", "2018-07-31")
train = dataset.loc[train_window]
test = dataset.loc[test_window]

## LGB Model

In [None]:
# LightGBM-based model built on the return table.
# NOTE(review): the meaning of `ret_stop` and `top` is defined in `libs`;
# presumably an early-stopping setting and a top-fraction selector -- confirm.
lgbm = LGBModel(
    ret,
    ret_stop=100,
    top=0.1,
)

In [None]:
# BENCHMARK: time: 40s, top_ret: 10.67%
# Fit on the train window with the test window as the second dataset, then
# plot the 'top_ret' history recorded under the 'valid' key.
lgbm.fit(train, test)
lgbm_valid_top_ret = pd.Series(lgbm.evals_result['valid']['top_ret'])
lgbm_valid_top_ret.plot();

## DNN Model

In [None]:
# Neural-network model built on the same return table.
# NOTE(review): `ret_stop` semantics live in `libs` -- confirm before tuning.
dnn = DNNModel(
    ret,
    ret_stop=10,
)

In [None]:
# BENCHMARK: time: 4min20s, top_ret: 4.08%
# Fit on the train window with the test window as the second dataset, then
# plot the 'top_ret' history recorded under the 'valid' key.
dnn.fit(train, test)
dnn_valid_top_ret = pd.Series(dnn.evals_result['valid']['top_ret'])
dnn_valid_top_ret.plot();

## TabNet Model

In [None]:
# TabNet model built on the same return table.
# NOTE(review): `ret_stop` semantics live in `libs` -- confirm before tuning.
tabnet = TabNetModel(
    ret,
    ret_stop=5,
)

In [None]:
# BENCHMARK: time: 15min-17min, top_ret: 5.36%
# Fit on the train window with the test window as the second dataset, then
# plot the 'top_ret' history recorded under the 'valid' key.
tabnet.fit(train, test)
tabnet_valid_top_ret = pd.Series(tabnet.evals_result['valid']['top_ret'])
tabnet_valid_top_ret.plot();

## Double Ensemble Model

In [None]:
# Double-ensemble model; one keyword per line so the settings are easy to diff.
# NOTE(review): `enable_fs` / `enable_sr` presumably toggle feature selection
# and sample reweighting -- confirm against `libs`.
doubens = DoubleEnsembleModel(
    ret,
    ret_stop=50,
    stop_models=3,
    enable_fs=False,
    enable_sr=True,
    n_estimators=100,
)

In [None]:
# BENCHMARK: time: 29min, top_ret: 11.33%
# Fit on the train window with the test window as the second dataset, then
# plot the 'top_ret' history recorded under the 'valid' key.
doubens.fit(train, test)
doubens_valid_top_ret = pd.Series(doubens.evals_result['valid']['top_ret'])
doubens_valid_top_ret.plot();

## Fusion Model

In [None]:
# Fusion head that combines the base models' outputs.
# NOTE(review): in_feature=2 presumably matches the two base models listed
# below -- keep them in sync if the list changes.
fusion_head = LinearModel(ret, in_feature=2, out_feature=1)

# Base models are passed as classes together with one kwargs dict per class,
# in the same order.
fusion = FusionModel(
    ret,
    models=[LGBModel, DNNModel],
    model_kwargs=[{"ret_stop": 100}, {"ret_stop": 10}],
    fusion=fusion_head,
)

In [None]:
# BENCHMARK: time: 6min53s, top_ret: 10.37%
# Fit the fusion model on the train window, passing the test window as the
# second dataset (same call shape as the single-model cells above).
fusion.fit(train, test)

## Rolling Training

In [None]:
# Rolling-training settings gathered in one place.
# NOTE(review): the *_days values are interpreted by `RollingTrain` in `libs`;
# their exact window semantics are not visible here -- confirm before changing.
rolling_settings = dict(
    min_days=120,
    max_days=140,
    pred_days=10,
    learn_days=200,
    exp_path='../data/intermediate/results/',
    exp_name='fusion',
)
roller = RollingTrain(**rolling_settings)

# Run the rolling procedure with the fusion model over the full pool-filtered
# dataset (not just the train/test windows used above).
roller.rolling(model=fusion, dataset=dataset)