# Model Training for Stock Data

This notebook trains the following non-deep-learning models:

* random forest
* AdaBoost
* XGBoost

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
import pickle
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import tqdm
import my_pywt as pywt

## Load Raw Data

In [None]:
# Read the training and validation tables from disk.
df_train = pd.read_csv('DataSet/TrainSet.csv')
df_val = pd.read_csv('DataSet/ValSet.csv')

# Model inputs: the first 108 columns of the training table, followed by the
# market-state columns listed here.
indicators = df_train.columns[:108].tolist()
market_stat = [
    'midPrice', 'LastPrice', 'Volume', 'LastVolume', 'Turnover',
    'LastTurnover', 'OpenInterest', 'UpperLimitPrice', 'LowerLimitPrice',
    'am_pm', 'UpdateMinute',
]
features = [*indicators, *market_stat]

# Plain NumPy arrays for the estimators: feature matrix and 'label' target.
train_data, train_label = df_train[features].values, df_train['label'].values
val_data, val_label = df_val[features].values, df_val['label'].values

## AdaBoost for Raw Data

In [None]:
# AdaBoost ensemble of depth-2 regression trees fitted on the raw features.
# NOTE: despite its `rf_` prefix, `rf_raw` holds an AdaBoostRegressor; the
# name is kept unchanged so other cells that refer to it keep working.
rf_raw = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2), n_estimators=20, learning_rate=1)
rf_raw.fit(train_data, train_label)

# Predict with the estimator fitted above (`rf_raw`).
val_pred = rf_raw.predict(val_data)

# Mean squared error on the validation set. It is bound to a name and printed
# because a bare expression that is not the last statement of a cell is
# silently discarded.
val_mse = ((val_label - val_pred) ** 2).mean()
print('validation MSE:', val_mse)

# Persist the fitted estimator.
with open('AdaBoostReg_Raw.pkl', 'wb') as fh:
    pickle.dump(rf_raw, fh)

## Random Forest for Raw Data

In [None]:
# Random forest of 10 trees (max depth 7) fitted on the raw features.
# NOTE: despite its `ada_` prefix, `ada_raw` holds a RandomForestRegressor; the
# name is kept unchanged so other cells that refer to it keep working.
ada_raw = RandomForestRegressor(10, max_depth=7, n_jobs=10)
ada_raw.fit(train_data, train_label)

# Predict with the estimator fitted above (`ada_raw`).
val_pred = ada_raw.predict(val_data)

# Mean squared error on the validation set. It is bound to a name and printed
# because a bare expression that is not the last statement of a cell is
# silently discarded.
val_mse = ((val_label - val_pred) ** 2).mean()
print('validation MSE:', val_mse)

# Persist the fitted estimator.
with open('Randomforest_Raw.pkl', 'wb') as fh:
    pickle.dump(ada_raw, fh)

## XGBoost for Raw Data

In [None]:
# Wrap the raw feature arrays and their labels in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(data=train_data, label=train_label)
dval = xgb.DMatrix(data=val_data, label=val_label)

### Parameters

In [None]:
# Booster configuration handed to xgb.train.
param_gbtree = {
    # --- general ---
    'booster': 'gbtree',
    # --- tree booster ---
    'eta': 0.1,
    'gamma': 0,
    'max_depth': 5,
    'min_child_weight': 1,
    'subsample': 0.9,
    'lambda': 0.5,
    'alpha': 0,
    'tree_method': 'auto',
    'num_parallel_tree': 1,  # values above 1 are the random-forest setting
    # --- learning task ---
    # 'objective' is intentionally not set here (candidate: 'reg:squarederror').
    'eval_metric': ['rmse'],
}

# Named datasets to evaluate during training: training set first, validation
# set last.
evallist = [
    (dtrain, 'train'),
    (dval, 'eval'),
]

In [None]:
# Upper bound on the number of boosting rounds.
num_round = 100

# Train on the raw-feature DMatrix. The round count and evaluation list are
# passed by keyword: current xgboost releases declare `evals` as keyword-only,
# so passing it positionally is deprecated. Training stops early when the
# metric on the last entry of `evallist` has not improved for 5 rounds.
bst_gbtree_raw = xgb.train(
    param_gbtree,
    dtrain,
    num_boost_round=num_round,
    evals=evallist,
    early_stopping_rounds=5,
)

In [None]:
# Write the booster trained on the raw features to disk.
bst_gbtree_raw.save_model(fname='XGBoosting_GBtree.model')

## Load Data with Wavelet Transform (WT)

In [None]:
# Wavelet-transform every feature column of the training set.
# Axis 0 of the result selects which output of pywt.wavelet_transform is
# stored: index 0 holds A, index 1 holds D (presumably the approximation and
# detail components -- confirm against my_pywt).
# Assumes A and D each have one value per input row -- TODO confirm.
train_data_WT = np.empty((2, *train_data.shape))

# `import tqdm` binds the module, so the progress-bar class is `tqdm.tqdm`;
# calling the module itself raises TypeError.
for j in tqdm.tqdm(range(train_data.shape[1])):
    A, D = pywt.wavelet_transform(train_data[:, j])
    train_data_WT[0, :, j] = A
    train_data_WT[1, :, j] = D

In [None]:
# Wavelet-transform every feature column of the validation set.
# Axis 0 of the result selects which output of pywt.wavelet_transform is
# stored: index 0 holds A, index 1 holds D (presumably the approximation and
# detail components -- confirm against my_pywt).
# Assumes A and D each have one value per input row -- TODO confirm.
val_data_WT = np.empty((2, *val_data.shape))

# `import tqdm` binds the module, so the progress-bar class is `tqdm.tqdm`;
# calling the module itself raises TypeError.
for j in tqdm.tqdm(range(val_data.shape[1])):
    A, D = pywt.wavelet_transform(val_data[:, j])
    val_data_WT[0, :, j] = A
    val_data_WT[1, :, j] = D

In [None]:
# Join the two transform outputs column-wise, so each sample carries the
# index-0 features followed by the index-1 features.
train_data_WT_merge = np.concatenate(
    [train_data_WT[0], train_data_WT[1]],
    axis=1,
)
val_data_WT_merge = np.concatenate(
    [val_data_WT[0], val_data_WT[1]],
    axis=1,
)

## Random Forest for Data with WT

In [None]:
# Random forest of 10 trees (max depth 7) fitted on the wavelet-transformed
# features.
rf_WT = RandomForestRegressor(10, max_depth=7, n_jobs=10)
rf_WT.fit(train_data_WT_merge, train_label)

# Predict with the estimator fitted above (`rf_WT`).
val_pred1 = rf_WT.predict(val_data_WT_merge)

# Mean squared error on the validation set. It is bound to a name and printed
# because a bare expression that is not the last statement of a cell is
# silently discarded.
val_mse = ((val_label - val_pred1) ** 2).mean()
print('validation MSE:', val_mse)

# Persist the fitted estimator.
with open('Randomforest_WT.pkl', 'wb') as fh1:
    pickle.dump(rf_WT, fh1)

## AdaBoost for Data with WT

In [None]:
# AdaBoost ensemble of depth-2 regression trees fitted on the
# wavelet-transformed features.
ada_WT = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2), n_estimators=20, learning_rate=1)
ada_WT.fit(train_data_WT_merge, train_label)
val_pred1 = ada_WT.predict(val_data_WT_merge)

# Mean squared error on the validation set. It is bound to a name and printed
# because a bare expression that is not the last statement of a cell is
# silently discarded.
val_mse = ((val_label - val_pred1) ** 2).mean()
print('validation MSE:', val_mse)

# Persist the fitted estimator.
with open('AdaBoostReg_WT.pkl', 'wb') as fh:
    pickle.dump(ada_WT, fh)

## XGBoost for Data with WT

In [None]:
# Wrap the merged wavelet features and their labels in DMatrix containers.
# This rebinds `dtrain` and `dval`.
dtrain = xgb.DMatrix(data=train_data_WT_merge, label=train_label)
dval = xgb.DMatrix(data=val_data_WT_merge, label=val_label)

### Parameters

In [None]:
# Booster configuration handed to xgb.train for the wavelet-feature run.
param_gbtree = {
    # --- general ---
    'booster': 'gbtree',
    # --- tree booster ---
    'eta': 0.1,
    'gamma': 0,
    'max_depth': 5,
    'min_child_weight': 1,
    'subsample': 0.9,
    'lambda': 0.5,
    'alpha': 0,
    'tree_method': 'auto',
    'num_parallel_tree': 1,  # values above 1 are the random-forest setting
    # --- learning task ---
    # 'objective' is intentionally not set here (candidate: 'reg:squarederror').
    'eval_metric': ['rmse'],
}

# Named datasets to evaluate during training: training set first, validation
# set last.
evallist = [
    (dtrain, 'train'),
    (dval, 'eval'),
]

In [None]:
# Upper bound on the number of boosting rounds.
num_round = 100

# Train on the wavelet-feature DMatrix. The round count and evaluation list
# are passed by keyword: current xgboost releases declare `evals` as
# keyword-only, so passing it positionally is deprecated. Training stops early
# when the metric on the last entry of `evallist` has not improved for 5 rounds.
bst_gbtree_WT = xgb.train(
    param_gbtree,
    dtrain,
    num_boost_round=num_round,
    evals=evallist,
    early_stopping_rounds=5,
)

In [None]:
# Write the booster trained on the wavelet features to disk.
bst_gbtree_WT.save_model(fname='XGBoosting_GBtree_WT.model')

## Load data with DFS

In [None]:
# Load the DFS training table. It must stay a DataFrame until the column
# selection at the end of this cell: converting to a NumPy array first makes
# the later `df_train_dfs[features]` label lookup fail.
df_train_dfs = pd.read_csv('DataSet/Dataset_DFSn.csv')

# Base columns: the first 108 columns of the raw training table plus the
# market-state columns. 'am_pm' and 'UpdateMinute' are left out of this set.
indicators = df_train.columns.values[:108].tolist()
market_stat = ['midPrice', 'LastPrice', 'Volume', 'LastVolume', 'Turnover', 'LastTurnover',
               'OpenInterest', 'UpperLimitPrice', 'LowerLimitPrice']
features = indicators + market_stat

# Columns that have derived '<name>_mean', '<name>_std' and '<name>_diff'
# companions in the DFS table.
play_data = ['indicator4', 'indicator88', 'indicator2', 'indicator83', 'indicator75',
             'midPrice', 'LastPrice', 'Volume', 'LastVolume', 'Turnover', 'LastTurnover',
             'OpenInterest', 'UpperLimitPrice', 'LowerLimitPrice']
features.extend(
    name + suffix
    for name in play_data
    for suffix in ('_mean', '_std', '_diff')
)

# Final training matrix as a NumPy array.
# NOTE(review): later cells fit this against `train_label` from the raw table,
# which assumes both files share the same row order -- confirm.
df_train_dfs = df_train_dfs[features].values

In [None]:
# Load the DFS validation table and keep the columns listed in `features`,
# in that order, as a NumPy array.
df_val_dfs = pd.read_csv('DataSet/Dataset_DFSn_val.csv')[features].values

## Random Forest for Data with DFS

In [None]:
# Random forest of 10 trees (max depth 7) fitted on the DFS features.
rf_dfs = RandomForestRegressor(10, max_depth=7, n_jobs=10)
rf_dfs.fit(df_train_dfs, train_label)
val_pred2 = rf_dfs.predict(df_val_dfs)

# Mean squared error on the validation set. It is bound to a name and printed
# because a bare expression that is not the last statement of a cell is
# silently discarded.
val_mse = ((val_label - val_pred2) ** 2).mean()
print('validation MSE:', val_mse)

# Persist the fitted estimator.
with open('Randomforest_DFS.pkl', 'wb') as fh:
    pickle.dump(rf_dfs, fh)

## AdaBoost for Data with DFS

In [None]:
# AdaBoost ensemble of depth-2 regression trees fitted on the DFS features.
ada_DFS = AdaBoostRegressor(DecisionTreeRegressor(max_depth=2), n_estimators=20, learning_rate=1)
ada_DFS.fit(df_train_dfs, train_label)
val_pred2 = ada_DFS.predict(df_val_dfs)

# Mean squared error on the validation set. It is bound to a name and printed
# because a bare expression that is not the last statement of a cell is
# silently discarded.
val_mse = ((val_label - val_pred2) ** 2).mean()
print('validation MSE:', val_mse)

# Persist the fitted estimator.
with open('AdaBoostReg_DFS.pkl', 'wb') as fh:
    pickle.dump(ada_DFS, fh)

## XGBoost for Data with DFS

In [None]:
# Wrap the DFS feature arrays and their labels in DMatrix containers.
# This rebinds `dtrain` and `dval`.
dtrain = xgb.DMatrix(data=df_train_dfs, label=train_label)
dval = xgb.DMatrix(data=df_val_dfs, label=val_label)

In [None]:
# Booster configuration handed to xgb.train for the DFS-feature run.
param_gbtree = {
    # --- general ---
    'booster': 'gbtree',
    # --- tree booster ---
    'eta': 0.1,
    'gamma': 0,
    'max_depth': 5,
    'min_child_weight': 1,
    'subsample': 0.9,
    'lambda': 0.5,
    'alpha': 0,
    'tree_method': 'auto',
    'num_parallel_tree': 1,  # values above 1 are the random-forest setting
    # --- learning task ---
    # 'objective' is intentionally not set here (candidate: 'reg:squarederror').
    'eval_metric': ['rmse'],
}

In [None]:
# Named datasets handed to xgb.train as its evaluation list: the training set
# first, the validation set last.
evallist = [(dtrain, 'train'),(dval, 'eval')]

In [None]:
# Upper bound on the number of boosting rounds.
num_round = 200

# Train on the DFS-feature DMatrix. The round count and evaluation list are
# passed by keyword: current xgboost releases declare `evals` as keyword-only,
# so passing it positionally is deprecated. Training stops early when the
# metric on the last entry of `evallist` has not improved for 5 rounds.
bst_gbtree_DFS = xgb.train(
    param_gbtree,
    dtrain,
    num_boost_round=num_round,
    evals=evallist,
    early_stopping_rounds=5,
)

In [None]:
# Write the DFS-feature booster to its own file. Reusing
# 'XGBoosting_GBtree_WT.model' here would overwrite the booster saved for the
# wavelet-feature run.
bst_gbtree_DFS.save_model('XGBoosting_GBtree_DFS.model')