In [None]:
# load data
import pandas as pd
# Features (`data`) and labels (`target`) are read from CP949-encoded CSVs;
# the first column of each file becomes the DataFrame index.
data = pd.read_csv(
    '../data/preprocessed_data/weighted_data.csv',
    index_col=0,
    encoding='cp949',
)
target = pd.read_csv(
    '../data/preprocessed_data/y_data.csv',
    index_col=0,
    encoding='cp949',
)

In [None]:
# import models
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier

# One default-configured instance of each classifier family used in this notebook.
models_li = [
    RandomForestClassifier(),
    GradientBoostingClassifier(),
    ExtraTreesClassifier(),
    XGBClassifier(),
]

In [None]:
# standard_scaling
from sklearn.preprocessing import StandardScaler
# Standardise the feature columns. The scaler is fitted on the full dataset
# (before any cross-validation split) and the result is a NumPy array.
ssc = StandardScaler()
data_ssc = ssc.fit(data).transform(data)

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score,KFold
# Shared 3-fold splitter: shuffled with a fixed seed so every objective
# below is scored on the same folds.
kfold = KFold(
    n_splits=3,
    shuffle=True,
    random_state=42,
)

In [None]:
# RandomForestClassifier_parameter_tuning
# based on optuna

import optuna

def rfc_object(trial):
    """Optuna objective for ``RandomForestClassifier``.

    Draws one hyperparameter set from ``trial``, builds the forest and
    cross-validates it on ``data_ssc`` against the ``'10일 뒤 종가'`` column
    of ``target`` using the notebook-level ``kfold`` splitter.

    Args:
        trial: ``optuna.trial.Trial`` used to sample the hyperparameters.

    Returns:
        Mean of the per-fold scores from ``cross_val_score``. No ``scoring``
        is passed, so the estimator's default scorer (accuracy for a
        classifier) is used and a larger value is better.
    """
    prms = {
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'max_depth': trial.suggest_int('max_depth', 10, 1000),
        # 'auto' was only an alias of 'sqrt' for classifiers and is rejected
        # by scikit-learn >= 1.3, so search two genuinely different options.
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
        # An integer min_samples_split must be >= 2; sampling 1 made the
        # trial fail inside scikit-learn's parameter validation.
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'n_estimators': trial.suggest_int('n_estimators', 200, 2000, step=200),
    }
    model = RandomForestClassifier(**prms, n_jobs=-1)
    result = cross_val_score(
        model,
        data_ssc,
        target['10일 뒤 종가'],
        cv=kfold,
    )
    return np.mean(result)

# rfc_object returns mean cross-validated accuracy (higher is better), so the
# study has to maximise it; "minimize" would pick the worst configuration.
# NOTE(review): trials run in parallel (n_jobs=-1) while each forest also uses
# n_jobs=-1, which can oversubscribe the CPU — consider limiting one of them.
study = optuna.create_study(direction="maximize")
study.optimize(rfc_object, n_trials=20, n_jobs=-1)

In [None]:
# `Study.best_params` is a property that returns a dict; calling it like a
# method raised "TypeError: 'dict' object is not callable".
rfc_best_prms = study.best_params
rfc_best_prms

In [None]:
# GradientBoostingClassifier_parameter_tuning
# based on optuna

import optuna

def gbc_object(trial):
    """Optuna objective for ``GradientBoostingClassifier``.

    Draws one hyperparameter set from ``trial``, builds the booster and
    cross-validates it on ``data_ssc`` against the ``'10일 뒤 종가'`` column
    of ``target`` using the notebook-level ``kfold`` splitter.

    Args:
        trial: ``optuna.trial.Trial`` used to sample the hyperparameters.

    Returns:
        Mean of the per-fold scores from ``cross_val_score``. No ``scoring``
        is passed, so the estimator's default scorer (accuracy for a
        classifier) is used and a larger value is better.
    """
    prms = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 2000, step=200),
        'max_depth': trial.suggest_int('max_depth', 10, 1000),
        # Sample on a log scale so small learning rates are actually explored;
        # a uniform draw over [1e-5, 1] almost never goes below 1e-2.
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1, log=True),
    }
    # GradientBoostingClassifier has no n_jobs parameter; passing it raised
    # TypeError on every trial, so it is intentionally not forwarded here.
    model = GradientBoostingClassifier(**prms)
    result = cross_val_score(
        model,
        data_ssc,
        target['10일 뒤 종가'],
        cv=kfold,
    )
    return np.mean(result)

# gbc_object returns mean cross-validated accuracy (higher is better), so the
# study has to maximise it; "minimize" would pick the worst configuration.
study = optuna.create_study(direction="maximize")
study.optimize(gbc_object, n_trials=20, n_jobs=-1)

In [None]:
# `Study.best_params` is a property that returns a dict; calling it like a
# method raised "TypeError: 'dict' object is not callable".
gbc_best_prms = study.best_params
gbc_best_prms

In [None]:
# ExtraTreesClassifier_parameter_tuning
# based on optuna

import optuna

def ettc_object(trial):
    """Optuna objective for ``ExtraTreesClassifier``.

    Draws one hyperparameter set from ``trial``, builds the ensemble and
    cross-validates it on ``data_ssc`` against the ``'10일 뒤 종가'`` column
    of ``target`` using the notebook-level ``kfold`` splitter.

    Args:
        trial: ``optuna.trial.Trial`` used to sample the hyperparameters.

    Returns:
        Mean of the per-fold scores from ``cross_val_score``. No ``scoring``
        is passed, so the estimator's default scorer (accuracy for a
        classifier) is used and a larger value is better.
    """
    # ExtraTreesClassifier is not a boosting model and has no learning_rate
    # parameter (passing it raised TypeError on every trial). The search space
    # instead uses tree parameters it does accept, with the same ranges as
    # the random-forest objective in this notebook.
    prms = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 2000, step=200),
        'max_depth': trial.suggest_int('max_depth', 10, 1000),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
        # An integer min_samples_split must be >= 2.
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
    }
    model = ExtraTreesClassifier(**prms, n_jobs=-1)
    result = cross_val_score(
        model,
        data_ssc,
        target['10일 뒤 종가'],
        cv=kfold,
    )
    return np.mean(result)

# ettc_object returns mean cross-validated accuracy (higher is better), so the
# study has to maximise it; "minimize" would pick the worst configuration.
study = optuna.create_study(direction="maximize")
study.optimize(ettc_object, n_trials=20, n_jobs=-1)

In [None]:
# `Study.best_params` is a property that returns a dict; calling it like a
# method raised "TypeError: 'dict' object is not callable".
ettc_best_prms = study.best_params
ettc_best_prms

In [None]:
# XGBClassifier_parameter_tuning
# based on optuna

import optuna

def xgbc_object(trial):
    """Optuna objective for ``XGBClassifier``.

    Samples a hyperparameter set from ``trial``, fits the classifier under
    the notebook-level ``kfold`` splitter on ``data_ssc`` versus the
    ``'10일 뒤 종가'`` column of ``target``, and reports the average fold score.

    Args:
        trial: ``optuna.trial.Trial`` supplying the sampled values.

    Returns:
        Mean of the scores returned by ``cross_val_score``.
    """
    # Keyword order matches the order in which the trial is queried.
    search_space = dict(
        min_child_weight=trial.suggest_int('min_child_weight', 1, 10),
        gamma=trial.suggest_float('gamma', 0.5, 5),
        subsample=trial.suggest_float("subsample", 1e-1, 1),
        colsample_bytree=trial.suggest_float('colsample_bytree', 1e-1, 1),
        max_depth=trial.suggest_int('max_depth', 5, 30),
    )
    classifier = XGBClassifier(n_jobs=-1, **search_space)
    labels = target['10일 뒤 종가']
    fold_scores = cross_val_score(classifier, data_ssc, labels, cv=kfold)
    return np.mean(fold_scores)

# xgbc_object returns mean cross-validated accuracy (higher is better), so the
# study has to maximise it; "minimize" would pick the worst configuration.
study = optuna.create_study(direction="maximize")
study.optimize(xgbc_object, n_trials=20, n_jobs=-1)

In [None]:
# `Study.best_params` is a property that returns a dict; calling it like a
# method raised "TypeError: 'dict' object is not callable".
xgbc_best_prms = study.best_params
xgbc_best_prms