In [8]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.0.3-py3-none-any.whl (348 kB)
[K     |████████████████████████████████| 348 kB 8.2 MB/s 
Collecting cmaes>=0.8.2
  Downloading cmaes-0.9.0-py3-none-any.whl (23 kB)
Collecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 9.8 MB/s 
[?25hCollecting alembic>=1.5.0
  Downloading alembic-1.8.1-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 75.9 MB/s 
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 7.2 MB/s 
Collecting autopage>=0.4.0
  Downloading autopage-0.5.1-py3-none-any.whl (29 kB)
Collecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.11.0-py2.py3-none-any.whl (112 kB)
[K     |████████████████████████████████| 112 kB 5

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# load data
import pandas as pd
data=pd.read_csv('/content/drive/MyDrive/data/weighted_data.csv', encoding='cp949', index_col=0)
target=pd.read_csv('/content/drive/MyDrive/data/y_data.csv', encoding='cp949', index_col=0)

In [2]:
# import models
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier

models_li = [RandomForestClassifier(), GradientBoostingClassifier(), ExtraTreesClassifier(), XGBClassifier()]

In [3]:
# standard_scaling
from sklearn.preprocessing import StandardScaler
ssc = StandardScaler()
data_ssc=ssc.fit_transform(data)

In [4]:
RandomForestClassifier().get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [5]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score,KFold
kfold = KFold(n_splits=3,random_state=42,shuffle=True)

In [6]:
# RandomForestClassifier_parameter_tuning
# based on optuna

import optuna

def rfc_object(trial):
    prms={
    'bootstrap': trial.suggest_categorical('bootstrap',[True, False]),
    'max_depth': trial.suggest_int('max_depth',10,100),
    'min_samples_leaf': trial.suggest_int('min_samples_leaf',1,5),
    'min_samples_split': trial.suggest_int('min_samples_split',2,10),
    'n_estimators': trial.suggest_int('n_estimators',200,2000,step=200)
    }
    model =RandomForestClassifier(**prms)
    result = cross_val_score(
        model,
        data_ssc,
        target['9일 뒤 종가'],
        cv=kfold
    )
    return np.mean(result)

study = optuna.create_study(direction="maximize")
study.optimize(rfc_object,n_trials=20)

[32m[I 2022-11-18 04:26:49,739][0m A new study created in memory with name: no-name-15b6c41b-6869-4409-9669-479738cbf255[0m
[32m[I 2022-11-18 04:27:08,587][0m Trial 0 finished with value: 0.5115286748874536 and parameters: {'bootstrap': False, 'max_depth': 42, 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 2000}. Best is trial 0 with value: 0.5115286748874536.[0m
3 fits failed out of a total of 3.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/di

In [8]:
rfc_best_prms=study.best_params
rfc_best_prms

{'bootstrap': True,
 'max_depth': 11,
 'min_samples_leaf': 2,
 'min_samples_split': 10,
 'n_estimators': 800}

In [18]:
rfc=RandomForestClassifier(**rfc_best_prms)

In [19]:
rfc.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 11,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 2,
 'min_samples_split': 10,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 800,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [11]:
# GradientBoostingClassifier_parameter_tuning
# based on optuna

import optuna

def gbc_object(trial):
    prms={
    'n_estimators': trial.suggest_int('n_estimators',200,2000,step=200),
    'max_depth': trial.suggest_int('max_depth',10,1000),
    "learning_rate": trial.suggest_float("learning_rate",1e-5,1),
    }
    model =GradientBoostingClassifier(**prms)
    result = cross_val_score(
        model,
        data_ssc,
        target['9일 뒤 종가'],
        cv=kfold
    )
    return np.mean(result)

study = optuna.create_study(direction="maximize")
study.optimize(gbc_object,n_trials=20)

[32m[I 2022-11-18 04:29:53,198][0m A new study created in memory with name: no-name-84f4f097-2230-4094-8567-77ede92818fe[0m
[32m[I 2022-11-18 04:29:59,817][0m Trial 0 finished with value: 0.49352123703268735 and parameters: {'n_estimators': 1200, 'max_depth': 946, 'learning_rate': 0.993757058885463}. Best is trial 0 with value: 0.49352123703268735.[0m
[32m[I 2022-11-18 04:30:13,776][0m Trial 1 finished with value: 0.4706009003718927 and parameters: {'n_estimators': 1400, 'max_depth': 965, 'learning_rate': 0.17053235237873826}. Best is trial 0 with value: 0.49352123703268735.[0m
[32m[I 2022-11-18 04:30:18,728][0m Trial 2 finished with value: 0.5114503816793893 and parameters: {'n_estimators': 800, 'max_depth': 142, 'learning_rate': 0.6240491602066651}. Best is trial 2 with value: 0.5114503816793893.[0m
[32m[I 2022-11-18 04:31:50,392][0m Trial 3 finished with value: 0.4705226071638286 and parameters: {'n_estimators': 1000, 'max_depth': 41, 'learning_rate': 0.006782582830190

In [12]:
gbc_best_prms=study.best_params
gbc_best_prms

{'n_estimators': 800, 'max_depth': 142, 'learning_rate': 0.6240491602066651}

In [20]:
gbc=GradientBoostingClassifier(**gbc_best_prms)
gbc.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.6240491602066651,
 'loss': 'deviance',
 'max_depth': 142,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 800,
 'n_iter_no_change': None,
 'random_state': None,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [14]:
# ExtraTreesClassifier_parameter_tuning
# based on optuna

import optuna

def ettc_object(trial):
    prms={
    'n_estimators': trial.suggest_int('n_estimators',200,2000,step=200),
    'max_depth': trial.suggest_int('max_depth',10,1000),
    }
    model =ExtraTreesClassifier(**prms)
    result = cross_val_score(
        model,
        data_ssc,
        target['9일 뒤 종가'],
        cv=kfold
    )
    return np.mean(result)

study = optuna.create_study(direction="maximize")
study.optimize(ettc_object,n_trials=20)

[32m[I 2022-11-18 04:33:35,979][0m A new study created in memory with name: no-name-1eb1826d-3ff1-467c-bddc-8dedfe71d296[0m
[32m[I 2022-11-18 04:33:38,616][0m Trial 0 finished with value: 0.49614405950283813 and parameters: {'n_estimators': 600, 'max_depth': 839}. Best is trial 0 with value: 0.49614405950283813.[0m
[32m[I 2022-11-18 04:33:39,514][0m Trial 1 finished with value: 0.5140732041495399 and parameters: {'n_estimators': 200, 'max_depth': 804}. Best is trial 1 with value: 0.5140732041495399.[0m
[32m[I 2022-11-18 04:33:44,586][0m Trial 2 finished with value: 0.5115286748874536 and parameters: {'n_estimators': 1000, 'max_depth': 298}. Best is trial 1 with value: 0.5140732041495399.[0m
[32m[I 2022-11-18 04:33:48,392][0m Trial 3 finished with value: 0.5038363671951459 and parameters: {'n_estimators': 600, 'max_depth': 895}. Best is trial 1 with value: 0.5140732041495399.[0m
[32m[I 2022-11-18 04:33:49,299][0m Trial 4 finished with value: 0.4935799569387355 and param

In [15]:
ettc_best_prms=study.best_params
ettc_best_prms

{'n_estimators': 200, 'max_depth': 147}

In [21]:
ettc=ExtraTreesClassifier(**ettc_best_prms)
ettc.get_params()

{'bootstrap': False,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 147,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [16]:
# XGBClassifier_parameter_tuning
# based on optuna

import optuna

def xgbc_object(trial):
    prms={
    'min_child_weight': trial.suggest_int('min_child_weight',1,10),
    'gamma': trial.suggest_float('gamma',0.5,5),
    "subsample": trial.suggest_float("subsample",1e-1,1),
    'colsample_bytree': trial.suggest_float('colsample_bytree',1e-1,1),
    'max_depth': trial.suggest_int('max_depth', 5, 30)
    }
    model =XGBClassifier(**prms)
    result = cross_val_score(
        model,
        data_ssc,
        target['9일 뒤 종가'],
        cv=kfold
    )
    return np.mean(result)

study = optuna.create_study(direction="maximize")
study.optimize(xgbc_object,n_trials=20)

[32m[I 2022-11-18 04:35:02,551][0m A new study created in memory with name: no-name-665a63d9-d7b6-425e-b9c8-6da64a209875[0m
[32m[I 2022-11-18 04:35:04,157][0m Trial 0 finished with value: 0.5089841456253671 and parameters: {'min_child_weight': 1, 'gamma': 1.1497952302996337, 'subsample': 0.9888344816118727, 'colsample_bytree': 0.3403096188239668, 'max_depth': 14}. Best is trial 0 with value: 0.5089841456253671.[0m
[32m[I 2022-11-18 04:35:04,964][0m Trial 1 finished with value: 0.5167155999217068 and parameters: {'min_child_weight': 2, 'gamma': 1.072629987718089, 'subsample': 0.7106236950055019, 'colsample_bytree': 0.32426229272321583, 'max_depth': 9}. Best is trial 1 with value: 0.5167155999217068.[0m
[32m[I 2022-11-18 04:35:05,319][0m Trial 2 finished with value: 0.4911528674887453 and parameters: {'min_child_weight': 9, 'gamma': 0.9211440567757445, 'subsample': 0.7603300547474644, 'colsample_bytree': 0.19104307825718805, 'max_depth': 13}. Best is trial 1 with value: 0.5167

In [17]:
xgbc_best_prms=study.best_params
xgbc_best_prms

{'min_child_weight': 4,
 'gamma': 0.5153854345979962,
 'subsample': 0.8578483603632772,
 'colsample_bytree': 0.8659792218360167,
 'max_depth': 15}

In [22]:
xgbc=XGBClassifier(**xgbc_best_prms)
xgbc.get_params()

{'base_score': 0.5,
 'booster': 'gbtree',
 'colsample_bylevel': 1,
 'colsample_bynode': 1,
 'colsample_bytree': 0.8659792218360167,
 'gamma': 0.5153854345979962,
 'learning_rate': 0.1,
 'max_delta_step': 0,
 'max_depth': 15,
 'min_child_weight': 4,
 'missing': None,
 'n_estimators': 100,
 'n_jobs': 1,
 'nthread': None,
 'objective': 'binary:logistic',
 'random_state': 0,
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': None,
 'silent': None,
 'subsample': 0.8578483603632772,
 'verbosity': 1}