In [2]:
from models import BootstrapModel
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from skopt.space import Real, Categorical, Integer

from xgboost import XGBClassifier

import joblib

In [3]:
# Read the train and test CSV files (paths relative to this notebook) into
# pandas DataFrames.
train_csv_path = "../data/train.csv"
test_csv_path = "../data/test.csv"

train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

In [None]:
# Separate the label column from the features: the target is cast to float,
# and every remaining column of `train` is kept as a feature.
target_column = "ProdTaken"

y_train = train[target_column].astype(float)
x_train = train.drop(columns=[target_column])

In [5]:
# Preprocessing

# train_test_split is not needed here because it is run inside the BootstrapModel class.

In [None]:
# Convert the pandas objects to plain NumPy arrays (x_train / y_train are
# handed to BootstrapModel in the training cells).
#
# np.asarray is used instead of .to_numpy() because this cell rebinds the same
# names: after the first run they are already ndarrays, which have no
# .to_numpy(), so re-executing the original cell raised AttributeError.
# np.asarray accepts both DataFrame/Series and ndarray, so re-running is safe.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
test = np.asarray(test)

# モデル訓練

In [None]:
# SVM: an RBF-kernel SVC with probability estimates enabled is passed to
# BootstrapModel together with a StandardScaler and ROC AUC as the metric.
# NOTE(review): the meaning of the positional arguments to .samples / .train /
# .val_pred is defined by BootstrapModel in models.py — confirm there.
svm = BootstrapModel(
    x_train,
    y_train,
    StandardScaler(),
    SVC(kernel='rbf', probability=True),
    roc_auc_score,
    random_state=42,
)
svm.samples(10, 100, 1)

# Log-uniform ranges for C and gamma. The 'svc__' prefix presumably addresses
# the SVC step of a pipeline built inside BootstrapModel — TODO confirm.
svm_c_dim = Real(10 ** -3, 10 ** 4, prior='log-uniform')
svm_gamma_dim = Real(10 ** -3, 10 ** 4, prior='log-uniform')
search_spaces = {'svc__C': svm_c_dim, 'svc__gamma': svm_gamma_dim}

svm.train(search_spaces, 5, 50)
svm.val_pred(10)

 `.samples` / `.val_pred` の引数(パラメータ)を探索したときは、 `.record` でパラメータと AUC スコアを記録してくれる。 \
 連続で使用しても、上書きされずに追記される。

In [None]:
# Logistic regression: a liblinear-solver LogisticRegression is passed to
# BootstrapModel together with a StandardScaler and ROC AUC as the metric.
# NOTE(review): the meaning of the positional arguments to .samples / .train /
# .val_pred is defined by BootstrapModel in models.py — confirm there.
logistic = BootstrapModel(
    x_train,
    y_train,
    StandardScaler(),
    LogisticRegression(solver='liblinear'),
    roc_auc_score,
    random_state=42,
)
logistic.samples(10, 100, 1)

# Search over the penalty type (l1 / l2) and a log-uniform range for C.
logistic_penalty_dim = Categorical(['l1', 'l2'])
logistic_c_dim = Real(10 ** -4, 10 ** 4, prior='log-uniform')
search_spaces = {
    'logisticregression__penalty': logistic_penalty_dim,
    'logisticregression__C': logistic_c_dim,
}

logistic.train(search_spaces, 5, 50)
logistic.val_pred(10)

In [None]:
# k-nearest neighbours: a default KNeighborsClassifier is passed to
# BootstrapModel together with a StandardScaler and ROC AUC as the metric.
# NOTE(review): the meaning of the positional arguments to .samples / .train /
# .val_pred is defined by BootstrapModel in models.py — confirm there.
knn = BootstrapModel(x_train, y_train, StandardScaler(), KNeighborsClassifier(), roc_auc_score, random_state=42)

knn.samples(10, 100, 1)

search_spaces = {
    # n_neighbors must be an integer. The previous Real(1, 50) dimension
    # proposes float values, which KNeighborsClassifier rejects, so the search
    # uses an Integer dimension over the same inclusive range instead
    # (Integer's default prior is 'uniform', matching the original prior).
    'kneighborsclassifier__n_neighbors': Integer(1, 50),
    'kneighborsclassifier__weights': Categorical(['uniform', 'distance'])
}

knn.train(search_spaces, 5, 50)

knn.val_pred(10)

In [None]:
# XGBoost: a default XGBClassifier is passed to BootstrapModel together with a
# StandardScaler and ROC AUC as the metric.
# NOTE(review): the meaning of the positional arguments to .samples / .train /
# .val_pred is defined by BootstrapModel in models.py — confirm there.
xgb = BootstrapModel(
    x_train,
    y_train,
    StandardScaler(),
    XGBClassifier(),
    roc_auc_score,
    random_state=42,
)
xgb.samples(10, 100, 1)

# Tree count and depth are integer-valued dimensions; the learning rate is
# searched on a log-uniform scale.
xgb_n_estimators_dim = Integer(10, 200)
xgb_learning_rate_dim = Real(10 ** -4, 10 ** 0, prior='log-uniform')
xgb_max_depth_dim = Integer(1, 10)
search_spaces = {
    'xgbclassifier__n_estimators': xgb_n_estimators_dim,
    'xgbclassifier__learning_rate': xgb_learning_rate_dim,
    'xgbclassifier__max_depth': xgb_max_depth_dim,
}

xgb.train(search_spaces, 5, 50)
xgb.val_pred(10)

# モデル保存

In [None]:
# Save the SVM models. The file name is built from svm.record_file_name and
# svm.current_time (used here as the training start time).
model_name, train_start_time = svm.record_file_name, svm.current_time
model_save_path = f'../model_save/{model_name}_{train_start_time}.pkl'

svm_models = svm.bstrap_models_dict
joblib.dump(svm_models, model_save_path)

In [None]:
# Save the logistic regression models. The file name is built from
# logistic.record_file_name and logistic.current_time (used here as the
# training start time).
model_name, train_start_time = logistic.record_file_name, logistic.current_time
model_save_path = f'../model_save/{model_name}_{train_start_time}.pkl'

logistic_models = logistic.bstrap_models_dict
joblib.dump(logistic_models, model_save_path)

In [None]:
# Save the k-NN models. The file name is built from knn.record_file_name and
# knn.current_time (used here as the training start time).
model_name, train_start_time = knn.record_file_name, knn.current_time
model_save_path = f'../model_save/{model_name}_{train_start_time}.pkl'

knn_models = knn.bstrap_models_dict
joblib.dump(knn_models, model_save_path)

In [None]:
# Save the XGBoost models. The file name is built from xgb.record_file_name
# and xgb.current_time (used here as the training start time).
model_name, train_start_time = xgb.record_file_name, xgb.current_time
model_save_path = f'../model_save/{model_name}_{train_start_time}.pkl'

xgb_models = xgb.bstrap_models_dict
joblib.dump(xgb_models, model_save_path)