In [32]:
import numpy as np
import json
import csv
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from sklearn import svm
import xgboost as xgb
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn.ensemble import VotingRegressor, VotingClassifier
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [33]:
def create_models(names=("sv", "xgboost")):
    """Build the named estimators that feed the voting ensemble.

    Estimators are constructed lazily: only the ones listed in ``names`` are
    instantiated. With the default argument the result is the same
    ``[('sv', ...), ('xgboost', ...)]`` list as before, but the logistic
    regression, LightGBM and LDA configurations are no longer built and
    thrown away on every call.

    Parameters
    ----------
    names : str or iterable of str, default ("sv", "xgboost")
        Keys of the estimators to build, in the order they should appear in
        the result. Available keys: "lr", "sv", "xgboost", "lgbm", "lda".
        "lr" (LogisticRegression) and "lda" (LinearDiscriminantAnalysis) are
        classifiers; "sv", "xgboost" and "lgbm" are regressors.

    Returns
    -------
    list of (str, estimator)
        ``(name, estimator)`` pairs, one per requested key.

    Raises
    ------
    ValueError
        If ``names`` contains a key that is not listed above.
    """
    # Each entry is a zero-argument factory so that nothing is constructed
    # until it is actually requested.
    factories = {
        "lr": lambda: LogisticRegression(
            class_weight="balanced",
            solver="liblinear",
            penalty="l2",
            C=0.0001,
        ),
        "sv": lambda: svm.SVR(
            kernel="poly",
            degree=8,
            gamma=0.03521735642853326,
            coef0=0.34010389238140537,
            tol=1e-05,
            C=0.001,
            epsilon=0.14620884632948022,
            verbose=True,
        ),
        # NOTE(review): newer XGBoost releases use `verbosity` instead of
        # `silent` -- confirm against the installed version before changing.
        "xgboost": lambda: xgb.XGBRegressor(
            silent=True,
            max_depth=1,
            learning_rate=0.14544434403253392,
            n_estimators=72,
            gamma=0.4356018082020117,
            reg_lambda=2.931451663505623,
            reg_alpha=0.19045302677956732,
        ),
        "lgbm": lambda: lgb.LGBMRegressor(
            verbosity=1,
            boost_from_average='true',
            metric='auc',
            bagging_freq=5,
            max_depth=19,
            learning_rate=0.09802221664336347,
            n_estimators=703,
            subsample=0.7596658467659505,
            reg_lambda=0.4087114877863393,
        ),
        "lda": lambda: LDA(solver="eigen", shrinkage=1),
    }

    # A bare string would otherwise be iterated character by character.
    requested = [names] if isinstance(names, str) else list(names)

    unknown = [name for name in requested if name not in factories]
    if unknown:
        raise ValueError(
            "Unknown estimator name(s) {}; available: {}".format(
                unknown, sorted(factories)
            )
        )

    return [(name, factories[name]()) for name in requested]

In [34]:
# Load the pre-split train/test pickles for a single year so their contents
# can be inspected in the cells below.
# NOTE(review): main() reads from '../../data/dataframes/std_data/' rather than
# this directory -- confirm which location is current.
y = 2019
path = '../../data/std_data/'
x_train, x_test, y_train, y_test = [
    pd.read_pickle(path + template.format(str(y))).values
    for template in (
        'train/{}_x.pkl',
        'test/{}_x.pkl',
        'train/{}_y.pkl',
        'test/{}_y.pkl',
    )
]

In [35]:
# Display the shape of the training-label array loaded above.
y_train.shape

(206,)

In [36]:
# Scratch cell. The bare `y_train` below is not the cell's last expression, so
# with IPython's default display setting it shows nothing.
y_train
# Module-level XGBRegressor built with the same arguments as the one inside
# create_models(). main() does not read this global; it calls create_models().
xgboost = xgb.XGBRegressor(silent= True, 
                           max_depth=1,
                           learning_rate=0.14544434403253392,
                           n_estimators = 72,
                           gamma=0.4356018082020117,
                           reg_lambda=2.931451663505623,
                           reg_alpha=0.19045302677956732)



In [37]:
# Display the shape of the training feature matrix loaded above.
x_train.shape

(206, 83)

In [38]:
# Display the training-label shape again (same expression as an earlier cell).
y_train.shape

(206,)

In [39]:
def main(data_dir='../../data/dataframes/std_data/', years=range(1978, 2020)):
    """Fit and evaluate the voting regressor once per year, then report pooled metrics.

    For every year in ``years`` this loads that year's pre-split train/test
    pickles, fits a fresh ``VotingRegressor`` built from ``create_models()``,
    predicts on the test split and prints the predictions. The test sample(s)
    with the highest predicted score in a year are labelled 1 and all others
    0; the resulting per-year confusion matrices are summed. After the loop
    the ROC curve and AUC are computed over the predictions of all years
    pooled together, plotted and printed.

    Parameters
    ----------
    data_dir : str, default '../../data/dataframes/std_data/'
        Directory prefix (including the trailing slash) that contains
        ``train/<year>_x.pkl``, ``train/<year>_y.pkl``, ``test/<year>_x.pkl``
        and ``test/<year>_y.pkl``.
    years : iterable of int, default range(1978, 2020)
        Years to evaluate.

    Returns
    -------
    None
        Results are plotted and printed.
    """
    path = data_dir

    # Confusion matrix summed over all years
    # (rows: true label 0/1, columns: predicted label 0/1).
    cm_all = np.zeros((2, 2))

    # Predicted scores and true labels pooled over every year.
    probs_all = np.array([])
    y_true_all = np.array([])

    for year in years:

        # Load this year's data.
        x_train = pd.read_pickle(path + 'train/{}_x.pkl'.format(str(year))).values
        x_test = pd.read_pickle(path + 'test/{}_x.pkl'.format(str(year))).values
        y_train = pd.read_pickle(path + 'train/{}_y.pkl'.format(str(year))).values
        y_test = pd.read_pickle(path + 'test/{}_y.pkl'.format(str(year))).values

        # Train a fresh ensemble for this year.
        estimators = create_models()
        vr = VotingRegressor(estimators)
        vr.fit(x_train, y_train)

        # Predict scores (the original comments treat these as probabilities).
        probs = vr.predict(x_test)
        probs_all = np.hstack((probs_all, probs))
        y_true_all = np.hstack((y_true_all, y_test))

        print(probs)

        # Score -> 0/1: only the top-scoring sample(s) of the year become 1.
        y_pred = np.where(probs == probs.max(), 1, 0)

        # labels=[0, 1] forces a 2x2 result. Without it, a year in which only
        # one class occurs yields a 1x1 matrix that would silently broadcast
        # into every cell of cm_all.
        cm = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=[0, 1])
        cm_all += cm

    auc = roc_auc_score(y_true_all, probs_all)
    fpr, tpr, thresholds = roc_curve(y_true_all, probs_all)

    # NOTE(review): plot_roc_curve is not defined in the cells visible here;
    # it is assumed to be a notebook helper taking (fpr, tpr, auc) -- confirm.
    plot_roc_curve(fpr, tpr, auc)

    print("len: {0} , {1}".format(len(y_true_all), len(probs_all) ))
    print("confusion_matrix: ")
    print(cm_all)
    print("AUC: ")
    print(auc)
    print()

In [None]:
# Run the full per-year evaluation. The SVR is built with verbose=True, so each
# year's output starts with a "[LibSVM]" marker followed by the printed predictions.
main()

[LibSVM][0.17203937 0.1519188  0.15189164 0.1519778  0.15189167]
[LibSVM][0.18726161 0.15116901 0.15116904 0.20528914 0.15116908]
[LibSVM]