In [1]:
# Standard library
import warnings

# scikit-learn: dataset loader, train/test splitting, and preprocessing helpers.
# NOTE(review): load_iris is not referenced in the cells visible here — confirm
# whether a later cell still needs it.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Install a global "ignore" filter so that warnings raised from this point on
# do not clutter the cell outputs.
warnings.simplefilter('ignore')


In [2]:
# Dataset preparation
import pandas as pd

# UCI Wine data set: class 1 is dropped and the remaining classes (2 and 3) are
# separated using only 'Alcohol' and 'OD280/OD315 of diluted wines'.
df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/wine/wine.data',
                      header=None)

df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                   'Proline']

# Drop class 1 so that a binary problem remains.
df_wine = df_wine[df_wine['Class label'] != 1]

y = df_wine['Class label'].values
X = df_wine[['Alcohol', 'OD280/OD315 of diluted wines']].values

# Label-encode the two remaining class labels into 0/1.
le = LabelEncoder()
y = le.fit_transform(y)

# Split into training and test sets BEFORE scaling, so that the scaler never
# sees the test fold.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y)

# Standardization: fit on the training fold only, then apply the same
# transform to the test fold. The previous version computed X_std but passed
# the unscaled X to train_test_split, so the scaling was never used.
sc = StandardScaler()
X_train = sc.fit_transform(X_train_raw)
X_test = sc.transform(X_test_raw)

# Full standardized matrix, kept for any code that still refers to X_std.
# It now uses the training-fold statistics rather than full-data statistics.
X_std = sc.transform(X)

In [None]:
#lightGBM,CatBoost,ランダムフォレスト
!pip install lightgbm
!pip install catboost
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier

#LGBM
mod_lgbm = lgb.LGBMClassifier(num_leaves = 31,
                           num_trees = 100,
                           objective = 'binary',
                           metric = 'binary_logloss',
                           silent=False,
                              verbose = -1)
#CatBoost
mod_cat = CatBoostClassifier(iterations=100,
                           eval_metric = 'Kappa',
                           learning_rate=0.01,
                           l2_leaf_reg = 9,
                           depth=10,
                           one_hot_max_size = 50,
                           loss_function='Logloss',
                            silent=True)
#RandomForest
mod_rand = RandomForestClassifier(n_estimators=10, random_state=1)

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m


In [None]:
# VotingClassifier: compare each base model with hard/soft voting ensembles
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import roc_auc_score

# Shared ensemble configuration. Each VotingClassifier receives its own copy of
# the list so that changing one ensemble's estimators cannot affect another.
base_estimators = [('lgbm', mod_lgbm), ('cat', mod_cat), ('rand', mod_rand)]
vote_weights = [5, 10, 30]

vc1 = VotingClassifier(estimators=list(base_estimators), voting='hard')
vc2 = VotingClassifier(estimators=list(base_estimators), voting='soft', flatten_transform=True)
vc3 = VotingClassifier(estimators=list(base_estimators), voting='soft', flatten_transform=True, weights=vote_weights)
vc4 = VotingClassifier(estimators=list(base_estimators), voting='hard', weights=vote_weights)


def _auc_scores(model, X):
    """Return the scores to pass to roc_auc_score for a fitted binary model.

    Uses the predicted probability of the positive class (column 1 of
    predict_proba) when the model exposes it. Hard-voting ensembles do not
    provide predict_proba, so for those the predicted labels are returned.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(X)[:, 1]
    return model.predict(X)


name_list = ['LGBM', 'CatBoost', 'Randomforest', 'voting=hard', 'voting=soft',
             'voting=soft,weights=True', 'voting=hard,weights=True']
est_list = [mod_lgbm, mod_cat, mod_rand, vc1, vc2, vc3, vc4]
for mod, name in zip(est_list, name_list):
    mod.fit(X_train, y_train)
    # AUC needs a ranking score; hard labels give only one operating point.
    auc = roc_auc_score(y_true=y_test, y_score=_auc_scores(mod, X_test))
    print('AUC. : %.3f [%s]' % (auc, name))