In [9]:
from data_util import *
from sklearn.metrics import *
from imbens.metrics import *
from imbens.ensemble import *
from sklearn.model_selection import train_test_split, StratifiedKFold


In [10]:
# Load data
X, y = get_wine1()

Wine 类别分布: Counter({2: 71, 1: 59, 3: 48})
0      1
1      1
2      1
3      1
4      1
      ..
173    0
174    0
175    0
176    0
177    0
Name: class, Length: 178, dtype: int64
Wine 类别分布: Counter({0: 119, 1: 59})


In [11]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = SelfPacedEnsembleClassifier(n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._under_sampling.self_paced_ensemble.SelfPacedEnsembleClassifier'>
Accuracy: 0.9773015873015873
F1: 0.9738120435795208
Precision: 0.9803076923076922
Recall: 0.9700757575757578
G-mean: 0.9690557057586862
AUC: 0.992086627140975
AUPR: 0.9858253674430145


In [12]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = BalanceCascadeClassifier(n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._under_sampling.balance_cascade.BalanceCascadeClassifier'>
Accuracy: 0.9547619047619047
F1: 0.9482533512036617
Precision: 0.9557142857142857
Recall: 0.9448945981554676
G-mean: 0.9430612717236377
AUC: 0.9911684782608695
AUPR: 0.9743537018537018


In [13]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = UnderBaggingClassifier(n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._under_sampling.under_bagging.UnderBaggingClassifier'>
Accuracy: 0.9604761904761905
F1: 0.9550560722920972
Precision: 0.9597142857142856
Recall: 0.9539855072463768
G-mean: 0.9523687538745194
AUC: 0.9917421497584542
AUPR: 0.9845627411803882


In [14]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = EasyEnsembleClassifier(n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._under_sampling.easy_ensemble.EasyEnsembleClassifier'>
Accuracy: 0.9546031746031746
F1: 0.9466673714814325
Precision: 0.9587692307692308
Recall: 0.9401515151515152
G-mean: 0.9375773201897417
AUC: 0.9955862977602108
AUPR: 0.9915469631378724


In [16]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = RUSBoostClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._under_sampling.rus_boost.RUSBoostClassifier'>
Accuracy: 0.954920634920635
F1: 0.9485599153060498
Precision: 0.9541501831501831
Recall: 0.95
G-mean: 0.9477539280084473
AUC: 0.9971618357487924
AUPR: 0.9946581196581196


In [20]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = BalancedRandomForestClassifier(n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._under_sampling.balanced_random_forest.BalancedRandomForestClassifier'>
Accuracy: 0.9887301587301588
F1: 0.9873968547641073
Precision: 0.9881410256410257
Recall: 0.9875
G-mean: 0.9872744236257798
AUC: 1.0
AUPR: 1.0


In [22]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = AdaCostClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._reweighting.adacost.AdaCostClassifier'>
Accuracy: 0.9661904761904762
F1: 0.9613567209848431
Precision: 0.9661630036630037
Recall: 0.9609848484848484
G-mean: 0.9596666519237056
AUC: 0.9978864734299517
AUPR: 0.9961538461538462


In [23]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = AdaUBoostClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._reweighting.adauboost.AdaUBoostClassifier'>
Accuracy: 0.9547619047619047
F1: 0.9474240433809615
Precision: 0.9587912087912087
Recall: 0.9443181818181818
G-mean: 0.9413863111293258
AUC: 0.9955862977602108
AUPR: 0.9921097337006428


In [25]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accs = []
f1s = []
precs = []
recs = []
gmeans = []
aucs = []
auprs = []

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model = AsymBoostClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    accs.append(accuracy_score(y_test, y_pred))
    f1s.append(f1_score(y_test, y_pred, average='macro'))
    precs.append(precision_score(y_test, y_pred, average='macro'))
    recs.append(recall_score(y_test, y_pred, average='macro'))
    gmeans.append(geometric_mean_score(y_test, y_pred))
    aucs.append(roc_auc_score(y_test, y_pred_proba))
    auprs.append(average_precision_score(y_test, y_pred_proba))
    
print('model:', model.__class__)
print('Accuracy:', np.mean(accs))
print('F1:', np.mean(f1s))
print('Precision:', np.mean(precs))
print('Recall:', np.mean(recs))
print('G-mean:', np.mean(gmeans))
print('AUC:', np.mean(aucs))
print('AUPR:', np.mean(auprs))

model: <class 'imbens.ensemble._reweighting.asymmetric_boost.AsymBoostClassifier'>
Accuracy: 0.9547619047619047
F1: 0.9474240433809615
Precision: 0.9587912087912087
Recall: 0.9443181818181818
G-mean: 0.9413863111293258
AUC: 0.9963438735177865
AUPR: 0.9937626262626262
