In [2]:
!pip install git+https://github.com/Menelau/DESlib

Collecting git+https://github.com/Menelau/DESlib
  Cloning https://github.com/Menelau/DESlib to /tmp/pip-req-build-acp4_1py
Building wheels for collected packages: DESlib
  Running setup.py bdist_wheel for DESlib ... [?25ldone
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-ybpsct6x/wheels/8d/97/f7/014976b54546aa3fc60da2446f9ab6f57727cf478172007b06
Successfully built DESlib


In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from matplotlib.pylab import rcParams
from sklearn.preprocessing import StandardScaler
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import cohen_kappa_score
from itertools import combinations
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from imblearn.metrics import geometric_mean_score
from sklearn.metrics import f1_score

import sys
sys.path.insert(0, '../Lista2')
import kdn
import ih_classifier

from deslib.dcs.mcb import MCB
from deslib.dcs.ola import OLA
from deslib.des.knora_e import KNORAE
from deslib.des.des_knn import DESKNN

### Pre processing

In [131]:
data = pd.read_csv('../kc1.csv')

In [132]:
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
len(X)

2109

In [133]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

### Analysis

In [134]:
skf = StratifiedKFold(n_splits=10, shuffle=True)
metrics = np.zeros((5,4))

for train_index, test_index in skf.split(X, y):
       
    X_Train, X_test = X[train_index], X[test_index]
    y_Train, y_test = y[train_index], y[test_index]

    sm = SMOTE()
    X_Train, y_Train = sm.fit_sample(X_Train, y_Train)
    
    skf_val = StratifiedKFold(n_splits=5, shuffle=True)
    for train, val_index in skf.split(X_Train, y_Train):
        X_train, X_val = X_Train[train], X_Train[val_index]
        y_train, y_val = y_Train[train], y_Train[val_index]
        break
    
#-------------------------------IH MODEL--------------------------------
    ih_model = ih_classifier.IHClassifier()
    ih_model.fit(X_Train, y_Train)
    
#-----------------------------------------------------------------------

    bg = BaggingClassifier(Perceptron(max_iter = 150, tol = 0.001),
                         n_estimators = 100)
    bg.fit(X_train, y_train)
    
#-------------------------------DCS & DES-------------------------------
    ola = OLA(bg)
    ola.fit(X_val, y_val)
    
    mcb = MCB(bg)
    mcb.fit(X_val, y_val)
    
    knorae = KNORAE(bg)
    knorae.fit(X_val, y_val)
    
    desknn = DESKNN(bg)
    desknn.fit(X_val, y_val)
    
    models = [ih_model, ola, mcb, knorae, desknn]
            
#---------------------------------------------------------
    
    for i, model in enumerate(models):
        metrics[i, :] += np.array([accuracy_score(y_test, model.predict(X_test)),
                            roc_auc_score(y_test, model.predict(X_test)),
                            geometric_mean_score(y_test, model.predict(X_test)),
                            f1_score(y_test, model.predict(X_test))])
metrics = metrics/skf.n_splits

Easy-False: 1199 Easy-True: 1448 Hard-False: 405 Hard-True: 156
Easy-False: 1203 Easy-True: 1460 Hard-False: 401 Hard-True: 144
Easy-False: 1199 Easy-True: 1440 Hard-False: 405 Hard-True: 164
Easy-False: 1211 Easy-True: 1452 Hard-False: 394 Hard-True: 153
Easy-False: 1194 Easy-True: 1479 Hard-False: 411 Hard-True: 126
Easy-False: 1205 Easy-True: 1432 Hard-False: 400 Hard-True: 173
Easy-False: 1191 Easy-True: 1444 Hard-False: 414 Hard-True: 161
Easy-False: 1205 Easy-True: 1441 Hard-False: 400 Hard-True: 164
Easy-False: 1212 Easy-True: 1445 Hard-False: 393 Hard-True: 160
Easy-False: 1195 Easy-True: 1470 Hard-False: 410 Hard-True: 135


In [135]:
metrics

array([[0.78851003, 0.70188509, 0.68889127, 0.45659349],
       [0.726026  , 0.69368917, 0.69025005, 0.4227928 ],
       [0.71982212, 0.68385361, 0.67359649, 0.4149731 ],
       [0.71313993, 0.6736805 , 0.66994204, 0.39900632],
       [0.71602168, 0.71015045, 0.70859549, 0.43337051]])

### Results

In [1]:
rcParams['figure.figsize'] = 15, 5
barWidth = 0.15
# Set position of bar on X axis
r1 = np.arange(metrics.shape[0])
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
r4 = [x + barWidth for x in r3]

# Make the plot
plt.bar(r1, metrics[:, 0], width=barWidth, edgecolor='white', label='Acurácia')
plt.bar(r2, metrics[:, 1], width=barWidth, edgecolor='white', label='AUC')
plt.bar(r3, metrics[:, 2], width=barWidth, edgecolor='white', label='G-Mean')
plt.bar(r4, metrics[:, 3], width=barWidth, edgecolor='white', label='F-Measure')

# Add xticks on the middle of the group bars
plt.ylim([0., 1.01])
plt.xlim([-0.2, 4.7])
plt.ylabel('Escore')
plt.xticks([r + barWidth for r in range(metrics.shape[0])], ['Modelo IH', 'OLA', 'MCB', 'KNORA-E', 'DES-KNN'])

# Create legend & Show graphic
plt.legend(loc = 'lower right')
plt.title('Comparação entre modelos')
plt.show()

df = pd.DataFrame(data = metrics, columns = ['Acurácia', 'AUC', 'G-Mean', 'F-Measure'], index = ['Modelo IH', 'OLA', 'MCB','KNORA-E', 'DES-KNN'])
print(df)

NameError: name 'rcParams' is not defined