In [4]:
import numpy as np
import pandas as pd
import scipy as sc

from collections import Counter
from imblearn.under_sampling import RandomUnderSampler
from numpy import random
from meassures import get_meassure
from meassures import get_sensitive
from meassures import get_specificity
from sklearn import metrics
from sklearn.naive_bayes import ComplementNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from test_data import local_test

In [2]:
# Se cargan los datos
data = pd.read_csv('full_dataset.csv')

In [3]:
# Se separan los datos en conjuntos por clase
major_class = data[data.classes == 0]
minor_class = data[data.classes == 1]

# Se hace un sobremuestreo sobre la clase minoritaria
minor_class_upsampled = resample(minor_class, replace=True, n_samples=350, random_state=42)

# Se unen los conjuntos en un nuevo conjunto único de clases
data_new = pd.concat([major_class, minor_class_upsampled])

# Se separan características de etiquetas
features = data_new.iloc[1:, 0:-1]
target = data_new.iloc[1:, -1]

# Se hace sub muestreo de conjunto mayoritario
rus = RandomUnderSampler(random_state=0)
features, target = rus.fit_resample(features, target)
print('Resampled dataset shape %s' % Counter(target))

Resampled dataset shape Counter({0: 350, 1: 350})


In [7]:
folds = [5, 10, 15, 20, 30, 50]

for fold in folds:
    random.seed(12345)
    efficiency_train = np.zeros(fold)
    efficiency_val = np.zeros(fold)
    skf = StratifiedKFold(n_splits=fold, random_state=None, shuffle=False)

    j = 0
    for train, test in skf.split(features, target):
        x_train = features[train, :]
        y_train = target[train]
        x_test = features[test, :]
        y_test = target[test]

        # NB
        model = ComplementNB()
        model.fit(x_train, y_train)


        # Predicción
        y_est_train = model.predict(x_train)
        y_est_test = model.predict(x_test)
        print(model.predict(local_test))

        score_train = metrics.accuracy_score(y_train, y_est_train)
        score_test = metrics.accuracy_score(y_test, y_est_test)
        
        # Medimos Sensibilidad y Especificidad
        tp, fp, tn, fn = get_meassure(y_test, y_est_test)
        sensitive = get_sensitive(tp, fn)
        specificity = get_specificity(tn, fp)

        
        j += 1

    print('-------------------------------------')
    print('Entrenamiento: {0}'.format(score_train))
    print('Validación: {0}'.format(score_test))
    print('-------------------------------------')
    print('-------------------------------------')
    print('Sensibilidad: {0}, Especificidad: {1}'.format(sensitive, specificity))
    print('-------------------------------------')

[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 0 0 0 0]
[1 0 1 1 1 0 1 1 0 0]
[1 1 1 1 1 0 1 0 0 0]
-------------------------------------
Entrenamiento: 0.7517857142857143
Validación: 0.6857142857142857
-------------------------------------
-------------------------------------
Sensibilidad: 0.8428571428571429, Especificidad: 0.5285714285714286
-------------------------------------
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 0 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 0 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
-------------------------------------
Entrenamiento: 0.7444444444444445
Validación: 0.7
-------------------------------------
-------------------------------------
Sensibilidad: 0.8571428571428571, Especificidad: 0.5428571428571428
-------------------------------------
[1 1 1 1 1 0 1 0 0 0]
[1 0 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 1 1 1 0 1 0 0 0]
[1 1 