In [2]:
from __future__ import division
import numpy as np
import scipy as sc
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
from sklearn.decomposition import PCA
import pandas as pd
import math
from numpy import random
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score
from imblearn.metrics import geometric_mean_score
from sklearn.metrics import f1_score

In [3]:
df = pd.read_csv('../data/CTGsmt.csv', header=None)
data = df.to_numpy()
# Features matrix
X = data[:,0:22]
Y = data[:,23]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0, stratify=Y, shuffle=True)

In [5]:
def prueba_svm(ker='linear',conf=0.001,gam=0.0):
    Folds = 4
    random.seed(19680801)
    EficienciaTrain = np.zeros(Folds)
    EficienciaVal = np.zeros(Folds)
    skf = StratifiedKFold(n_splits=Folds)
    porc_vectores = np.zeros(Folds)
    j = 0
    f1 = np.zeros(Folds)
    auc_fpr = np.zeros(Folds)
    gmean = np.zeros(Folds)
    for train, test in skf.split(X, Y):
        Xtrain = X[train,:]
        Ytrain = Y[train]
        Xtest = X[test,:]
        Ytest = Y[test]
        
        #Haga el llamado a la función para crear y entrenar el modelo usando los datos de entrenamiento
        if ker == 'linear':
            modelo = SVC(kernel =ker, C=conf)
        else:
            modelo = SVC(kernel=ker, C= conf, gamma= gam)
        modelo.fit(Xtrain,Ytrain)


        #Validación
        Ytrain_pred = modelo.predict(Xtrain)
        Yest = modelo.predict(Xtest)

        #Evaluamos las predicciones del modelo con los datos de test      
        gmean[j] =  geometric_mean_score(y_true = Ytest, y_pred=Yest, average="weighted")
        f1[j] = f1_score(y_true = Ytest, y_pred=Yest, average = "weighted")
        EficienciaTrain[j] = np.mean(Ytrain_pred.ravel() == Ytrain.ravel())
        EficienciaVal[j] = np.mean(Yest.ravel() == Ytest.ravel())
        porc_vectores[j] = len(modelo.support_vectors_)/len(Xtrain)
        j += 1

    f1_m = np.mean(f1)
    f1_ic = np.std(auc_fpr)
    gmean = np.mean(gmean)
    gmean_ic = np.std(gmean)
    efi_train = np.mean(EficienciaTrain)
    efi_train_ic = np.std(EficienciaTrain)
    efi_test = np.mean(EficienciaVal)
    efi_test_ic = np.std(EficienciaVal)

    print('f1 = ' + str(f1_m) + '+-' + str(f1_ic))
    print('gmean = ' + str(gmean) + '+-' + str(gmean_ic))
    print('Eficiencia Train= ' + str(efi_train) + '+-' + str(efi_train_ic))
    print('Eficiencia Test= ' + str(efi_test) + '+-' + str(efi_test_ic))
        
    return f1_m, f1_ic, gmean, gmean_ic,efi_train, efi_train_ic, efi_test, efi_test_ic, porc_vectores

In [None]:
import pandas as pd
import qgrid
df_types = pd.DataFrame({
    'Kernel' : pd.Series(['linear','linear','linear','linear','linear','linear','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf']),
    'C' : pd.Series([0.001,0.01,0.1,1,10,100,0.001,0.001,0.001,0.01,0.01,0.01,0.1,0.1,0.1,1,1,1,10,10,10,100,100,100]),
    'gamma' : pd.Series([0,0,0,0,0,0,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1])})

df_types["f1"] = ""
df_types["IC f1"] = ""
df_types["gmean"] = ""
df_types["IC gmean"] = ""
df_types["Eficiencia Train"] = ""
df_types["IC Eficiencia Train"] = ""
df_types["Eficiencia Test"] = ""
df_types["IC Eficiencia Test"] = ""
df_types["% de Vectores de Soporte"] = ""
df_types.set_index(['Kernel','C','gamma'], inplace=True)

In [None]:
j = 0
for i in df_types.index:
    print(i)
    kernel = i[0]
    c = i[1]
    gamma = i[2]
    f1_m, f1_ic, gmean, gmean_ic, efi_train, efi_train_ic, efi_test, efi_test_ic, porc_vectore = prueba_svm(ker=kernel,conf=c,gam=gamma)
    df_types["f1"] = str(f1_m)
    df_types["IC f1"] = str(f1_ic)
    df_types["gmean"] = str(gmean)
    df_types["IC gmean"] = str(gmean_ic)
    df_types["Eficiencia Train"] = efi_train
    df_types["IC Eficiencia Train"] = efi_train_ic
    df_types["Eficiencia Test"] = efi_test
    df_types["IC Eficiencia Test"] = efi_test_ic
    df_types["% de Vectores de Soporte"] = str(porc_vectore)
    j = j+1

In [6]:
def train_svm(kernel, C, gamma):
    Folds = 4
    random.seed(19680801)
    f1 = np.zeros(Folds)
    auc_fpr = np.zeros(Folds)
    gmean = np.zeros(Folds)
    skf = StratifiedKFold(n_splits=Folds)
    EficienciaTrain = np.zeros(Folds)
    EficienciaVal = np.zeros(Folds)
    j = 0
    models = []
    for train, test in skf.split(X, Y):
        Xtrain = X[train,:]
        Ytrain = Y[train]
        Xtest = X[test,:]
        Ytest = Y[test]

        #Haga el llamado a la función para crear y entrenar el modelo usando los datos de entrenamiento
        model = SVC(C=C,kernel=kernel, gamma=gamma, probability=True)
        model.fit(Xtrain, Ytrain)
        models.append(model)
        #Validación
        Y_pred=model.predict_proba(Xtest)
        Ytrain_pred = model.predict(Xtrain)#Use el modelo previamente entrenado para hacer predicciones con las mismas muestras de entrenamiento
        Yest = model.predict(Xtest)#Use el modelo previamente entrenado para hacer predicciones con las muestras de test

        #Evaluamos las predicciones del modelo con los datos de test

     #Evaluamos las predicciones del modelo con los datos de test      
        gmean[j] =  geometric_mean_score(y_true = Ytest, y_pred=Yest, average="weighted")
        f1[j] = f1_score(y_true = Ytest, y_pred=Yest, average = "weighted")
        EficienciaTrain[j] = np.mean(Ytrain_pred.ravel() == Ytrain.ravel())
        EficienciaVal[j] = np.mean(Yest.ravel() == Ytest.ravel())
        porc_vectores[j] = len(modelo.support_vectors_)/len(Xtrain)
        j += 1

    f1_m = np.mean(f1)
    f1_ic = np.std(auc_fpr)
    gmean = np.mean(gmean)
    gmean_ic = np.std(gmean)
    efi_train = np.mean(EficienciaTrain)
    efi_train_ic = np.std(EficienciaTrain)
    efi_test = np.mean(EficienciaVal)
    efi_test_ic = np.std(EficienciaVal)

    print('f1 = ' + str(f1_m) + '+-' + str(f1_ic))
    print('gmean = ' + str(gmean) + '+-' + str(gmean_ic))
    print('Eficiencia Train= ' + str(efi_train) + '+-' + str(efi_train_ic))
    print('Eficiencia Test= ' + str(efi_test) + '+-' + str(efi_test_ic))
        
    return f1_m, f1_ic, gmean, gmean_ic,efi_train, efi_train_ic, efi_test, efi_test_ic, porc_vectores

In [7]:
import qgrid
randn = np.random.randn
df_types = pd.DataFrame({
    'Kernel' : pd.Series(['lineal','lineal','lineal','lineal','lineal','lineal','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf','rbf']),
    'C' : pd.Series([0.001,0.01,0.1,1,10,100,0.001,0.001,0.001,0.01,0.01,0.01,0.1,0.1,0.1,1,1,1,10,10,10,100,100,100]),
    'gamma' : pd.Series([0,0,0,0,0,0,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1,0.01,0.1,1])})

f1_ = []
stdf1_ = []
gmean_ = []
stdgmean_ = []
efi_train_ = []
std_efi_train_ = []
efi_test_ = []
std_efi_test_ = []
for e, i, j in zip(df_types['Kernel'], df_types['C'], df_types['gamma']):
    if(e == 'lineal'):
        f1_m, f1_ic, gmean, gmean_ic, efi_train, efi_train_ic, efi_test, efi_test_ic, porc_vectores = train_svm("linear", i, "auto")
    else:
        f1_m, f1_ic, gmean, gmean_ic, efi_train, efi_train_ic, efi_test, efi_test_ic, porc_vectores = train_svm(e, i, j)
    f1_.append(f1_m)
    stdf1_.append(f1_ic)
    gmean_.append(gmean)
    stdgmean_.append(gmean_ic)
    efi_train_.append(efi_train)
    std_efi_train_.append(efi_train_ic)
    efi_test_.append(efi_test)
    std_efi_test_.append(efi_test_ic)
    
df_types.set_index(['Kernel','C','gamma'], inplace=True)
df_types["f1"] = f1_
df_types["Intervalo de confianza f1"] = stdf1_
df_types["G-mean"] = gmean_
df_types["Intervalo de confianza G-mean"] = stdgmean_
df_types["Eficiencia Train"] = efi_train_
df_types["IC Eficiencia Train"] = std_efi_train_
df_types["Eficiencia Test"] = efi_test_
df_types["IC Eficiencia Test"] = std_efi_test_

NameError: name 'EficienciaTrain' is not defined

In [None]:
df_types