In [1]:
from __future__ import division
import numpy as np
import scipy as sc
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
from sklearn.decomposition import PCA
import pandas as pd
import math
from numpy import random
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold

In [2]:
# Load the 'Raw Data' sheet of the CTG workbook; row 0 is the header and
# the row right after it (row 1) is skipped.
df = pd.read_excel(
    '../data/CTG.xls',
    sheet_name='Raw Data',
    header=0,
    skiprows=[1],
)
data = df.to_numpy()

# Feature matrix: the first 22 columns (indices 0..21).
X = data[:, :22]
# Target vector: column index 23. Column 22 is not used here.
# NOTE(review): confirm that skipping column 22 is intentional.
Y = data[:, 23]

In [3]:
def prueba_svm(ker='linear',conf=0.001,gam=0.0):
    """Cross-validate an SVC on the module-level ``X`` / ``Y`` arrays.

    Runs a 4-fold stratified cross-validation. In every fold the features
    are standardised with a scaler fitted on the training split only, an
    ``SVC`` is trained, and accuracy is measured on both splits together
    with the fraction of training samples kept as support vectors.

    Parameters
    ----------
    ker : str
        Kernel name forwarded to ``SVC``.
    conf : float
        Regularisation parameter forwarded to ``SVC`` as ``C``.
    gam : float
        Kernel coefficient forwarded as ``gamma``; it is NOT passed when
        ``ker == 'linear'``.
        NOTE(review): the default 0.0 is only exercised with the linear
        kernel here; presumably callers must pass a positive value for
        other kernels -- confirm against the scikit-learn version in use.

    Returns
    -------
    tuple
        ``(mean validation accuracy, std of validation accuracy,
        mean training accuracy, std of training accuracy,
        mean support-vector fraction)``. The "ic" values are plain
        standard deviations across folds.

    Side effects: reseeds numpy's global RNG and prints two summary lines.
    """
    n_folds = 4
    random.seed(19680801)
    splitter = StratifiedKFold(n_splits=n_folds)

    acc_train = np.zeros(n_folds)
    acc_val = np.zeros(n_folds)
    sv_fraction = np.zeros(n_folds)

    for fold, (train_idx, test_idx) in enumerate(splitter.split(X, Y)):
        y_fit = Y[train_idx]
        y_held = Y[test_idx]

        # Standardise using statistics from the training split only.
        scaler = preprocessing.StandardScaler().fit(X[train_idx, :])
        x_fit = scaler.transform(X[train_idx, :])
        x_held = scaler.transform(X[test_idx, :])

        # gamma is only handed to SVC for non-linear kernels.
        svc_args = {'kernel': ker, 'C': conf}
        if ker != 'linear':
            svc_args['gamma'] = gam
        modelo = SVC(**svc_args)
        modelo.fit(x_fit, y_fit)

        # Predict on the training split first, then on the held-out split.
        pred_fit = modelo.predict(x_fit)
        pred_held = modelo.predict(x_held)

        # Per-fold accuracy and share of training samples used as support vectors.
        acc_train[fold] = np.mean(pred_fit.ravel() == y_fit.ravel())
        acc_val[fold] = np.mean(pred_held.ravel() == y_held.ravel())
        sv_fraction[fold] = len(modelo.support_vectors_) / len(x_fit)

    eficiencia_train = np.mean(acc_train)
    ic_train = np.std(acc_train)
    eficiencia_val = np.mean(acc_val)
    ic_val = np.std(acc_val)
    porc_vect = np.mean(sv_fraction)

    print('Eficiencia durante el entrenamiento = ' + str(eficiencia_train) + '+-' + str(ic_train))
    print('Eficiencia durante la validación = ' + str(eficiencia_val) + '+-' + str(ic_val))

    return eficiencia_val, ic_val, eficiencia_train, ic_train, porc_vect

In [1]:
import pandas as pd
import qgrid
# Hyper-parameter grid to evaluate: every C with the linear kernel (gamma is
# a 0 placeholder there) and every (C, gamma) pair with the RBF kernel.
C_VALUES = [0.001, 0.01, 0.1, 1, 10, 100]
GAMMA_VALUES = [0.01, 0.1, 1]
param_grid = (
    [('linear', c, 0) for c in C_VALUES]
    + [('rbf', c, g) for c in C_VALUES for g in GAMMA_VALUES]
)
df_types = pd.DataFrame(param_grid, columns=['Kernel', 'C', 'gamma'])

# Empty result columns, filled in once each configuration has been run.
# Fix: the original created "Intervalo de confianza" twice, which left a
# single column for two different metrics. Validation keeps the original
# name; the training spread gets its own column.
for result_column in (
    "Eficiencia en validacion",
    "Intervalo de confianza",
    "Eficiencia en entrenamiento",
    "Intervalo de confianza (entrenamiento)",
    "% de Vectores de Soporte",
):
    df_types[result_column] = ""

# Index by the hyper-parameters so each row is addressed by (Kernel, C, gamma).
df_types = df_types.set_index(['Kernel', 'C', 'gamma'])

In [6]:
# Evaluate every (Kernel, C, gamma) row of df_types with prueba_svm and
# store the returned metrics (as strings) in that row.
#
# Fix: the original wrote both the validation and the training spread into
# "Intervalo de confianza", so the validation value was always overwritten.
# The training spread now goes to its own column. Any result column that is
# missing from df_types is created first, so this cell does not depend on
# how the table was built.
for result_column in (
    "Eficiencia en validacion",
    "Intervalo de confianza",
    "Eficiencia en entrenamiento",
    "Intervalo de confianza (entrenamiento)",
    "% de Vectores de Soporte",
):
    if result_column not in df_types.columns:
        df_types[result_column] = ""

for i in df_types.index:
    print(i)
    # Each index entry is the (Kernel, C, gamma) tuple of that row.
    kernel, c, gamma = i
    eficiencia_val, ic_val, eficiencia_train, ic_train, porc_vect = prueba_svm(ker=kernel, conf=c, gam=gamma)
    df_types.loc[i, "Eficiencia en validacion"] = str(eficiencia_val)
    df_types.loc[i, "Intervalo de confianza"] = str(ic_val)
    df_types.loc[i, "Eficiencia en entrenamiento"] = str(eficiencia_train)
    df_types.loc[i, "Intervalo de confianza (entrenamiento)"] = str(ic_train)
    df_types.loc[i, "% de Vectores de Soporte"] = str(porc_vect)
    print(eficiencia_val, ic_val, eficiencia_train, ic_train, porc_vect)

('linear', 0.001, 0.0)
Eficiencia durante el entrenamiento = 0.8162457766782173+-0.008395656079409227
Eficiencia durante la validación = 0.8137566727553347+-0.02615462018078472
0.8137566727553347 0.02615462018078472 0.8162457766782173 0.008395656079409227 0.4137694056473531
('linear', 0.01, 0.0)
Eficiencia durante el entrenamiento = 0.8958917256325641+-0.012133197174505657
Eficiencia durante la validación = 0.8349032892966881+-0.02246488742738512
0.8349032892966881 0.02246488742738512 0.8958917256325641 0.012133197174505657 0.3264359294061193
('linear', 0.1, 0.0)
Eficiencia durante el entrenamiento = 0.9167451021267056+-0.007922726755760525
Eficiencia durante la validación = 0.8372440635487022+-0.04777071892472698
0.8372440635487022 0.04777071892472698 0.9167451021267056 0.007922726755760525 0.2568217217386516
('linear', 1.0, 0.0)
Eficiencia durante el entrenamiento = 0.9244270245395154+-0.00584971906888737
Eficiencia durante la validación = 0.8391149129886865+-0.055567550581846384
0.8