# Máquinas de Vectores de Soporte

In [8]:
import numpy as np
import pandas as pd

## Importando los datos

In [9]:
mammographic_standard = pd.read_csv('./processed/mammographic_masses_standard.csv')
mammographic_standard.head(10)

Unnamed: 0,Age,Shape,Margin,Density,Severity
0,0.765804,0.17446,1.395631,0.240313,1
1,0.151666,0.979883,1.395631,0.240313,1
2,-1.895458,-1.436386,-1.158927,0.240313,0
3,0.083429,-1.436386,1.395631,0.240313,1
4,1.379941,-1.436386,0.756992,0.240313,1
5,-0.940133,-0.630963,-1.158927,0.240313,1
6,-1.349558,0.17446,-1.158927,-2.612545,0
7,0.288141,-0.630963,-1.158927,-2.612545,0
8,-0.121284,-1.436386,-1.158927,0.240313,0
9,-0.257759,0.17446,0.756992,0.240313,0


## Datos de entrenamiento y prueba

In [10]:
df_feat = mammographic_standard.drop('Severity', axis = 1)
df_feat.head()

Unnamed: 0,Age,Shape,Margin,Density
0,0.765804,0.17446,1.395631,0.240313
1,0.151666,0.979883,1.395631,0.240313
2,-1.895458,-1.436386,-1.158927,0.240313
3,0.083429,-1.436386,1.395631,0.240313
4,1.379941,-1.436386,0.756992,0.240313


In [11]:
df_target = mammographic_standard['Severity'].to_frame()
df_target.head()

Unnamed: 0,Severity
0,1
1,1
2,0
3,1
4,1


In [12]:
from sklearn.model_selection import train_test_split

In [13]:
X_train, X_test, y_train, y_test = train_test_split(df_feat, np.ravel(df_target), test_size=0.25, random_state=42)

## Entrenamiento de la Máquina de Soporte

In [14]:
from sklearn.svm import SVC
scv_mm = SVC(gamma = 1)
scv_mm.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Predicciones y Evaluaciones

In [15]:
y_predict = scv_mm.predict(X_test)

In [16]:
from sklearn.metrics import classification_report,confusion_matrix

### Matriz de confución

In [17]:
print(confusion_matrix(y_test,y_predict))

[[72 34]
 [11 91]]


### Reporte de clasificación

In [18]:
print(classification_report(y_test,y_predict))

             precision    recall  f1-score   support

          0       0.87      0.68      0.76       106
          1       0.73      0.89      0.80       102

avg / total       0.80      0.78      0.78       208



## Búsqueda de hiperparámetros con Gridsearch

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'C': [0.001, 0.1, 1, 10, 100], 
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
    'coef0': [1, 2, 3],
    'degree': [0, 1, 2, 3, 4],
    'kernel': ['linear','rbf', 'sigmoid', 'poly']
} 
grid = GridSearchCV(SVC(),param_grid,refit=True,cv=5, verbose=1)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 1500 candidates, totalling 7500 fits


In [20]:
yg_predict = grid.predict(X_test)

### Mejores parametros

In [21]:
grid.best_params_

{'C': 10, 'coef0': 3, 'gamma': 0.01, 'kernel': 'sigmoid'}

### Mejor estimador

In [22]:
grid.best_estimator_

SVC(C=10, cache_size=200, class_weight=None, coef0=3,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='sigmoid',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### Matriz de confución

In [23]:
print(confusion_matrix(y_test,yg_predict))

[[70 36]
 [11 91]]


### Reporte de clasificación

In [24]:
print(classification_report(y_test,yg_predict))

             precision    recall  f1-score   support

          0       0.86      0.66      0.75       106
          1       0.72      0.89      0.79       102

avg / total       0.79      0.77      0.77       208

