# Máquinas de Vectores de Soporte

In [1]:
import numpy as np
import pandas as pd

## Importando los datos

In [78]:
mammographic_standard = pd.read_csv('../processed/mammographic_masses_standard.csv')
mammographic_standard.head(10)

Unnamed: 0,Age,Shape,Margin,Density,Severity
0,0.765804,0.17446,1.395631,0.240313,1
1,0.151666,0.979883,1.395631,0.240313,1
2,-1.895458,-1.436386,-1.158927,0.240313,0
3,0.083429,-1.436386,1.395631,0.240313,1
4,1.379941,-1.436386,0.756992,0.240313,1
5,-0.940133,-0.630963,-1.158927,0.240313,1
6,-1.349558,0.17446,-1.158927,-2.612545,0
7,0.288141,-0.630963,-1.158927,-2.612545,0
8,-0.121284,-1.436386,-1.158927,0.240313,0
9,-0.257759,0.17446,0.756992,0.240313,0


## Datos de entrenamiento y prueba

In [79]:
df_feat = mammographic_standard.drop('Severity', axis = 1)
df_feat.head()

Unnamed: 0,Age,Shape,Margin,Density
0,0.765804,0.17446,1.395631,0.240313
1,0.151666,0.979883,1.395631,0.240313
2,-1.895458,-1.436386,-1.158927,0.240313
3,0.083429,-1.436386,1.395631,0.240313
4,1.379941,-1.436386,0.756992,0.240313


In [80]:
df_target = mammographic_standard['Severity'].to_frame()
df_target.head()

Unnamed: 0,Severity
0,1
1,1
2,0
3,1
4,1


In [81]:
from sklearn.model_selection import train_test_split

In [82]:
X_train, X_test, y_train, y_test = train_test_split(df_feat, np.ravel(df_target), test_size=0.30)

## Entrenamiento de la Máquina de Soporte

In [83]:
from sklearn.svm import SVC
scv_mm = SVC(gamma = 'scale')
scv_mm.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Predicciones y Evaluaciones

In [84]:
y_predict = scv_mm.predict(X_test)

In [85]:
from sklearn.metrics import classification_report,confusion_matrix

### Matriz de confución

In [86]:
print(confusion_matrix(y_test,y_predict))

[[ 94  29]
 [ 22 105]]


### Reporte de clasificación

In [87]:
print(classification_report(y_test,y_predict))

              precision    recall  f1-score   support

           0       0.81      0.76      0.79       123
           1       0.78      0.83      0.80       127

   micro avg       0.80      0.80      0.80       250
   macro avg       0.80      0.80      0.80       250
weighted avg       0.80      0.80      0.80       250



## Búsqueda de hiperparámetros con Gridsearch

In [88]:
from sklearn.model_selection import GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100, 1000], 
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
    'coef0': [1, 2, 3],
    'kernel': ['rbf', 'sigmoid']
} 
grid = GridSearchCV(SVC(),param_grid,refit=True,cv=5, verbose=3)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 150 candidates, totalling 750 fits
[CV] C=0.1, coef0=1, gamma=1, kernel=rbf .............................
[CV]  C=0.1, coef0=1, gamma=1, kernel=rbf, score=0.7521367521367521, total=   0.0s
[CV] C=0.1, coef0=1, gamma=1, kernel=rbf .............................
[CV]  C=0.1, coef0=1, gamma=1, kernel=rbf, score=0.8103448275862069, total=   0.0s
[CV] C=0.1, coef0=1, gamma=1, kernel=rbf .............................
[CV]  C=0.1, coef0=1, gamma=1, kernel=rbf, score=0.8103448275862069, total=   0.0s
[CV] C=0.1, coef0=1, gamma=1, kernel=rbf .............................
[CV]  C=0.1, coef0=1, gamma=1, kernel=rbf, score=0.7844827586206896, total=   0.0s
[CV] C=0.1, coef0=1, gamma=1, kernel=rbf .............................
[CV]  C=0.1, coef0=1, gamma=1, kernel=rbf, score=0.7672413793103449, total=   0.0s
[CV] C=0.1, coef0=1, gamma=1, kernel=sigmoid .........................
[CV]  C=0.1, coef0=1, gamma=1, kernel=sigmoid, score=0.7008547008547008, total=   0.0s
[CV] C=0.

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV]  C=0.1, coef0=1, gamma=0.1, kernel=sigmoid, score=0.8103448275862069, total=   0.0s
[CV] C=0.1, coef0=1, gamma=0.1, kernel=sigmoid .......................
[CV]  C=0.1, coef0=1, gamma=0.1, kernel=sigmoid, score=0.8448275862068966, total=   0.0s
[CV] C=0.1, coef0=1, gamma=0.1, kernel=sigmoid .......................
[CV]  C=0.1, coef0=1, gamma=0.1, kernel=sigmoid, score=0.8017241379310345, total=   0.0s
[CV] C=0.1, coef0=1, gamma=0.1, kernel=sigmoid .......................
[CV]  C=0.1, coef0=1, gamma=0.1, kernel=sigmoid, score=0.7931034482758621, total=   0.0s
[CV] C=0.1, coef0=1, gamma=0.01, kernel=rbf ..........................
[CV]  C=0.1, coef0=1, gamma=0.01, kernel=rbf, score=0.7692307692307693, total=   0.0s
[CV] C=0.1, coef0=1, gamma=0.01, kernel=rbf ..........................
[CV]  C=0.1, coef0=1, gamma=0.01, kernel=rbf, score=0.8103448275862069, total=   0.0s
[CV] C=0.1, coef0=1, gamma=0.01, kernel=rbf ..........................
[CV]  C=0.1, coef0=1, gamma=0.01, kernel=rbf, 

[Parallel(n_jobs=1)]: Done 750 out of 750 | elapsed:   31.0s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'coef0': [1, 2, 3], 'kernel': ['rbf', 'sigmoid']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [89]:
yg_predict = grid.predict(X_test)

### Mejores parametros

In [90]:
grid.best_params_

{'C': 1, 'coef0': 3, 'gamma': 0.1, 'kernel': 'sigmoid'}

### Mejor estimador

In [91]:
grid.best_estimator_

SVC(C=1, cache_size=200, class_weight=None, coef0=3,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='sigmoid',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### Matriz de confución

In [92]:
print(confusion_matrix(y_test,yg_predict))

[[ 91  32]
 [ 21 106]]


### Reporte de clasificación

In [93]:
print(classification_report(y_test,yg_predict))

              precision    recall  f1-score   support

           0       0.81      0.74      0.77       123
           1       0.77      0.83      0.80       127

   micro avg       0.79      0.79      0.79       250
   macro avg       0.79      0.79      0.79       250
weighted avg       0.79      0.79      0.79       250

