# 1. Importamos las librerias necesarias

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report



# 2. Cargamos la data

In [3]:
data = load_breast_cancer()
X = data.data
y = data.target 

# 0 - Maligno
# 1 - Benigno

# 3. Dividimos la data en entrenamiento y prueba

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# 4. Escalar la data

In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Entrenamos el modelo

In [6]:
# Entrenar un modelo de SVM
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_scaled, y_train)

# 6. Predecimos sobre la data

In [7]:
y_pred = svm_model.predict(X_test_scaled)

# 7. Evaluamos el modelo

In [8]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [9]:
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.956140350877193
Confusion Matrix:
[[41  2]
 [ 3 68]]
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.95      0.94        43
           1       0.97      0.96      0.96        71

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114



# 8. Tuneo de Hiper-Parametros

### Importamos la libreria necesaria

In [13]:
from sklearn.model_selection import train_test_split, GridSearchCV

In [14]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf']
}

### Inicializamos el modelo

In [15]:
svm_model = SVC(random_state=42)

### Hacemos GridSearch

In [16]:
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1)
#cv es el número de divisiones para la validación cruzada. 
#(5-fold cross-validation) en este caso.
#verbose toma valores booleanos o enteros. Inidica si se mostrara mensajes durante la corrida del modelo.
grid_search.fit(X_train_scaled, y_train)


Fitting 5 folds for each of 24 candidates, totalling 120 fits


### Obtenemos los mejores parametros

In [17]:
best_params = grid_search.best_params_
print("Mejores hiperparámetros encontrados:", best_params)

Mejores hiperparámetros encontrados: {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}


### Entrenamos el modelo con los mejores hiperparámetros

In [18]:
best_svm_model = SVC(**best_params, random_state=42)
#** desempaqueta un diccionario

#SVC(C=0.1, gamma=0.1, kernel='linear', random_state=42)
best_svm_model.fit(X_train_scaled, y_train)

### Predecimos

In [19]:
y_pred = best_svm_model.predict(X_test_scaled)

### Evaluamos el rendimiento del modelo

In [20]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)


In [21]:
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.9824561403508771
Confusion Matrix:
[[41  2]
 [ 0 71]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

