# Titanic usando SVM (clasificación)

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

In [None]:
# Cargamos los datos
df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

In [None]:
# Preprocesamiento igual que en tu código
df = df.drop(['Cabin', 'Ticket', 'Name', 'PassengerId'], axis=1)
mean_age = df['Age'].mean()
df['Age'].fillna(mean_age, inplace=True)
df = pd.get_dummies(df, drop_first=True)

In [None]:
# Definimos las variables independientes y la variable objetivo
X = df.drop('Survived', axis=1)
y = df['Survived']

In [None]:
# Escalamos los datos
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Configuración de parámetros para GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'poly', 'sigmoid', 'linear'],
    'degree': [2, 3]  # Solo es relevante para el kernel 'poly'
}

In [None]:
# Crear un clasificador SVM con GridSearchCV
svc = SVC()
grid_search = GridSearchCV(svc, param_grid, refit=True, verbose=2, cv=5)

In [None]:
# Entrenar el modelo con GridSearchCV
grid_search.fit(X_scaled, y)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
[CV] END ...............C=0.1, degree=2, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...............C=0.1, degree=2, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...............C=0.1, degree=2, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...............C=0.1, degree=2, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...............C=0.1, degree=2, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ..............C=0.1, degree=2, gamma=1, kernel=poly; total time=   0.0s
[CV] END ..............C=0.1, degree=2, gamma=1, kernel=poly; total time=   0.0s
[CV] END ..............C=0.1, degree=2, gamma=1, kernel=poly; total time=   0.0s
[CV] END ..............C=0.1, degree=2, gamma=1, kernel=poly; total time=   0.0s
[CV] END ..............C=0.1, degree=2, gamma=1, kernel=poly; total time=   0.0s
[CV] END ...........C=0.1, degree=2, gamma=1, kernel=sigmoid; total time=   0.0s
[CV] END ...........C=0.1, degree=2, gamma=1, k

In [None]:
# Mejores parámetros
print("Mejores parámetros:", grid_search.best_params_)

# Mejor modelo encontrado por GridSearchCV
best_svc = grid_search.best_estimator_

Mejores parámetros: {'C': 1, 'degree': 2, 'gamma': 0.1, 'kernel': 'rbf'}


In [None]:
# Realizamos la validación cruzada con el mejor modelo
y_pred = cross_val_predict(best_svc, X_scaled, y, cv=5)

In [None]:
# Evaluación del modelo
confusion = confusion_matrix(y, y_pred)
classification_rep = classification_report(y, y_pred)

In [None]:
# Mostrar la matriz de confusión y el informe de clasificación
print("Matriz de Confusión:\n", confusion)
print("Informe de Clasificación:\n", classification_rep)

Matriz de Confusión:
 [[501  48]
 [107 235]]
Informe de Clasificación:
               precision    recall  f1-score   support

           0       0.82      0.91      0.87       549
           1       0.83      0.69      0.75       342

    accuracy                           0.83       891
   macro avg       0.83      0.80      0.81       891
weighted avg       0.83      0.83      0.82       891

