In [23]:
# Importar librerías necesarias
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import tree

In [25]:
# Load the Titanic passenger table through seaborn's dataset loader and
# print its first five rows as a quick sanity check of the columns.
df = sns.load_dataset("titanic")
print(df.head(5))

   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


In [27]:
# Keep a small set of predictors plus the target, and drop every row that has
# a missing value in any of them. The explicit .copy() makes `data` an
# independent frame, so the column assignments below can never be mistaken
# for writes into a view of `df`.
data = df[["pclass", "sex", "age", "fare", "alone", "survived"]].dropna().copy()

# Encode `sex` as integers. LabelEncoder sorts its classes, so female=0, male=1.
# `label` stays fitted on this column, so label.inverse_transform can still
# recover the original strings later.
label = LabelEncoder()
data["sex"] = label.fit_transform(data["sex"])

# `alone` holds True/False values, so a direct integer cast gives False=0,
# True=1. Refitting `label` here (as was done before) would overwrite the
# classes learned for `sex`.
data["alone"] = data["alone"].astype(int)

# Split into the feature matrix and the target vector.
X = data.drop(columns="survived")
y = data["survived"]


In [29]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Decision tree: trained and evaluated on the raw (unscaled) features.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# K-NN with k=5, trained and evaluated on the standardised features.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

In [38]:
def _report_model(title, y_true, y_pred):
    """Print the evaluation summary for one classifier and return its accuracy.

    Parameters
    ----------
    title : str
        Heading printed before the metrics.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model.

    Returns
    -------
    float
        The value of accuracy_score(y_true, y_pred).
    """
    accuracy = accuracy_score(y_true, y_pred)
    print(title)
    print("Accuracy:", accuracy)
    print("Matriz de Confusión:\n", confusion_matrix(y_true, y_pred))
    print("Reporte de Clasificación:\n", classification_report(y_true, y_pred))
    return accuracy


# Evaluate both models with the same report. The leading "\n" in the second
# title keeps the blank line that separates the two reports.
acc_dt = _report_model("Árbol de Decisión", y_test, y_pred_dt)
acc_knn = _report_model("\nK-NN", y_test, y_pred_knn)

# Side-by-side comparison. Note: the labels read "Precisión", but the values
# printed are the accuracy scores, not the per-class precision metric shown
# in the classification reports.
print("\nPrecisión Árbol de Decisión:", acc_dt)
print("Precisión K-NN:", acc_knn)



Árbol de Decisión
Accuracy: 0.7441860465116279
Matriz de Confusión:
 [[99 27]
 [28 61]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       0.78      0.79      0.78       126
           1       0.69      0.69      0.69        89

    accuracy                           0.74       215
   macro avg       0.74      0.74      0.74       215
weighted avg       0.74      0.74      0.74       215


K-NN
Accuracy: 0.7813953488372093
Matriz de Confusión:
 [[106  20]
 [ 27  62]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       0.80      0.84      0.82       126
           1       0.76      0.70      0.73        89

    accuracy                           0.78       215
   macro avg       0.78      0.77      0.77       215
weighted avg       0.78      0.78      0.78       215


Precisión Árbol de Decisión: 0.7441860465116279
Precisión K-NN: 0.7813953488372093
