In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier

# 1. Base data setup
# The Iris dataset is used for the multiclass classification examples
# and the Breast Cancer dataset for the binary examples.
iris = datasets.load_iris()
X_iris = iris.data
y_iris = iris.target

cancer = datasets.load_breast_cancer()
X_cancer = cancer.data
y_cancer = cancer.target

# Train/test split of the Iris data (test_size=0.3, seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X_iris,
    y_iris,
    test_size=0.3,
    random_state=42,
)

def evaluate_model(model, X_t, y_t, name):
    """Print the accuracy and the classification report of a fitted model.

    Args:
        model: Object exposing ``predict``; it is called once on ``X_t``.
        X_t: Features handed to ``model.predict``.
        y_t: True labels the predictions are scored against.
        name: Text shown in the ``--- name ---`` header line.

    Returns:
        None. Everything is written to standard output.
    """
    predictions = model.predict(X_t)
    accuracy = accuracy_score(y_t, predictions)
    # Header and accuracy go out together, one per line.
    print(f"--- {name} ---", f"Accuracy: {accuracy:.4f}", sep="\n")
    # The report is followed by two blank lines to separate consecutive models.
    print(classification_report(y_t, predictions), end="\n\n\n")


In [None]:
# ==========================================
# 6.1. Decision Trees
# ==========================================
print("Ejecutando Árbol de Decisión...")
# Tree limited to depth 3, with a fixed random seed.
dt_clf = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_clf.fit(X_train, y_train)

# Tree visualization
# iris.target_names is a NumPy array; it is converted to a plain list because
# some scikit-learn releases validate `class_names` strictly as a list and
# raise InvalidParameterError when given an array.
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    dt_clf,
    feature_names=iris.feature_names,
    class_names=iris.target_names.tolist(),
    filled=True,
    ax=ax,
)
ax.set_title("Árbol de Decisión (Iris Dataset)")
plt.show()

evaluate_model(dt_clf, X_test, y_test, "Decision Tree")

In [None]:

# ==========================================
# 6.2. SVM (Support Vector Machines)
# ==========================================
print("Ejecutando SVM...")
# An RBF (Radial Basis Function) kernel is used, which is the standard choice
svm_clf = SVC(
    C=1.0,
    kernel='rbf',
    gamma='scale',
).fit(X_train, y_train)

evaluate_model(svm_clf, X_test, y_test, "SVM (Kernel RBF)")


In [None]:
# ==========================================
# 6.3. Naive Bayes
# ==========================================
print("Ejecutando Naive Bayes...")
# GaussianNB assumes the features follow a normal distribution
nb_clf = GaussianNB().fit(X_train, y_train)

evaluate_model(nb_clf, X_test, y_test, "Naive Bayes (Gaussiano)")

In [None]:
# ==========================================
# 6.4. Ensembles: Bootstrapping, Bagging and Boosting
# ==========================================

# A. Bagging (Bootstrap Aggregating)
# Trains multiple trees on random subsets of the data (with replacement)
print("Ejecutando Bagging...")
bagging_clf = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    max_samples=0.8,  # row bootstrapping
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)
evaluate_model(bagging_clf, X_test, y_test, "Bagging (50 Árboles)")

# B. Boosting (AdaBoost)
# Trains models sequentially, giving more weight to the previous model's errors
print("Ejecutando Boosting (AdaBoost)...")
boosting_clf = AdaBoostClassifier(
    learning_rate=1.0,
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)
evaluate_model(boosting_clf, X_test, y_test, "AdaBoost")


In [None]:
# ==========================================
# 6.5. Random Forest
# ==========================================
# A special kind of Bagging that also selects features at random
print("Ejecutando Random Forest...")
rf_clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42,
)
rf_clf.fit(X_train, y_train)

# Feature importances, with indices sorted in ascending order of importance
importances = rf_clf.feature_importances_
indices = np.argsort(importances)
bar_positions = range(len(indices))
bar_labels = [iris.feature_names[idx] for idx in indices]

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Importancia de Características - Random Forest')
ax.barh(bar_positions, importances[indices], color='b', align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(bar_labels)
ax.set_xlabel('Importancia Relativa')
plt.show()

evaluate_model(rf_clf, X_test, y_test, "Random Forest")

print("Notebook finalizado correctamente.")