In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Read datos_salud.csv from the working directory into a DataFrame and
# preview its first 20 rows as the cell output.
data = pd.read_csv(filepath_or_buffer='datos_salud.csv')
data.head(n=20)

In [None]:
# Target column to predict, and the four predictor columns used as features.
y = data.loc[:, 'riesgo_alto']
X = data.loc[:, ['edad', 'imc', 'fuma', 'ejercicio']]

In [None]:
# Train / test split: hold out 30% of the rows for evaluation.
# stratify=y keeps the proportion of each riesgo_alto class the same in both
# partitions, so the test metrics are not distorted by an unlucky draw of the
# less frequent class. random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
# Fit a 100-tree random forest on the training partition with a fixed seed.
# fit() returns the estimator itself, so the chained call binds the fitted
# model to `rf`; the bare name then shows it as the cell output.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
rf

In [None]:
# Predicted class labels from the random forest for the held-out test rows.
y_pred_rf = rf.predict(X=X_test)

In [None]:
# Evaluate the random forest on the test split: per-class report, confusion
# matrix and overall accuracy.
print("=== Random Forest ===")
print(classification_report(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))
# accuracy_score is the fraction of correct predictions ("exactitud"); label it
# as such rather than "Precisión" so it is not mistaken for the per-class
# precision column that classification_report prints just above.
print(f"Exactitud (accuracy): {accuracy_score(y_test, y_pred_rf):.3f}")

In [None]:
# Bar chart of the random forest's feature importances, one bar per predictor.
# X.columns is in the same order as the columns the model was fitted on
# (X_train is a row subset of X), so bars and labels line up.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(X.columns, rf.feature_importances_, color='green', alpha=0.7)
ax.set_title("Importancia de variables - Random Forest")
# Axis labels so the figure can be read on its own.
ax.set_xlabel("Variable")
ax.set_ylabel("Importancia")
fig.tight_layout()
plt.show()

In [None]:
# Fit a gradient-boosting classifier (100 stages of depth-3 trees, fixed seed)
# on the training partition. fit() returns the estimator itself, so `gb` holds
# the fitted model; the bare name then shows it as the cell output.
# NOTE(review): learning_rate=0.01 with only 100 estimators is a small total
# step budget and may underfit -- confirm this pairing is intentional.
gb = GradientBoostingClassifier(
    random_state=42,
    max_depth=3,
    learning_rate=0.01,
    n_estimators=100,
).fit(X_train, y_train)
gb

In [None]:
# Predicted class labels from gradient boosting for the held-out test rows.
y_pred_gb = gb.predict(X=X_test)

In [None]:
# Evaluate gradient boosting on the test split: per-class report, confusion
# matrix and overall accuracy.
print("=== Gradient Boosting ===")
print(classification_report(y_test, y_pred_gb))
print(confusion_matrix(y_test, y_pred_gb))
# accuracy_score is the fraction of correct predictions ("exactitud"); label it
# as such rather than "Precisión" so it is not mistaken for the per-class
# precision column that classification_report prints just above.
print(f"Exactitud (accuracy): {accuracy_score(y_test, y_pred_gb):.3f}")

In [None]:
# Bar chart of the gradient-boosting feature importances, one bar per predictor.
# X.columns is in the same order as the columns the model was fitted on
# (X_train is a row subset of X), so bars and labels line up.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(X.columns, gb.feature_importances_, color='orange', alpha=0.7)
ax.set_title("Importancia de variables - Gradient Boosting")
# Axis labels so the figure can be read on its own.
ax.set_xlabel("Variable")
ax.set_ylabel("Importancia")
fig.tight_layout()
plt.show()