In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1️⃣ Cargar datos ya prefiltrados manualmente
df = pd.read_excel("base_limpia_grd_min20.xlsx")
df.columns = df.columns.str.strip()
df.fillna(df.mean(numeric_only=True), inplace=True)
df.fillna('Desconocido', inplace=True)

# 2️⃣ Features (verifica que estas columnas existan en tu archivo Excel)
features = [
    'Edad', 'Grupo Edad', 'Sexo', 'Codigo de ciudad', 'Tipo de ingreso',
    'Días estancia', 'ServicioAlta', 'Cuidados intensivos', 'Días de Unidad Cuidado Intensivo',
    'Dx de ingreso', 'Dx principal de egreso', 'Dx principal de egreso .1', 'Dx Ppal 3 Caracteres',
    'Dxr 1', 'Dxr 2', 'Dxr 3', 'Dxr 4', 'Dxr 5',
    'Código causa externa', 'Causa externa', 'Situacion al alta',
    'Proc1', 'Proc2', 'Proc3', 'Proc4', 'Proc5', 'Proc6', 'Proc7', 'Proc8', 'Proc9', 'Proc10',
    'Tipo servicio', 'Causa Basica de muerte', 'Infecciones', 'Infección Quirurgica'
]

# Filtrar las features existentes en la base
features = [f for f in features if f in df.columns]

# 3️⃣ Matriz de entrada y etiquetas
X = pd.get_dummies(df[features], drop_first=True)
y = df['GRD -Código'].astype(str)

# 4️⃣ Dividir antes de codificar etiquetas
X_train, X_test, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 5️⃣ Contar frecuencia total de GRD en todo el dataset
y_counts = y.value_counts()
valid_grds = y_counts[y_counts >= 20].index

# 6️⃣ Filtrar conjuntos de entrenamiento y prueba con solo GRD válidos
mask_train = y_train_raw.isin(valid_grds)
mask_test = y_test_raw.isin(valid_grds)

X_train = X_train[mask_train]
y_train_raw = y_train_raw[mask_train]

X_test = X_test[mask_test]
y_test_raw = y_test_raw[mask_test]

# 7️⃣ Codificar etiquetas
le = LabelEncoder()
y_train = le.fit_transform(y_train_raw)
y_test = le.transform(y_test_raw)

# 8️⃣ Escalado
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# 9️⃣ Entrenamiento con MLPClassifier
mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64, 32),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    batch_size=32,
    max_iter=100,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42,
    verbose=True
)
mlp.fit(X_train_s, y_train)

# 🔟 Evaluación
y_pred = mlp.predict(X_test_s)
acc = accuracy_score(y_test, y_pred)

print(f"\n🎯 Accuracy: {acc:.4f}\n")
print("📋 Reporte de clasificación:")
print(classification_report(y_test, y_pred, target_names=le.inverse_transform(np.unique(y_test))))
print("🔢 Matriz de confusión:")
print(confusion_matrix(y_test, y_pred))


Iteration 1, loss = 3.65572464
Validation score: 0.381162
Iteration 2, loss = 1.58238754
Validation score: 0.465105
Iteration 3, loss = 0.76546856
Validation score: 0.496828
Iteration 4, loss = 0.41479810
Validation score: 0.503660
Iteration 5, loss = 0.26747060
Validation score: 0.506101
Iteration 6, loss = 0.18934325
Validation score: 0.516837
Iteration 7, loss = 0.15070470
Validation score: 0.503172
Iteration 8, loss = 0.13184505
Validation score: 0.523182
Iteration 9, loss = 0.12327368
Validation score: 0.524158
Iteration 10, loss = 0.11390210
Validation score: 0.512445
Iteration 11, loss = 0.10539486
Validation score: 0.525134
Iteration 12, loss = 0.10114332
Validation score: 0.517814
Iteration 13, loss = 0.09537005
Validation score: 0.518302
Iteration 14, loss = 0.09207052
Validation score: 0.519278
Iteration 15, loss = 0.09958905
Validation score: 0.532943
Iteration 16, loss = 0.08304752
Validation score: 0.535383
Iteration 17, loss = 0.06142364
Validation score: 0.545144
Iterat

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
