# 🏥 Sistema de Predicción de Diabetes - dIAbetes
## Switch Case para Variables Categóricas

Este notebook convierte automáticamente las variables categóricas (género, etnia, tabaquismo) al formato **one-hot encoding** que el modelo necesita.

## 1️⃣ Importar Librerías y Funciones

In [None]:
import pandas as pd
import numpy as np
import pickle
from typing import Tuple

# Confirm that the import cell ran to completion.
print("‚úÖ Librer√≠as importadas correctamente")

## 2️⃣ Función de Conversión con Switch Case

In [None]:
def prepare_patient_data(
    age, gender, ethnicity, income_level, smoking_status,
    alcohol_consumption_per_week, physical_activity_minutes_per_week,
    diet_score, sleep_hours_per_day, screen_time_hours_per_day,
    family_history_diabetes, hypertension_history, cardiovascular_history,
    bmi, waist_to_hip_ratio, systolic_bp, diastolic_bp, heart_rate,
    cholesterol_total, hdl_cholesterol, ldl_cholesterol, triglycerides,
    glucose_fasting, glucose_postprandial, insulin_level, hba1c, diabetes_risk_score
) -> Tuple[np.ndarray, pd.DataFrame]:
    """
    Build the one-hot encoded feature vector for a single patient.

    The 24 numeric inputs are copied in a fixed order, followed by one-hot
    groups for the three categorical inputs:

    - gender: 'Female' or 'Male'
    - ethnicity: 'Asian', 'Black', 'Hispanic', 'Other' or 'White'
    - smoking_status: 'Current', 'Former' or 'Never'

    Categorical strings are stripped and capitalized before matching, so
    ' female ' and 'FEMALE' both select gender_Female. Within each group
    only the selected column is 1; the others stay 0.

    Returns:
        A tuple ``(features, df)``: ``features`` is a 1-D float array with
        34 entries and ``df`` is a one-row DataFrame holding the same values
        under their column names.

    Raises:
        ValueError: If a categorical argument is not one of the accepted
            labels, including non-string values such as None.
    """
    numeric_columns = [
        'age', 'income_level', 'alcohol_consumption_per_week',
        'physical_activity_minutes_per_week', 'diet_score', 'sleep_hours_per_day',
        'screen_time_hours_per_day', 'family_history_diabetes', 'hypertension_history',
        'cardiovascular_history', 'bmi', 'waist_to_hip_ratio', 'systolic_bp',
        'diastolic_bp', 'heart_rate', 'cholesterol_total', 'hdl_cholesterol',
        'ldl_cholesterol', 'triglycerides', 'glucose_fasting', 'glucose_postprandial',
        'insulin_level', 'hba1c', 'diabetes_risk_score',
    ]

    # (column prefix, raw value, accepted labels in column order, error label)
    categorical_fields = (
        ('gender', gender, ('Female', 'Male'), "G√©nero inv√°lido"),
        ('ethnicity', ethnicity,
         ('Asian', 'Black', 'Hispanic', 'Other', 'White'), "Etnia inv√°lida"),
        ('smoking_status', smoking_status,
         ('Current', 'Former', 'Never'), "Estado de fumador inv√°lido"),
    )

    # The column list is the single source of truth for the layout; one-hot
    # positions are looked up by name instead of being hard-coded.
    columns = numeric_columns + [
        f"{prefix}_{label}"
        for prefix, _, labels, _ in categorical_fields
        for label in labels
    ]
    column_index = {name: position for position, name in enumerate(columns)}

    features = np.zeros(len(columns))
    features[:len(numeric_columns)] = [
        age, income_level, alcohol_consumption_per_week,
        physical_activity_minutes_per_week, diet_score, sleep_hours_per_day,
        screen_time_hours_per_day, family_history_diabetes, hypertension_history,
        cardiovascular_history, bmi, waist_to_hip_ratio, systolic_bp,
        diastolic_bp, heart_rate, cholesterol_total, hdl_cholesterol,
        ldl_cholesterol, triglycerides, glucose_fasting, glucose_postprandial,
        insulin_level, hba1c, diabetes_risk_score
    ]

    for prefix, raw_value, labels, error_label in categorical_fields:
        # Non-string input (None, NaN, numbers) cannot be normalized; it is
        # reported as an invalid value instead of failing with AttributeError.
        if isinstance(raw_value, str):
            selected = raw_value.strip().capitalize()
        else:
            selected = raw_value
        if selected not in labels:
            raise ValueError(f"{error_label}: '{selected}'")
        features[column_index[f"{prefix}_{selected}"]] = 1

    df = pd.DataFrame([features], columns=columns)
    return features, df

# Confirm that the cell defining prepare_patient_data() was executed.
print("‚úÖ Funci√≥n prepare_patient_data() definida")

## 3️⃣ Ejemplo 1: Paciente Sano

In [None]:
# Patient record in a readable form; categorical fields take plain labels.
healthy_patient = {
    'age': 45,
    'gender': 'Female',  # 'Female' or 'Male'
    'ethnicity': 'White',  # 'Asian', 'Black', 'Hispanic', 'Other', 'White'
    'income_level': 2,  # 0=Low, 1=Medium, 2=High
    'smoking_status': 'Never',  # 'Current', 'Former', 'Never'
    'alcohol_consumption_per_week': 3.5,
    'physical_activity_minutes_per_week': 300,
    'diet_score': 8,
    'sleep_hours_per_day': 7.5,
    'screen_time_hours_per_day': 4.0,
    'family_history_diabetes': 0,  # 0=No, 1=Yes
    'hypertension_history': 0,
    'cardiovascular_history': 0,
    'bmi': 23.5,
    'waist_to_hip_ratio': 0.82,
    'systolic_bp': 118,
    'diastolic_bp': 76,
    'heart_rate': 72,
    'cholesterol_total': 185.0,
    'hdl_cholesterol': 65.0,
    'ldl_cholesterol': 110.0,
    'triglycerides': 95.0,
    'glucose_fasting': 92.0,
    'glucose_postprandial': 125.0,
    'insulin_level': 8.5,
    'hba1c': 5.3,
    'diabetes_risk_score': 25,
}
features, df_patient = prepare_patient_data(**healthy_patient)

print("‚úÖ Datos del paciente convertidos exitosamente")
print(f"\nüìä Total de caracter√≠sticas: {len(features)}")
print("\nüîç Verificaci√≥n del one-hot encoding:")
# Print the one-hot columns expected to describe this patient.
for column in ('gender_Female', 'gender_Male', 'ethnicity_White', 'smoking_status_Never'):
    print(f"   {column}: {df_patient[column].values[0]}")

# Show every feature, one per row.
print("\nüìã DataFrame completo:")
df_patient.T

## 4️⃣ Ejemplo 2: Paciente con Alto Riesgo

In [None]:
# Second example record, passed through the same conversion function.
high_risk_patient = {
    'age': 62,
    'gender': 'Male',
    'ethnicity': 'Hispanic',
    'income_level': 0,
    'smoking_status': 'Current',
    'alcohol_consumption_per_week': 12.0,
    'physical_activity_minutes_per_week': 80,
    'diet_score': 4,
    'sleep_hours_per_day': 6.0,
    'screen_time_hours_per_day': 8.5,
    'family_history_diabetes': 1,
    'hypertension_history': 1,
    'cardiovascular_history': 1,
    'bmi': 35.2,
    'waist_to_hip_ratio': 1.05,
    'systolic_bp': 148,
    'diastolic_bp': 94,
    'heart_rate': 88,
    'cholesterol_total': 245.0,
    'hdl_cholesterol': 35.0,
    'ldl_cholesterol': 165.0,
    'triglycerides': 285.0,
    'glucose_fasting': 165.0,
    'glucose_postprandial': 245.0,
    'insulin_level': 32.5,
    'hba1c': 8.4,
    'diabetes_risk_score': 82,
}
features2, df_patient2 = prepare_patient_data(**high_risk_patient)

print("‚úÖ Paciente con alto riesgo convertido")
print("\nüîç Verificaci√≥n del one-hot encoding:")
# Print the one-hot columns expected to describe this patient.
for column in ('gender_Male', 'ethnicity_Hispanic', 'smoking_status_Current'):
    print(f"   {column}: {df_patient2[column].values[0]}")

df_patient2.T

## 5️⃣ Hacer Predicción con el Modelo

**Nota:** Necesitas tener el modelo guardado como `diabetes_model.pkl`

In [None]:
# Load the trained model from 'diabetes_model.pkl' (path is relative to the
# working directory).
# NOTE: pickle.load can execute arbitrary code while unpickling; only load a
# model file that comes from a trusted source.
try:
    with open('diabetes_model.pkl', 'rb') as f:
        model = pickle.load(f)
except FileNotFoundError:
    print("‚ö†Ô∏è Archivo 'diabetes_model.pkl' no encontrado")
    print("   Aseg√∫rate de tener el modelo entrenado guardado")
else:
    # The prediction runs outside the try body so that only a missing model
    # file is reported as "not found"; errors raised by the model itself
    # surface unchanged.
    prediction = model.predict(df_patient)[0]
    probabilities = model.predict_proba(df_patient)[0]

    print("üéØ RESULTADO DE LA PREDICCI√ìN")
    print("=" * 50)
    print(f"Diagn√≥stico: {'DIABETES' if prediction == 1 else 'NO DIABETES'}")
    print(f"Probabilidad de NO tener diabetes: {probabilities[0]:.2%}")
    print(f"Probabilidad de tener diabetes: {probabilities[1]:.2%}")

## 📝 Resumen del Switch Case

### ✅ Variables Convertidas Automáticamente:

1. **Gender** (`'Female'` o `'Male'`):
   - Si `'Female'` → `gender_Female=1`, `gender_Male=0`
   - Si `'Male'` → `gender_Female=0`, `gender_Male=1`

2. **Ethnicity** (`'Asian'`, `'Black'`, `'Hispanic'`, `'Other'`, `'White'`):
   - Solo la etnia seleccionada = 1
   - Las otras 4 = 0

3. **Smoking Status** (`'Current'`, `'Former'`, `'Never'`):
   - Solo el estado seleccionado = 1
   - Los otros 2 = 0

### 🎯 Resultado Final:
- **34 características** listas para el modelo
- Formato compatible con el modelo entrenado
- Validación automática de valores