# Setup: Imports y configuración

In [None]:
# Imports esenciales
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import DQN
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns

# Global seed: fixes the state of NumPy's legacy global generator (np.random.*)
GLOBAL_SEED = 42
np.random.seed(GLOBAL_SEED)


# Simulación de datos 

In [None]:
import numpy as np
import pandas as pd

# Reproducibility: pin the state of NumPy's global random generator
GLOBAL_SEED = 42
np.random.seed(GLOBAL_SEED)

# Problem size: teams, patients per day, simulated days
N_TEAMS, N_PACIENTES, N_DIAS = 3, 8, 20

# Team roster: id, team type and base coordinates, one row per team
teams_info = pd.DataFrame(
    dict(
        team_id=[f"E{n}" for n in range(1, N_TEAMS + 1)],
        team_type=list("ABA"),
        base_lat=[2, 7, 4],
        base_lon=[2, 8, 6],
    )
)

def simular_pacientes(ndias=N_DIAS, npac=N_PACIENTES):
    """Simulate patient visit requests for a number of days.

    Values are drawn from NumPy's global generator (``np.random``), so the
    result depends on its current state.

    Args:
        ndias: Number of days to simulate; days are numbered from 1.
        npac: Number of patients generated for each day.

    Returns:
        pandas.DataFrame with one row per (day, patient) and the columns
        ``day``, ``patient_id``, ``lat``, ``lon``, ``window_start``,
        ``window_end``, ``estimated_care_time`` and ``required_team_type``.
    """
    registros = []
    for dia in range(1, ndias + 1):
        for num in range(1, npac + 1):
            # The draw order below is fixed: changing it would change the
            # table produced for a given seed.
            lat, lon = np.random.uniform(1, 9), np.random.uniform(1, 9)
            hora_ini = np.random.randint(8, 15)             # window opens at 08..14 h
            hora_fin = hora_ini + np.random.randint(2, 4)   # window lasts 2 or 3 h
            duracion = np.random.randint(25, 50)            # integer in [25, 49]
            equipo_req = np.random.choice(["A", "B"])
            registros.append(dict(
                day=dia,
                patient_id=f"P{num}_Day{dia}",
                lat=lat,
                lon=lon,
                window_start=f"{hora_ini:02d}:00",
                window_end=f"{hora_fin:02d}:00",
                estimated_care_time=duracion,
                required_team_type=equipo_req,
            ))
    return pd.DataFrame(registros)

# Simulated demand for every day, using the default problem size
data_all = simular_pacientes()

# Lookup tables keyed by team id: base coordinates [lat, lon] and team type
team_dict = {
    tid: [lat, lon]
    for tid, lat, lon in zip(teams_info["team_id"], teams_info["base_lat"], teams_info["base_lon"])
}
team_type_dict = dict(zip(teams_info["team_id"], teams_info["team_type"]))


# Definición del entorno RL

In [None]:
import gymnasium as gym
from gymnasium import spaces

class RealisticHomeCareEnv(gym.Env):
    """Gymnasium environment that schedules home-care visits for one day.

    An action selects one (team, patient) pair: ``action // n_patients`` is the
    index of the team (teams are ordered by sorted id) and
    ``action % n_patients`` is the row of the patient in ``df``. A valid action
    moves the team to the patient, advances the team clock by travel, waiting
    and care time, and marks the patient as attended. Clock values are minutes
    after midnight.

    Observation (float32 vector): ``(pos[0], pos[1], clock)`` for each team,
    followed by ``(pos[0], pos[1], attended, window_start, window_end)`` for
    each patient. Care time and required team type are not part of the
    observation.
    """
    metadata = {'render_modes': ['human']}
    WORKING_DAY_END = 18 * 60  # 18:00 in minutes after midnight

    def __init__(self, df, equipos_bases, team_types, n_teams=3, max_possible_visits_per_day=100, seed=GLOBAL_SEED):
        """Build the spaces and put the environment in its initial state.

        Args:
            df: Patient table for the day; the columns read are ``patient_id``,
                ``lat``, ``lon``, ``window_start``, ``window_end`` (``"HH:MM"``
                strings), ``estimated_care_time``, ``required_team_type`` and
                ``day``.
            equipos_bases: Mapping team id -> base position (2 coordinates).
            team_types: Mapping team id -> team type.
            n_teams: Number of teams used to size the spaces.
            max_possible_visits_per_day: Step limit of an episode.
            seed: Seed for the environment's own ``np_random`` generator.
        """
        super().__init__()
        self.np_random, _ = gym.utils.seeding.np_random(seed)
        self.df = df.reset_index(drop=True)
        self.n_patients = len(df)
        self.n_teams = n_teams
        self.equipos_bases = {tid: np.array(pos) for tid, pos in equipos_bases.items()}
        # Fixed team ordering shared by the observation layout and action decoding.
        self._team_ids = sorted(self.equipos_bases.keys())
        self.team_types = team_types
        self.max_steps = max_possible_visits_per_day

        # Per-team bounds: (pos[0], pos[1], clock).
        low_eq = [0.0, 0.0, 0.0] * self.n_teams
        high_eq = [10.0, 10.0, float(self.WORKING_DAY_END + 200)] * self.n_teams
        # Per-patient bounds: (pos[0], pos[1], attended, window_start, window_end).
        low_pat = [0.0, 0.0, 0.0, 0.0, 0.0] * self.n_patients
        high_pat = [10.0, 10.0, 1.0, float(24*60), float(24*60)] * self.n_patients

        low = np.array(low_eq + low_pat, dtype=np.float32)
        high = np.array(high_eq + high_pat, dtype=np.float32)
        self.observation_space = spaces.Box(low, high, dtype=np.float32)
        self.action_space = spaces.Discrete(self.n_teams * self.n_patients)
        self.reset()

    @staticmethod
    def _clock_to_minutes(hhmm):
        """Convert an ``"HH:MM"`` string to minutes after midnight, capped at 1439."""
        return min(int(hhmm[:2]) * 60 + int(hhmm[3:5]), 1439)

    def reset(self, seed=None, options=None):
        """Return every team to its base at 08:00 and mark all patients unattended.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``, which re-seeds
                ``self.np_random``. The global NumPy generator is not touched.
            options: Unused.

        Returns:
            Tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        # Seed through Gymnasium instead of np.random.seed so that resetting
        # the environment never alters the global random stream of the caller.
        super().reset(seed=seed)
        self.equipos_status = {
            tid: {"pos": self.equipos_bases[tid].copy(), "time": 8 * 60, "visits": []}
            for tid in self._team_ids
        }
        self.patients_status = []
        for _, row in self.df.iterrows():
            self.patients_status.append({
                "patient_id": row["patient_id"],
                "pos": np.array([row["lat"], row["lon"]]),
                "attended": 0,
                "window_start": self._clock_to_minutes(row["window_start"]),
                "window_end": self._clock_to_minutes(row["window_end"]),
                "estimated_care_time": row["estimated_care_time"],
                "required_team_type": row["required_team_type"],
                "actual_arrival_time": -1
            })
        self.steps_taken = 0
        self.total_reward = 0
        self.invalid_action_count = 0
        self.current_day_itinerary = []
        obs = self._get_obs()
        info = {}
        return obs, info

    def _get_obs(self):
        """Flatten team and patient state into the float32 observation vector."""
        equipos_flat = []
        for tid in self._team_ids:
            equipos_flat.extend(self.equipos_status[tid]["pos"].tolist())
            equipos_flat.append(self.equipos_status[tid]["time"])
        patients_flat = []
        for pat in self.patients_status:
            patients_flat.extend(pat["pos"].tolist())
            patients_flat.append(pat["attended"])
            patients_flat.append(pat["window_start"])
            patients_flat.append(pat["window_end"])
        return np.array(equipos_flat + patients_flat, dtype=np.float32)

    def _perform_visit(self, team_id, patient_idx):
        """Carry out a valid visit, update team/patient state and return its reward."""
        team = self.equipos_status[team_id]
        patient = self.patients_status[patient_idx]
        w_start = patient["window_start"]
        w_end = patient["window_end"]
        care_time = patient["estimated_care_time"]

        # Travel time: Euclidean distance scaled by 10, times 5, truncated to int.
        dist = np.linalg.norm(team["pos"] - patient["pos"]) * 10
        travel_time = int(dist * 5)
        arrival = team["time"] + travel_time

        late_penalty = 0
        wait_time_penalty = 0
        if arrival > w_end:
            late_penalty = arrival - w_end
        if arrival < w_start:
            # Early teams wait until the window opens; waiting is penalised mildly.
            wait_time_penalty = (w_start - arrival) * 0.1
            arrival = w_start

        visit_end_time = arrival + care_time

        overtime_penalty = 0
        if visit_end_time > self.WORKING_DAY_END:
            overtime_penalty = (visit_end_time - self.WORKING_DAY_END) * 2

        team["pos"] = patient["pos"].copy()
        team["time"] = visit_end_time
        patient["attended"] = 1
        patient["actual_arrival_time"] = arrival

        in_window = (w_start <= arrival <= w_end)

        self.current_day_itinerary.append({
            "day": self.df.iloc[patient_idx]["day"],
            "patient_id": patient["patient_id"],
            "team_id": team_id,
            "arrival_time": arrival,
            "end_time": visit_end_time,
            "travel_time": travel_time,
            "care_time": care_time,
            "window_start": w_start,
            "window_end": w_end,
            "is_in_window": in_window
        })

        visit_reward = 0
        visit_reward += 40                       # bonus for any completed visit
        visit_reward += 60 if in_window else 0   # extra bonus for arriving in the window
        visit_reward -= travel_time * 0.3
        visit_reward -= late_penalty
        visit_reward -= wait_time_penalty
        visit_reward -= overtime_penalty
        return visit_reward

    def _return_to_base_penalty(self):
        """Penalty for teams whose trip back to base ends after the working day."""
        penalty = 0
        for tid in self._team_ids:
            status = self.equipos_status[tid]
            return_dist = np.linalg.norm(status["pos"] - self.equipos_bases[tid]) * 10
            return_travel_time = int(return_dist * 5)
            arrival_at_base_time = status["time"] + return_travel_time
            if arrival_at_base_time > self.WORKING_DAY_END:
                penalty += (arrival_at_base_time - self.WORKING_DAY_END) * 5
        return penalty

    def step(self, action):
        """Assign one team to one patient and advance the simulation.

        Args:
            action: Integer in ``[0, n_teams * n_patients)``; NumPy scalars and
                size-1 arrays (as returned by ``model.predict``) are accepted.

        Returns:
            Tuple ``(observation, reward, done, truncated, info)``. ``info``
            carries ``total_reward`` (sum of all rewards returned so far in
            the episode) and ``current_itinerary`` (list of visit records).
        """
        # Normalise NumPy scalars / size-1 arrays to a plain Python int.
        action = int(np.asarray(action).item())

        reward = 0
        done = False
        truncated = False

        team_idx = action // self.n_patients
        patient_idx = action % self.n_patients

        team_id = self._team_ids[team_idx]
        team_type = self.team_types[team_id]
        patient_info = self.patients_status[patient_idx]

        is_invalid_action = False

        if patient_info["attended"] == 1:
            reward += -40  # patient was already visited
            is_invalid_action = True
        # NOTE(review): containment test, not equality; confirm this is intended.
        if patient_info["required_team_type"] not in team_type:
            reward += -40  # team type does not match the patient's requirement
            is_invalid_action = True

        self.steps_taken += 1

        if is_invalid_action:
            self.invalid_action_count += 1
        else:
            # The counter tracks consecutive invalid actions only.
            self.invalid_action_count = 0
            reward += self._perform_visit(team_id, patient_idx)

        if all(p["attended"] == 1 for p in self.patients_status):
            done = True

        # NOTE(review): on the two cut-offs below both flags are raised together,
        # whereas Gymnasium distinguishes terminated from truncated; kept as in
        # the original so the training signal is unchanged.
        if self.steps_taken >= self.max_steps:
            done = True
            truncated = True
            unattended_count = sum(1 for p in self.patients_status if p["attended"] == 0)
            reward -= unattended_count * 20

        if self.invalid_action_count > self.n_patients * self.n_teams:
            done = True
            truncated = True

        if done:
            reward -= self._return_to_base_penalty()

        # Accumulate once, after every component has been applied, so that
        # info["total_reward"] always equals the sum of the returned rewards
        # (the unattended-patient penalty used to be left out).
        self.total_reward += reward

        obs = self._get_obs()
        info = {
            "total_reward": self.total_reward,
            "current_itinerary": self.current_day_itinerary
        }
        return obs, reward, done, truncated, info


# Entrenamiento RL

In [None]:
from stable_baselines3 import DQN

# Train on a SINGLE day so the observation size matches the one used in evaluation
train_day = data_all[data_all["day"] == 1].copy().reset_index(drop=True)
env_train = RealisticHomeCareEnv(
    train_day,
    team_dict,
    team_type_dict,
    n_teams=N_TEAMS,
    max_possible_visits_per_day=N_TEAMS*N_PACIENTES*2,
    seed=GLOBAL_SEED
)

model = DQN(
    "MlpPolicy",
    env_train,
    verbose=1,
    buffer_size=100_000,
    learning_starts=1_000,
    batch_size=64,
    train_freq=(4, "step"),
    target_update_interval=500,
    gamma=0.99,
    exploration_fraction=0.3,
    exploration_final_eps=0.05,
    learning_rate=3e-4,
    tensorboard_log="./tb_hhc_rl/",
    # Seed the agent as well: without it, network initialisation, exploration
    # and replay sampling differ between runs even though the data is seeded.
    seed=GLOBAL_SEED
)
model.learn(total_timesteps=100_000)
print("Entrenamiento RL finalizado.")


# Evaluación y visualización

In [None]:
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns

# Per-day metrics, and the visit records gathered over all evaluated days
estadistica = []
itinerarios_completos = []

for test_day in range(1, N_DIAS + 1):
    df_day = data_all.loc[data_all["day"] == test_day].reset_index(drop=True)
    # The policy expects a fixed observation size: only days with exactly
    # N_PACIENTES patients can be evaluated.
    if len(df_day) != N_PACIENTES:
        print(f"Saltando día {test_day}: pacientes = {len(df_day)}, se requieren {N_PACIENTES}")
        continue

    env_eval = RealisticHomeCareEnv(
        df_day,
        team_dict,
        team_type_dict,
        n_teams=N_TEAMS,
        max_possible_visits_per_day=N_TEAMS * N_PACIENTES * 2,
        seed=GLOBAL_SEED,
    )
    obs, info_reset = env_eval.reset()
    done, truncated = False, False
    episode_reward = 0
    day_visits_info = []
    team_visit_order_counter = defaultdict(int)

    while not (done or truncated):
        # Feed the policy a flat float32 array
        if isinstance(obs, tuple):
            obs = obs[0]
        obs = np.asarray(obs, dtype=np.float32)
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env_eval.step(action)
        episode_reward += reward

        # The itinerary only grows when the step produced a real visit
        if len(info["current_itinerary"]) > len(day_visits_info):
            last_visit = info["current_itinerary"][-1]
            team_id = last_visit["team_id"]
            team_visit_order_counter[team_id] += 1
            last_visit["order_in_day_for_team"] = team_visit_order_counter[team_id]
            day_visits_info.append(last_visit)

    itinerarios_completos.extend(day_visits_info)
    ok_visits = len([v for v in day_visits_info if v["is_in_window"]])
    total_successful_visits = len(day_visits_info)
    estadistica.append(dict(
        dia=test_day,
        recompensa_total=episode_reward,
        visitas_cumplidas_ventana=ok_visits,
        total_visitas_realizadas=total_successful_visits,
        pacientes_atendidos_final=len([p for p in env_eval.patients_status if p["attended"] == 1]),
        total_pacientes_dia=env_eval.n_patients,
    ))

# Tabular summary of the evaluation
df_stats = pd.DataFrame(estadistica)
print("\n--- Resumen de Cumplimiento por Día ---")
print(df_stats)

total_cumplidas_general = df_stats["visitas_cumplidas_ventana"].sum()
total_realizadas_general = df_stats["total_visitas_realizadas"].sum()
if total_realizadas_general > 0:
    porcentaje_cumplimiento = total_cumplidas_general / total_realizadas_general * 100
else:
    # No visit was performed at all: report 0 instead of dividing by zero
    porcentaje_cumplimiento = 0
print(f"\nPromedio general de visitas en ventana horaria: {total_cumplidas_general} / {total_realizadas_general} = {porcentaje_cumplimiento:.1f}%")
print(f"Recompensa promedio por episodio: {df_stats['recompensa_total'].mean():.2f}")

# Stacked bars (in-window / out-of-window visits) with the patient count as a reference line
fig, ax = plt.subplots(figsize=(10, 5))
en_ventana = df_stats["visitas_cumplidas_ventana"]
fuera_ventana = df_stats["total_visitas_realizadas"] - en_ventana
ax.bar(df_stats["dia"], en_ventana, color="green", label="Visitas en Ventana")
ax.bar(df_stats["dia"], fuera_ventana, bottom=en_ventana, color="orange", label="Visitas Fuera de Ventana")
ax.plot(df_stats["dia"], df_stats["total_pacientes_dia"], "--", color="blue", label="Total Pacientes Dia")
ax.set_xlabel("Día")
ax.set_ylabel("Número de Visitas")
ax.set_title("Rendimiento del Agente por Día")
ax.legend()
ax.set_xticks(df_stats["dia"])
fig.tight_layout()
plt.show()

df_it_full = pd.DataFrame(itinerarios_completos)
if df_it_full.empty:
    print("No hay datos de itinerario para graficar.")
else:
    def _formato_hhmm(t):
        """Render a minutes-after-midnight value as an HH:MM string."""
        return f"{int(t//60):02d}:{int(t%60):02d}"

    df_it_full["hora"] = df_it_full["arrival_time"].apply(_formato_hhmm)
    df_it_full["hora_fin"] = df_it_full["end_time"].apply(_formato_hhmm)
    df_it_full["cumplimiento"] = df_it_full["is_in_window"].map({True: "En ventana", False: "Fuera de ventana"})
    df_it_full_sorted = df_it_full.sort_values(by=["day", "team_id", "order_in_day_for_team"])

    columnas_detalle = ['day', 'order_in_day_for_team', 'team_id', 'patient_id', 'hora', 'hora_fin', 'cumplimiento', 'travel_time', 'care_time']
    print("\nPrimeras 20 visitas detalladas de los itinerarios generados:")
    print(df_it_full_sorted[columnas_detalle].head(20).to_string())

    # Team type is needed by the second chart and is part of the exported report
    df_it_full_sorted["tipo_equipo"] = df_it_full_sorted["team_id"].map(team_type_dict)

    # One count plot per grouping: by team, then by team type
    graficos = [
        ("team_id", "Cumplimiento de visitas por equipo", "Equipo"),
        ("tipo_equipo", "Cumplimiento de visitas por tipo de equipo", "Tipo de Equipo"),
    ]
    for columna_x, titulo, etiqueta_x in graficos:
        plt.figure(figsize=(8, 4))
        sns.countplot(
            data=df_it_full_sorted,
            x=columna_x, hue="cumplimiento",
            palette={"En ventana": "green", "Fuera de ventana": "orange"}
        )
        plt.title(titulo)
        plt.xlabel(etiqueta_x)
        plt.ylabel("Número de visitas")
        plt.legend(title="Cumplimiento")
        plt.tight_layout()
        plt.show()

    # Export to Excel for reporting
    df_it_full_sorted.to_excel("itinerario_rl_hhc.xlsx", index=False)
    print("\nItinerario exportado a 'itinerario_rl_hhc.xlsx'")


# Generar y guardar datos simulados

In [None]:
import numpy as np
import pandas as pd
import os

# Make sure the 'Data' directory exists
os.makedirs('Data', exist_ok=True)

# Simulation parameters
N_DAYS = 10
N_PATIENTS = 8

# Re-seed before drawing: by this point the global random stream has been
# consumed by earlier cells, so without this the exported file would differ
# from run to run.
np.random.seed(GLOBAL_SEED)

# Reuse the notebook's generator rather than a copy of its loop; it draws the
# same fields in the same order and produces the same columns.
df = simular_pacientes(ndias=N_DAYS, npac=N_PATIENTS)
df.to_csv("Data/simulated_hhc_data.csv", index=False)
print('Data saved to Data/simulated_hhc_data.csv')


In [None]:
import numpy as np
import pandas as pd
import os

# NOTE(review): this cell performs the same export as the preceding cell and
# writes to the same path; consider removing one of the two.

# Make sure the 'Data' directory exists
os.makedirs('Data', exist_ok=True)

# Simulation parameters
N_DAYS = 10
N_PATIENTS = 8

# Re-seed before drawing so that this cell's output is deterministic and
# re-running it always writes the same file.
np.random.seed(GLOBAL_SEED)

# Reuse the notebook's generator rather than a copy of its loop; it draws the
# same fields in the same order and produces the same columns.
df = simular_pacientes(ndias=N_DAYS, npac=N_PATIENTS)
df.to_csv("Data/simulated_hhc_data.csv", index=False)
print('Data saved to Data/simulated_hhc_data.csv')


Data saved to Data/simulated_hhc_data.csv
