# Optimisation Results Analysis

In [None]:
from pathlib import Path

import pandas as pd

from src.dataloader import get_commute_data
from src.utils import plot_agenda, preprocess_schedules

In [None]:
# Commute mode used for the whole analysis; it is forwarded as `kind=` to
# `preprocess_schedules` and `plot_agenda` in the cells below.
# NOTE(review): the commute files loaded later cover "driving" and
# "bicycling" — presumably those are the accepted values; confirm.
KIND = "driving"

## Load data

In [None]:
# Gather every CSV found in the results directory into a single schedule
# frame, ordered by start time and then end time.
results_dir = Path("../results/question_1_a")

# `Path.glob` yields entries in an arbitrary, filesystem-dependent order.
# Sorting the paths makes the concatenation order (and therefore the
# resulting frame) reproducible from one machine/run to the next.
paths = sorted(results_dir.glob("*.csv"))
if not paths:
    # Without this guard `pd.concat([])` fails with the much less helpful
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No CSV result files found in {results_dir.resolve()}"
    )

files = [pd.read_csv(path) for path in paths]
df_optimised = pd.concat(files, axis=0)
df_optimised = df_optimised.sort_values(["Heure de début", "Heure de fin"])

In [None]:
# Commute CSVs handed to `get_commute_data`, listed per transport mode
# (bicycling / driving) and per origin-destination pairing.
commute_file_paths = [
    # client <-> client
    "../data/commute_bicycling_clients.csv",
    "../data/commute_driving_clients.csv",
    # bicycling, caregiver <-> client (both file variants)
    "../data/commute_bicycling_care_clients.csv",
    "../data/commute_bicycling_clients_care.csv",
    # driving, caregiver <-> client (both file variants)
    "../data/commute_driving_care_clients.csv",
    "../data/commute_driving_clients_care.csv",
]
commute_data_df = get_commute_data(commute_file_paths)

# Caregiver table: third sheet (zero-based position 2) of the challenge workbook.
caregivers = pd.read_excel(
    "../data/ChallengeXHEC23022024.xlsx",
    sheet_name=2,
)

In [None]:
# Run the optimiser output through the shared preprocessing step, tagged as
# the "optimised" schedule for the commute mode selected in KIND.
optimised_sched = preprocess_schedules(
    df_optimised,
    caregivers,
    sched="optimised",
    kind=KIND,
)

In [None]:
# Service types that are dropped from the given schedule before comparison.
# NOTE(review): presumably these are activities that are not client visits;
# confirm with the data owner.
discard_list = [
    "ADMINISTRATION",
    "VISITE MEDICALE",
    "FORMATION",
    "COORDINATION",
    "HOMMES TOUTES MAINS",
]

# Original schedule: first sheet (zero-based position 0) of the workbook.
schedule = pd.read_excel("../data/ChallengeXHEC23022024.xlsx", sheet_name=0)

# Keep only the rows whose "Prestation" is not in the discard list.
is_discarded = schedule["Prestation"].isin(discard_list)
schedule = schedule.loc[~is_discarded]

given_sched = preprocess_schedules(
    schedule,
    caregivers,
    sched="given",
    kind=KIND,
)

## Plot given agenda

In [None]:
# Build one agenda per caregiver from the GIVEN schedule and stack them.
# The commute mode comes from the KIND constant set at the top of the notebook.
# NOTE(review): the caregiver IDs are taken from `optimised_sched`, not from
# `given_sched` — presumably so both schedules are compared over the same
# caregivers; confirm this is intended.
caregiver_ids = optimised_sched["ID Intervenant"].unique()

all_intervenant_agendas = [
    plot_agenda(caregiver_id, given_sched, commute_data_df, kind=KIND)
    for caregiver_id in caregiver_ids
]

df_agendas_given = pd.concat(all_intervenant_agendas)

## Plot optimised agenda

In [None]:
# Build one agenda per caregiver from the OPTIMISED schedule and stack them.
# The commute mode comes from the KIND constant set at the top of the notebook.
caregiver_ids = optimised_sched["ID Intervenant"].unique()

all_intervenant_agendas = [
    plot_agenda(caregiver_id, optimised_sched, commute_data_df, kind=KIND)
    for caregiver_id in caregiver_ids
]

df_agendas_opt = pd.concat(all_intervenant_agendas)

## Comparison of metrics

In [None]:
# Total of the "Commute Time" column over all commute rows, divided by 60
# for the "(in h)" report — presumably the column is in minutes; confirm.
is_commute_given = df_agendas_given["Task"] == "Commute Time"
is_commute_opt = df_agendas_opt["Task"] == "Commute Time"

commute_hours_given = (
    df_agendas_given.loc[is_commute_given, "Commute Time"].sum() / 60
)
commute_hours_opt = df_agendas_opt.loc[is_commute_opt, "Commute Time"].sum() / 60

print("Commute time in Jan 24")
print(f"Given Schedule (in h): {commute_hours_given:.2f}")
print(f"Optimised Schedule (in h): {commute_hours_opt:.2f}")

In [None]:
def _mean_daily_commute_per_caregiver(agendas):
    """Return the mean over dates of the per-date mean caregiver commute total.

    Steps: keep the commute rows, sum "Commute Time" per (Date, caregiver),
    average those sums within each Date, then average across Dates.
    """
    commute_rows = agendas[agendas["Task"] == "Commute Time"]
    per_caregiver_day = commute_rows.groupby(["Date", "ID Intervenant"])[
        "Commute Time"
    ].sum()
    per_day = per_caregiver_day.groupby("Date").mean()
    return per_day.mean()


time_given = _mean_daily_commute_per_caregiver(df_agendas_given)
time_opt = _mean_daily_commute_per_caregiver(df_agendas_opt)

print("Avg. commute time per caregiver per day")
print(f"Given Schedule (in min): {time_given:.2f}")
print(f"Optimised Schedule (in min): {time_opt:.2f}")

In [None]:
# Number of non-null "Wait Time" entries among the rows tagged as wait time.
is_wait_given = df_agendas_given["Task"] == "Wait Time"
is_wait_opt = df_agendas_opt["Task"] == "Wait Time"

n_waits_given = df_agendas_given.loc[is_wait_given, "Wait Time"].count()
n_waits_opt = df_agendas_opt.loc[is_wait_opt, "Wait Time"].count()

print("Number short downtimes in Jan 24")
print(f"Given Schedule: {n_waits_given}")
print(f"Optimised Schedule: {n_waits_opt}")

In [None]:
# Total of the "Wait Time" column over all wait rows, divided by 60 for the
# "(in h)" report — presumably the column is in minutes; confirm.
is_wait_given = df_agendas_given["Task"] == "Wait Time"
is_wait_opt = df_agendas_opt["Task"] == "Wait Time"

wait_hours_given = df_agendas_given.loc[is_wait_given, "Wait Time"].sum() / 60
wait_hours_opt = df_agendas_opt.loc[is_wait_opt, "Wait Time"].sum() / 60

print("Short downtimes in Jan 24")
print(f"Given Schedule (in h): {wait_hours_given:.2f}")
print(f"Optimised Schedule (in h): {wait_hours_opt:.2f}")

In [None]:
def _mean_daily_wait_count_per_caregiver(agendas):
    """Return the mean over dates of the per-date mean caregiver wait count.

    Steps: keep the wait rows, count "Wait Time" entries per (Date,
    caregiver), average those counts within each Date, then average across
    Dates.
    """
    wait_rows = agendas[agendas["Task"] == "Wait Time"]
    per_caregiver_day = wait_rows.groupby(["Date", "ID Intervenant"])[
        "Wait Time"
    ].count()
    per_day = per_caregiver_day.groupby("Date").mean()
    return per_day.mean()


cnt_given = _mean_daily_wait_count_per_caregiver(df_agendas_given)
cnt_opt = _mean_daily_wait_count_per_caregiver(df_agendas_opt)

print("Avg. nr. short downtimes per caregiver per day:")
print(f"Given schedule: {cnt_given:.2f}")
print(f"Optimised schedule: {cnt_opt:.2f}")

In [None]:
# Sum of "Commute Meters" over commute rows whose method is "driving",
# reported in km (meters / 1000).
# NOTE(review): the method filter is the literal "driving", independent of
# KIND — presumably distance is only of interest for car trips; confirm.
driven_given = (df_agendas_given["Task"] == "Commute Time") & (
    df_agendas_given["Commute Method"] == "driving"
)
driven_opt = (df_agendas_opt["Task"] == "Commute Time") & (
    df_agendas_opt["Commute Method"] == "driving"
)

commute_given = df_agendas_given.loc[driven_given, "Commute Meters"].sum()
commute_opt = df_agendas_opt.loc[driven_opt, "Commute Meters"].sum()

print("Commute meters in Jan 24")
print(f"Given Schedule (in km): {commute_given / 1000:.2f}")
print(f"Optimised Schedule (in km): {commute_opt / 1000:.2f}")

In [None]:
# Commute total per (Date, caregiver) for the given schedule; left as the
# final bare expression so the notebook displays it as the cell output.
given_commute_rows = df_agendas_given[df_agendas_given["Task"] == "Commute Time"]
given_commute_rows.groupby(["Date", "ID Intervenant"])["Commute Time"].sum()

In [None]:
# Commute total per (Date, caregiver) for the optimised schedule.
opt_commute_rows = df_agendas_opt[df_agendas_opt["Task"] == "Commute Time"]
temp = opt_commute_rows.groupby(["Date", "ID Intervenant"])["Commute Time"].sum()

In [None]:
# Turn the (Date, caregiver) index into columns, then keep a single day.
# NOTE(review): the filter compares "Date" with the string "2024-01-03" —
# verify the column dtype makes this comparison match as expected.
temp = temp.reset_index()
on_selected_day = temp["Date"] == "2024-01-03"
temp = temp[on_selected_day]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Horizontal box plot of the "Commute Time" values currently held in `temp`,
# drawn on a wide, short canvas.
fig, ax = plt.subplots(figsize=(15, 3))
sns.boxplot(x=temp["Commute Time"], color="#36414C", ax=ax)