In [None]:
import datetime as dt

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns

In [None]:
# Load data: read the three sheets used by this notebook from the challenge workbook
file_path = '../data/ChallengeXHEC23022024.xlsx'

# Open the workbook once and parse every sheet from the same handle; the `with`
# block closes the underlying file as soon as the sheets are loaded.
with pd.ExcelFile(file_path) as excel_data:
    jan24_df = pd.read_excel(excel_data, sheet_name='JAN24')
    clients_df = pd.read_excel(excel_data, sheet_name='clients')
    intervenants_df = pd.read_excel(excel_data, sheet_name='intervenants')

# Analyze start/end times for each type of service

In [None]:
# Earliest / latest start and end time observed for every type of service
for time_col in ('Heure de début', 'Heure de fin'):
    # Keep only the time-of-day part of the column (datetime.time objects)
    jan24_df[time_col] = pd.to_datetime(jan24_df[time_col], format='%H:%M:%S').dt.time

# Named aggregation gives the flat, readable column names directly
service_constraints = (
    jan24_df.groupby('Prestation')
    .agg(**{
        'Start Time Earliest': ('Heure de début', 'min'),
        'Start Time Latest': ('Heure de début', 'max'),
        'End Time Earliest': ('Heure de fin', 'min'),
        'End Time Latest': ('Heure de fin', 'max'),
    })
    .reset_index()
)

display(service_constraints)

In [None]:
# Parse both time columns as full timestamps so that they can be subtracted
start_ts = pd.to_datetime(jan24_df['Heure de début'], format='%H:%M:%S')
end_ts = pd.to_datetime(jan24_df['Heure de fin'], format='%H:%M:%S')
jan24_df['Heure de début'] = start_ts
jan24_df['Heure de fin'] = end_ts

# Length of every service, as a timedelta and as a number of seconds
jan24_df["Duration"] = end_ts - start_ts
jan24_df["Duration_int"] = jan24_df["Duration"].dt.total_seconds()

# Overlaid histograms of the duration in seconds, one per type of service
durations_by_service = jan24_df.groupby("Prestation")["Duration_int"]
durations_by_service.hist(alpha=0.5, legend=True);

In [None]:
# Find the frequency of the time of day at which services start and end,
# with one figure per type of service.
# Bin edges are hourly marks from 08:00 to 22:00 on 1900-01-01, the date pandas
# assigns when parsing a time-only '%H:%M:%S' format. Values outside that window
# fall outside the bins and are not counted.
hours = [dt.datetime(year=1900, month=1, day=1, hour=i) for i in range(8, 23)]
for prestation, df_prest in jan24_df.groupby("Prestation"):
    # Explicit figure/axes instead of the pyplot state machine
    fig, ax = plt.subplots()
    ax.hist(df_prest["Heure de début"], alpha=0.5, label="Start Times", bins=hours)
    ax.hist(df_prest["Heure de fin"], alpha=0.5, label="End Times", bins=hours)
    ax.tick_params(axis="x", labelrotation=90)
    ax.set_title(f"Start and end times for {prestation}")
    ax.set_xlabel("Time of day")
    ax.set_ylabel("Number of services")
    ax.legend()
    plt.show()
    # Release the figure so that looping over many services does not pile up memory
    plt.close(fig)


In [None]:
# All start times for all prestations: occurrences of each start time per type of service.
# SeriesGroupBy.value_counts always yields a Series indexed by (Prestation, start time);
# groupby.apply(value_counts) can instead return a wide DataFrame when the groups share
# the same index, in which case .to_frame() fails.
jan24_df.groupby("Prestation")["Heure de début"].value_counts().to_frame().style

In [None]:
# Find if prestations have a fixed duration: number of distinct durations per type
# of service (1 means that every occurrence of that service lasts the same time).
# The built-in nunique replaces the Python-level apply + len(value_counts()) lambda.
jan24_df.groupby("Prestation")["Duration"].nunique()

In [None]:
# Mean duration of every type of service
duration_by_service = jan24_df.groupby("Prestation")["Duration"]
duration_by_service.mean()

# Segment clients according to their needs

In [None]:
# Client x service table: how many times each client received each type of service
visits_per_client_service = jan24_df.groupby(['ID Client', 'Prestation']).size()
# Spread the services over the columns; a client who never had a service gets 0
client_needs = visits_per_client_service.unstack(fill_value=0)

display(client_needs)

In [None]:
# Find clients who have similar needs: clients are grouped by the exact set of
# services they received.
# The set is sorted before being turned into a string key, so that two clients with
# the same services end up in the same segment whatever the order of their rows.
client_need_list = (
    jan24_df.groupby("ID Client")["Prestation"]
    .apply(lambda s: str(s.drop_duplicates().sort_values().to_numpy()))
    .rename("Prestations")
    .reset_index()
)

# Maps each service-set key to the array of client IDs sharing it.
# The loop variable is deliberately not called `list`: rebinding the built-in at
# notebook level breaks every later `list(...)` call.
client_segments = {
    needs: members["ID Client"].values
    for needs, members in client_need_list.groupby("Prestations")
}

In [None]:
# Display the mapping from each set of services to the IDs of the clients sharing it
client_segments

# Segment days according to the services offered

In [None]:
# Count the services delivered on weekdays versus weekends, per type of service
jan24_df['Day of Week'] = jan24_df['Date'].dt.dayofweek
# dayofweek numbers Monday as 0, so 5 and 6 are Saturday and Sunday
is_weekend = jan24_df['Day of Week'] >= 5
jan24_df['Day Type'] = is_weekend.map({True: 'Weekend', False: 'Weekday'})

services_per_day_type = jan24_df.groupby(['Day Type', 'Prestation']).size()
day_segmentation = services_per_day_type.unstack(fill_value=0)

display(day_segmentation)


# Plotting the agenda of each caretaker

In [None]:
def _combine_date_and_time(dates, times):
    """Return ``dates`` (at midnight) shifted by the time of day held in ``times``.

    ``times`` may be a datetime64 Series, in which case only its time-of-day part
    is used whatever date it carries, or a Series of ``datetime.time`` objects or
    'HH:MM:SS' strings.
    """
    if pd.api.types.is_datetime64_any_dtype(times):
        # Strip the date part: timestamp minus its own midnight
        time_of_day = times - times.dt.normalize()
    else:
        time_of_day = pd.to_timedelta(times.astype(str))
    return pd.to_datetime(dates).dt.normalize() + time_of_day


# Build the full start / end timestamps of every service.
# Once the time columns have been parsed as timestamps they carry a 1900-01-01 date,
# so concatenating their string form to the date would give an unparseable
# "2024-01-15 1900-01-01 08:00:00"; the date and the time of day are added instead.
jan24_df['Start DateTime'] = _combine_date_and_time(jan24_df['Date'], jan24_df['Heure de début'])
jan24_df['End DateTime'] = _combine_date_and_time(jan24_df['Date'], jan24_df['Heure de fin'])

In [None]:
def plot_agenda_for_intervenant(intervenant_id):
    """Show an interactive timeline of the services assigned to one intervenant.

    Reads the notebook-level ``jan24_df`` frame, which must already contain the
    'Start DateTime' and 'End DateTime' columns.

    Parameters
    ----------
    intervenant_id
        Value matched against the 'ID Intervenant' column of ``jan24_df``.

    Returns
    -------
    None
        The Plotly figure is displayed with ``fig.show()``.
    """
    intervenant_agenda = jan24_df[jan24_df['ID Intervenant'] == intervenant_id]
    intervenant_agenda_sorted = intervenant_agenda.sort_values(by=['Date', 'Heure de début'])

    # Work on a copy so that the helper columns below never leak into jan24_df
    df_timeline = intervenant_agenda_sorted.copy()
    df_timeline['Start'] = pd.to_datetime(df_timeline['Start DateTime'])
    df_timeline['Finish'] = pd.to_datetime(df_timeline['End DateTime'])
    df_timeline['Task'] = df_timeline['Prestation']
    df_timeline['Resource'] = df_timeline['ID Intervenant'].astype(str)

    fig = px.timeline(df_timeline, x_start="Start", x_end="Finish", y="Task", color="Resource")
    fig.update_yaxes(autorange="reversed")
    fig.update_layout(
        title=f"Agenda for Intervenant ID: {intervenant_id}",
        xaxis=dict(
            rangeselector=dict(
                # A list literal rather than `list([...])`: it keeps working even
                # when the name `list` has been rebound at notebook level.
                buttons=[
                    dict(count=1, label='1D', step='day', stepmode='backward'),
                    dict(count=7, label='1W', step='day', stepmode='backward'),
                    dict(step='all'),
                ]
            ),
            rangeslider=dict(visible=True),
            type='date',
        ),
    )

    fig.show()

In [None]:
# Build, at notebook level, the same timeline frame as plot_agenda_for_intervenant
# for one example intervenant, so that the intermediate result can be inspected.
# `intervenant_id` is assigned explicitly (first ID found in the data): without it
# the cell raises NameError on a fresh kernel, because no earlier cell defines it.
intervenant_id = jan24_df['ID Intervenant'].unique()[0]

intervenant_agenda = jan24_df[jan24_df['ID Intervenant'] == intervenant_id]
intervenant_agenda_sorted = intervenant_agenda.sort_values(by=['Date', 'Heure de début'])

df_timeline = intervenant_agenda_sorted.copy()
df_timeline['Start'] = pd.to_datetime(df_timeline['Start DateTime'])
df_timeline['Finish'] = pd.to_datetime(df_timeline['End DateTime'])
df_timeline['Task'] = df_timeline['Prestation']
df_timeline['Resource'] = df_timeline['ID Intervenant'].astype(str)


In [None]:
# Draw one agenda timeline per distinct intervenant found in jan24_df
all_intervenant_ids = jan24_df['ID Intervenant'].unique()
for intervenant_id in all_intervenant_ids:
    plot_agenda_for_intervenant(intervenant_id)