<a href="https://colab.research.google.com/github/marieandrepayfit/Marie-Andr-/blob/main/TEST_de_Daily_occupancy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from google.colab import auth
from google.auth import default

# Import gspread, installing it on the fly when it is missing and the code is
# running inside Google Colab.
try:
  import gspread
except ModuleNotFoundError:
  # The shell's string representation mentions 'google.colab' when running in Colab.
  if 'google.colab' in str(get_ipython()):
    # IPython magic (not plain Python): installs into the running kernel.
    %pip install gspread
  # Retried unconditionally: outside Colab this raises ModuleNotFoundError
  # again, which surfaces the missing dependency to the user.
  import gspread

# Ceiling, in seconds, of a single clock-in/clock-out span per service level.
# Events above their ceiling are flagged as aberrant; service levels that are
# not listed here are never flagged.
_ABERRANT_CEILING_S = {
    'CCR': 9000,  # 2.5 h
    'APS': 18000,  # 5 h
    'OBS': 9000,  # 2.5 h
    'CSM - Low touch': 9000,  # 2.5 h
    'CSM - Medium touch': 9000,  # 2.5 h
    'CSM - High touch': 9000,  # 2.5 h
    'Decla - DSN évènementielles': 12600,  # 3.5 h
    'Declaration - DSN mensuelles': 12600,  # 3.5 h
    'Decla - Investigation': 9000,  # 2.5 h
    'Decla - Paramétrage': 9000,  # 2.5 h
    'CSM': 9000,  # 2.5 h
    'CCM': 9000,  # 2.5 h
    'Ext CCR': 9000,  # 2.5 h
    'Ext CSM/AM': 9000,  # 2.5 h
    'Ext Evenementielles': 12600,  # 3.5 h
    'Ext Mensuelles': 12600,  # 3.5 h
    'Ext Paramétrages': 9000,  # 2.5 h
    'Ext OB': 9000,  # 2.5 h
}

_DURATION = 'Duration ci-co (s)'
_ADJUSTED = 'Duration ci-co Adjusted (s)'
_MEDIAN = 'Median Duration on the last 30 days'


def _normalise_events(raw, payfiter_prefix, time_spent_column, tool_duration_column):
    """Return a new frame with the common event schema, leaving ``raw`` untouched."""
    events = pd.DataFrame(index=raw.index)
    events['Agent Email'] = raw[payfiter_prefix + ' Payfiter e-mail']
    events['Service Level'] = raw[payfiter_prefix + ' Service Level']
    events['Case ID'] = raw['1.2 - Cases Case ID']
    events['Date day'] = pd.to_datetime(raw['1.1 - Events Event Date Date'], errors='coerce')
    # Cast to float so the duration columns have the same dtype in both sources.
    events[_DURATION] = pd.to_numeric(raw[time_spent_column], errors='coerce').astype('float64')
    events['Country'] = raw[payfiter_prefix + ' Scope country code']
    events['Clock out'] = pd.to_datetime(raw['1.1 - Events Event Date Second'], errors='coerce')
    # Each event belongs to exactly one tool; the other tool's duration is zero.
    events['Duration SF (s)'] = 0.0
    events['Duration Intercom (s)'] = 0.0
    events[tool_duration_column] = events[_DURATION]
    return events


def _flag_events(events):
    """Add the 20:00 / lunch / aberrant flags and keep only same-day events."""
    clock_out = events['Clock out']
    clock_in = clock_out - pd.to_timedelta(events[_DURATION], unit='s')
    events['Clock In'] = clock_in

    # A clock-out at exactly 20:00 is treated as an automatic clock-out.
    events['Clock Out at 20:00?'] = (clock_out.dt.hour == 20) & (clock_out.dt.minute == 0)

    # NOTE(review): the minute tests are applied independently of the hour, so
    # e.g. a clock-in at 12:10 is not counted although it is after 11:30.
    # Kept as in the original rule - confirm the intended lunch window.
    events['Clock In/Out lunch?'] = (
        clock_in.dt.hour.between(11, 12)
        & (clock_in.dt.minute >= 30)
        & clock_out.dt.hour.between(13, 14)
        & (clock_out.dt.minute >= 30)
    )
    events['Duration during Lunch (s)'] = np.where(
        events['Clock In/Out lunch?'], events[_DURATION], 0.0
    )

    # Drop events spanning midnight. Rows with an unparsable clock-out or
    # duration are dropped too, because NaT never compares equal.
    same_day = clock_in.dt.normalize() == clock_out.dt.normalize()
    events = events[same_day].copy()

    ceiling = events['Service Level'].map(_ABERRANT_CEILING_S)
    events['Aberrant Duration'] = (events[_DURATION] > ceiling).astype(int)
    return events


def _add_adjusted_durations(events):
    """Add the per-agent rolling median and the median-capped adjusted duration."""
    # A single stable sort gives both the rolling window and the forward fill
    # the same chronological order.
    events = events.sort_values(by=['Clock out', 'Date day', 'Agent Email'], kind='stable')

    # The median is learnt only from trustworthy events: no automatic
    # clock-out, no aberrant span, no zero-length span.
    trustworthy = (
        ~events['Clock Out at 20:00?']
        & (events['Aberrant Duration'] == 0)
        & (events[_DURATION] != 0)
    )
    # NOTE(review): the window is the agent's last 30 trustworthy *events*,
    # not 30 calendar days, despite the column name.
    # Assigned by index rather than merged on keys, so events sharing the same
    # agent and clock-out time are not duplicated.
    events[_MEDIAN] = (
        events[trustworthy]
        .groupby('Agent Email')[_DURATION]
        .transform(lambda spans: spans.rolling(window=30, min_periods=1).median())
    )
    # Untrustworthy events inherit the latest median of the same agent and day.
    events[_MEDIAN] = events.groupby(['Agent Email', 'Date day'])[_MEDIAN].ffill()

    # Replace a suspicious duration by the median, but only when that lowers it.
    suspicious = (events['Aberrant Duration'] == 1) | events['Clock Out at 20:00?']
    cap_with_median = suspicious & (events[_MEDIAN] < events[_DURATION])
    events[_ADJUSTED] = np.where(cap_with_median, events[_MEDIAN], events[_DURATION])
    return events


def _summarise_per_service_level(events, daily_working_hours):
    """Aggregate events per agent and day, then average per service level."""
    # Raw and adjusted durations are summed on the same keys so that an agent
    # active in several service levels is not counted in full on each of them.
    daily_totals = (
        events.groupby(['Country', 'Service Level', 'Agent Email', 'Date day'])
        .agg({
            _DURATION: 'sum',
            _ADJUSTED: 'sum',
            'Duration SF (s)': 'sum',
            'Duration Intercom (s)': 'sum',
            'Duration during Lunch (s)': 'sum',
            'Clock Out at 20:00?': 'sum',
            'Clock In/Out lunch?': 'sum',
            'Aberrant Duration': 'sum',
            'Case ID': lambda case_ids: len(set(case_ids)),
        })
        .rename(columns={'Case ID': '# Treated cases'})
        .reset_index()
    )

    summary = (
        daily_totals.groupby(['Date day', 'Country', 'Service Level'])
        .agg({
            '# Treated cases': 'sum',
            'Clock Out at 20:00?': 'sum',
            'Clock In/Out lunch?': 'sum',
            'Aberrant Duration': 'sum',
            _DURATION: 'mean',
            _ADJUSTED: 'mean',
            'Duration SF (s)': 'mean',
            'Duration Intercom (s)': 'mean',
            'Duration during Lunch (s)': 'mean',
        })
        .reset_index()
        .rename(columns={
            'Clock Out at 20:00?': '# Clock Out at 20:00',
            'Clock In/Out lunch?': '# Clock In/Out lunch',
            'Aberrant Duration': '# Aberrant Duration',
        })
    )

    hours_from_seconds = {
        'Avg Working time (h)': _DURATION,
        'Avg Working time Adjusted (h)': _ADJUSTED,
        'Avg ci-co SF (h)': 'Duration SF (s)',
        'Avg ci-co Intercom (h)': 'Duration Intercom (s)',
        'Avg ci-co during lunch (h)': 'Duration during Lunch (s)',
    }
    for hours_column, seconds_column in hours_from_seconds.items():
        summary[hours_column] = summary[seconds_column] / 3600
    summary['% Occupancy'] = summary['Avg Working time (h)'] / daily_working_hours * 100
    summary['% Occupancy Adjusted'] = (
        summary['Avg Working time Adjusted (h)'] / daily_working_hours * 100
    )

    columns_order = ['Country', 'Date day', 'Service Level', '# Treated cases', '# Aberrant Duration', '# Clock Out at 20:00', '# Clock In/Out lunch', 'Avg ci-co SF (h)', 'Avg ci-co Intercom (h)', 'Avg ci-co during lunch (h)', 'Avg Working time (h)', 'Avg Working time Adjusted (h)', '% Occupancy', '% Occupancy Adjusted']
    summary = summary[columns_order].sort_values(
        by=['Country', 'Service Level', 'Date day'], ascending=True
    )
    # NOTE(review): indexing by a metric value is unusual; kept so the returned
    # frame has the same shape as before.
    return summary.set_index('% Occupancy Adjusted', drop=False)


def calculate_occupancy_ranges_with_additional_metrics(df_sf, df_intercom, daily_working_hours=7.8):
    """
    Compute daily occupancy per country and service level from clock-in/clock-out events.

    Data sources (Looker explores):
    df_sf_V3 : https://payfit.eu.looker.com/explore/customer_success/cs_metrics?qid=X8P3JQXodONwAIGLIKuUeR&origin_space=2180&toggle=fil
    df_intercom_V3 : https://payfit.eu.looker.com/explore/customer_success/cs_metrics?qid=tFJthLmYABynCLSIh2FQ7d&origin_space=2180&toggle=fil

    Steps: both exports are mapped to a common schema and combined; events are
    flagged (clock-out at exactly 20:00, lunch window, duration above the
    service-level ceiling); events whose clock-in and clock-out fall on
    different days are discarded; flagged durations are capped by the agent's
    rolling median; durations are summed per agent and day and finally
    averaged over agents per date, country and service level.

    Parameters
    ----------
    df_sf : pandas.DataFrame
        Salesforce events. Reads the columns '1.1 - Events Event Date Second',
        '1.1 - Events Event Date Date', '1.2 - Cases Case ID',
        '1.1 - Events Effective Time Spent Salesforce' and the
        '2.2 - Payfiter - Event Modifier - Dynamic ...' e-mail, service level
        and scope country code columns. Not modified.
    df_intercom : pandas.DataFrame
        Intercom events. Same layout, with
        '1.1 - Events Effective Time Spent Intercom' and the
        '2.1 - Payfiter - Event Owner - Dynamic ...' columns. Not modified.
    daily_working_hours : float, default 7.8
        Denominator, in hours, of the occupancy percentages.

    Returns
    -------
    pandas.DataFrame
        One row per (Country, Service Level, Date day), sorted in that order,
        with counts, average hours and '% Occupancy' / '% Occupancy Adjusted'.
        The frame is indexed by the '% Occupancy Adjusted' values, which are
        also kept as a column.
    """
    sf_events = _normalise_events(
        df_sf,
        '2.2 - Payfiter - Event Modifier - Dynamic',
        '1.1 - Events Effective Time Spent Salesforce',
        'Duration SF (s)',
    )
    intercom_events = _normalise_events(
        df_intercom,
        '2.1 - Payfiter - Event Owner - Dynamic',
        '1.1 - Events Effective Time Spent Intercom',
        'Duration Intercom (s)',
    )

    # Outer merge on every column: effectively a union of both sources.
    events = pd.merge(sf_events, intercom_events, on=list(sf_events.columns), how='outer')

    events = _flag_events(events)
    events = _add_adjusted_durations(events)
    return _summarise_per_service_level(events, daily_working_hours)

### ... ###

# --- Read the two Looker exports from the '% occupancy' Google Sheet ---
# Authenticate the Colab user and authorise gspread once; the same client and
# spreadsheet handle are reused for reading the inputs and writing the result.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)
# Despite its name, `worksheet` is the whole spreadsheet; tabs are fetched by index.
worksheet = gc.open('% occupancy')

# Salesforce events: first tab (adjust the index if the tab order changes).
worksheet_sf = worksheet.get_worksheet(0)
data_sf = worksheet_sf.get_all_values()
df_sf = pd.DataFrame(data_sf[1:], columns=data_sf[0])  # first row holds the headers

# Intercom events: second tab (adjust the index if the tab order changes).
worksheet_intercom = worksheet.get_worksheet(1)
data_intercom = worksheet_intercom.get_all_values()
df_intercom = pd.DataFrame(data_intercom[1:], columns=data_intercom[0])

# --- Compute the occupancy summary ---
occupancy_summary_with_metrics = calculate_occupancy_ranges_with_additional_metrics(df_sf, df_intercom)
occupancy_summary_with_metrics = occupancy_summary_with_metrics.round(2)  # 2 decimals in the final table
# Dates are written as plain 'YYYY-MM-DD' text; missing values are left as they are.
occupancy_summary_with_metrics['Date day'] = occupancy_summary_with_metrics['Date day'].apply(lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else x)

# --- Write the summary to the sixth tab ---
sheet6 = worksheet.get_worksheet(5)
# Build the payload before clearing, so a failure here cannot leave the tab
# empty. Missing values are written as -1.
sheet6_rows = [occupancy_summary_with_metrics.columns.values.tolist()] + occupancy_summary_with_metrics.fillna(-1).values.tolist()
sheet6.clear()  # clean the tab
sheet6.update(sheet6_rows)  # copy the table

# Last expression of the cell, so the notebook displays the summary.
occupancy_summary_with_metrics