In [None]:
import pandas as pd
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta 
import mydashtools as dt
import plotly.express as px
import hashlib
# Route DataFrame.plot() through plotly.
pd.options.plotting.backend = "plotly"

# Inside a notebook there is no script path, so a placeholder is used to
# resolve the data directory below.
if dt.is_notebook():
    __file__ = "notebook/"

# Load the barista calendar export (semicolon-separated, comma decimals).
baristas_csv = os.path.dirname(os.path.realpath(__file__)) + "/../calendar/baristas.csv"
services_baristas_annule = pd.read_csv(
    baristas_csv,
    sep=";",
    decimal=',',
    low_memory=False,
    index_col=False,
)

# 'Mois' is the first day of the month of each row (built from the first
# seven characters of the raw date string); 'Date' is the full timestamp.
# The raw 'date' column is removed once both have been derived.
services_baristas_annule['Mois'] = pd.to_datetime(
    services_baristas_annule['date'].map(lambda raw: raw[0:7] + '-01')
)
services_baristas_annule['Date'] = pd.to_datetime(services_baristas_annule.pop('date'))

# Optional reporting cut-off, taken solely from the LIMIT_YEAR environment
# variable (e.g. LIMIT_YEAR=2023). When it is unset or empty, limit_year is
# falsy and the data is only capped at the current date and time.
limit_year = os.getenv("LIMIT_YEAR")
now = datetime.now()
if limit_year:
    # Keep only months up to the end of the requested year, then treat the
    # last remaining month as "now" for every relative window computed later.
    services_baristas_annule = services_baristas_annule[services_baristas_annule['Mois'] <= limit_year+'-12-31']
    # NOTE(review): 'Mois' is the first day of a month, so the filter below
    # drops every row of that last month dated after its first instant.
    # Confirm this is intended; max of 'Date' may be what was meant.
    now = max(services_baristas_annule['Mois'])

# Discard rows dated after "now". The explicit copy makes the frame
# independent from the raw one, so the column assignments that follow do not
# trigger pandas' SettingWithCopyWarning.
services_baristas_annule = services_baristas_annule[services_baristas_annule['Date'] <= str(now)].copy()
# --- Barista identity normalisation ------------------------------------
# Names are title-cased first so that case differences do not split a person.
services_baristas_annule['nom'] = services_baristas_annule['nom'].str.title()

# Lookup tables: first name seen for each phone, first phone seen for each name.
t2n = services_baristas_annule.groupby('telephone')['nom'].first().to_dict()
n2t = services_baristas_annule.groupby('nom')['telephone'].first().to_dict()

# Prefer the phone number associated with the name, fall back on the row's
# own phone number, and use an empty string when neither is available.
telephone_du_nom = services_baristas_annule['nom'].map(n2t)
services_baristas_annule['telephone'] = (
    telephone_du_nom
    .combine_first(services_baristas_annule['telephone'])
    .fillna('')
)

# Then prefer the name associated with that phone number over the row's name.
nom_du_telephone = services_baristas_annule['telephone'].map(t2n)
services_baristas_annule['nom'] = nom_du_telephone.combine_first(services_baristas_annule['nom'])

# The identifier is the MD5 hex digest of name and phone concatenated.
identite = services_baristas_annule['nom'].fillna('') + services_baristas_annule['telephone'].fillna('')
services_baristas_annule['barista_hash'] = identite.map(
    lambda valeur: hashlib.md5(valeur.encode()).hexdigest()
)

In [None]:
# Count the rows flagged "CRENEAU ANNULE" for each month.
annulations = services_baristas_annule.loc[
    services_baristas_annule['role'] == "CRENEAU ANNULE",
    ['Mois', 'role'],
]
creneaux_annules = annulations.groupby('Mois').count()

# Pivot to one column per year so that each year becomes its own bar series.
creneaux_annules['Année'] = creneaux_annules.index.year
creneaux_annules = (
    creneaux_annules
    .reset_index()
    .set_index(['Mois', 'Année'])
    .unstack('Année')
    .fillna(0)
)
creneaux_annules.columns = creneaux_annules.columns.droplevel(0)

fig = px.bar(creneaux_annules)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
)
dt.fig_save_or_show(fig, 'baristas_01_creneaux_annules', limit_year)

In [None]:
# Cancelled slots per weekly time slot over the six months preceding `now`.
limit_date_year = now + relativedelta(months=-6)

# Explicit copy: columns are added below, and assigning on a filtered slice
# would otherwise raise pandas' SettingWithCopyWarning.
creneaux_annules_year = services_baristas_annule[services_baristas_annule['Mois'] > limit_date_year].copy()

# 1 for a cancelled slot, 0 for any other role.
creneaux_annules_year['Nb services annulés'] = (creneaux_annules_year['role'] == "CRENEAU ANNULE").astype(int)

# Slot label = French day name + time of day. The "Order" variant is prefixed
# with the weekday number so that grouping sorts slots Monday-first instead
# of alphabetically. Requires the fr_FR.utf8 locale to be installed.
jour_semaine = creneaux_annules_year['Date'].dt.day_name(locale='fr_FR.utf8')
heure_service = creneaux_annules_year['Date'].dt.time.map(str)
creneaux_annules_year['Service Semaine Order'] = creneaux_annules_year['Date'].dt.weekday.map(str)+' '+jour_semaine+' '+heure_service
creneaux_annules_year['Service Semaine'] = jour_semaine+' '+heure_service

creneaux_annules_semaine = creneaux_annules_year.groupby(['Service Semaine Order', 'Service Semaine'])[['Nb services annulés']].sum()
creneaux_annules_semaine = creneaux_annules_semaine.reset_index().set_index('Service Semaine')

fig = px.bar(creneaux_annules_semaine['Nb services annulés'])
fig.update_layout(showlegend=False,xaxis_title=None,yaxis_title=None)
dt.fig_save_or_show(fig, 'baristas_11_creneaux_annules_semaine', limit_year)


In [None]:
# Services actually staffed: every row except cancelled slots.
# Explicit copy: columns are added below, and assigning on a filtered slice
# would otherwise raise pandas' SettingWithCopyWarning.
services_baristas = services_baristas_annule[services_baristas_annule['role'] != "CRENEAU ANNULE"].copy()

# Earliest and latest service date of each barista, keyed by barista_hash.
dates_par_barista = services_baristas.groupby('barista_hash')['Date']
premier_service = dates_par_barista.min().to_dict()
dernier_service = dates_par_barista.max().to_dict()

# Flag the rows that are a barista's very first service.
services_baristas['Date premier service'] = services_baristas['barista_hash'].map(premier_service)
services_baristas['is premier service'] = services_baristas['Date'] == services_baristas['Date premier service']

# Flag the rows that are a barista's most recent service.
services_baristas['Date dernier service'] = services_baristas['barista_hash'].map(dernier_service)
services_baristas['is dernier service'] = services_baristas['Date'] == services_baristas['Date dernier service']

In [None]:
# One row per (month, barista) with the number of services done that month.
nb_baristas = (
    services_baristas
    .groupby(['Mois', 'nom', 'telephone'])[['role']]
    .count()
    .rename(columns={'role': 'nb'})
    .reset_index()
)
nb_baristas['Année'] = nb_baristas['Mois'].dt.year

# Per month: 'count' is the number of baristas and 'sum' the number of
# services. Only 'nb' is aggregated; summing the name and phone columns
# would be meaningless and can fail on mixed types.
nb_baristas_mois = nb_baristas.groupby('Mois')[['nb']].aggregate(func=['count', 'sum'])
nb_baristas_mois.columns = nb_baristas_mois.columns.droplevel(0)
nb_baristas_mois = nb_baristas_mois[['count', 'sum']]
nb_baristas_mois = nb_baristas_mois.rename(columns={'count': 'Nb de baristas', 'sum': 'Nb de services réalisés'})
nb_baristas_mois['Année'] = nb_baristas_mois.index.year

# Per year: services are first totalled per barista, then the same
# count/sum pair is taken over the baristas of each year.
nb_baristas_ans = (
    nb_baristas
    .groupby(['Année', 'nom', 'telephone'])[['nb']]
    .sum()
    .reset_index()
    .groupby('Année')[['nb']]
    .aggregate(func=['count', 'sum'])
)
nb_baristas_ans.columns = nb_baristas_ans.columns.droplevel(0)
nb_baristas_ans = nb_baristas_ans.rename(columns={'count': 'Nb de baristas', 'sum': 'Nb de services réalisés'})

In [None]:
# Bar chart: number of baristas per year (no legend, no axis titles).
baristas_par_an = nb_baristas_ans[['Nb de baristas']]
fig = px.bar(baristas_par_an)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
)
dt.fig_save_or_show(fig, 'baristas_02_nb_baristas_par_an', limit_year)

In [None]:
# Bar chart: number of services done per year.
services_par_an = nb_baristas_ans[['Nb de services réalisés']]
fig = px.bar(services_par_an)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
    # Decimal separator ',' and thousands separator ' '.
    separators=', ',
)
# Integer ticks with a thousands separator.
fig.update_yaxes(tickformat=",d")
dt.fig_save_or_show(fig, 'baristas_03_nb_services_realises_par_an', limit_year)

In [None]:
# Bar chart: number of baristas per month (no legend, no axis titles).
baristas_par_mois = nb_baristas_mois['Nb de baristas']
fig = px.bar(baristas_par_mois)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
)
dt.fig_save_or_show(fig, 'baristas_04_nb_baristas_par_mois', limit_year)

In [None]:
# Bar chart: number of services done per month (no legend, no axis titles).
services_par_mois = nb_baristas_mois['Nb de services réalisés']
fig = px.bar(services_par_mois)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
)
dt.fig_save_or_show(fig, 'baristas_05_nb_services_réalisés_par_mois', limit_year)

In [None]:
# Restrict to rows whose role is "REFERENT", count them per (month, barista),
# then per month take both the number of referents ('count') and the number
# of referent services ('sum').
referents = services_baristas[services_baristas['role'] == "REFERENT"]
services = (
    referents
    .groupby(['Mois', 'nom', 'telephone'])[['role']]
    .count()
    .rename(columns={'role': 'nb'})
    .groupby('Mois')
    .aggregate(func=['count', 'sum'])
)
services.columns = services.columns.droplevel(0)
services['Année'] = services.index.year

# Pivot to one column per (statistic, year); missing combinations become 0.
services = (
    services
    .reset_index()
    .set_index(['Mois', 'Année'])
    .unstack('Année')
    .fillna(0)
)

In [None]:
# Bar chart: number of referents per month, one series per year.
referents_par_mois = services['count']
fig = px.bar(referents_par_mois)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
)
dt.fig_save_or_show(fig, 'baristas_06_nb_referents_par_mois', limit_year)

In [None]:
# Number of distinct service timestamps per month: several baristas can
# share one 'Date', which must be counted once.
nb_services = services_baristas.groupby('Mois')[['Date']].nunique()

# Pivot to one column per year so that each year becomes its own bar series.
nb_services['Année'] = nb_services.index.year
nb_services = (
    nb_services
    .reset_index()
    .set_index(['Mois', 'Année'])
    .unstack('Année')
    .fillna(0)
)
nb_services.columns = nb_services.columns.droplevel(0)

fig = px.bar(nb_services)
fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title=None,
)
dt.fig_save_or_show(fig, 'baristas_07_nb_services_ouverts', limit_year)

In [None]:
# Services of the twelve months preceding `now`.
limit_date_year = now + relativedelta(years=-1)

# Explicit copy: columns are added below, and assigning on a filtered slice
# would otherwise raise pandas' SettingWithCopyWarning.
services_baristas_year = services_baristas[services_baristas['Mois'] > limit_date_year].copy()

# A barista is "new" when their first service falls within one year of the
# most recent first service in the window. The cut-off is relative to that
# latest first service, not to `now`.
seuil_premier_service = (max(services_baristas_year['Date premier service']) + relativedelta(years=-1)).isoformat()
services_baristas_year['is_premier_service_12_mois'] = services_baristas_year['Date premier service'] >= seuil_premier_service
services_baristas_year['is_not_premier_service_12_mois'] = services_baristas_year['Date premier service'] < seuil_premier_service

# Number of services per barista, busiest first.
baristas_year = (
    services_baristas_year
    .groupby(['nom', 'barista_hash', 'is_not_premier_service_12_mois'])[['role']]
    .count()
    .rename(columns={'role': 'nb'})
    .sort_values(by=['nb', 'is_not_premier_service_12_mois', 'nom'], ascending=False)
    .rename(columns={'nb': 'Nb de services'})
    .reset_index()
    .set_index('nom')
)

# Split the count into two mutually exclusive columns (NaN in the other one)
# so the chart shows long-standing and new baristas as separate series.
# Series.where masks row by row, so it does not depend on 'nom' being a
# unique index the way an aligned assignment would.
est_ancien = baristas_year['is_not_premier_service_12_mois']
baristas_year['Est là depuis plus d\'un an'] = baristas_year['Nb de services'].where(est_ancien)
baristas_year['A fait son premier service dans les 12 mois'] = baristas_year['Nb de services'].where(~est_ancien)

# Only the two split columns are plotted.
graph_baristas_year = baristas_year.drop(columns=['is_not_premier_service_12_mois', 'barista_hash', 'Nb de services'])

fig = px.bar(graph_baristas_year)
fig.update_layout(showlegend=False,xaxis_title=None,yaxis_title=None)
# Tick labels (the index, i.e. barista names) are hidden on the x axis.
fig.update_xaxes(showticklabels=False)
dt.fig_save_or_show(fig, 'baristas_08_nb_services_par_baristas_12_mois', limit_year)

In [None]:
# Notebook-only preview of the first 60 rows of the per-barista table.
if dt.is_notebook():
    apercu_baristas = baristas_year.head(60)
    print(apercu_baristas)

In [None]:
# Summary statistics over the per-barista service counts.
nb_services_par_barista = baristas_year['Nb de services']
# Both masks are built on the full frame and combined with `&`, so the
# filter never relies on re-aligning a mask against an already filtered frame.
est_nouveau = baristas_year['is_not_premier_service_12_mois'] != True

res={
    'un_seul_service_premier_service': len(baristas_year[(nb_services_par_barista < 2) & est_nouveau]),
    'un_seul_service': len(baristas_year[nb_services_par_barista < 2]),
    # Strictly fewer than three services, so this includes the single-service baristas.
    'deux_services': len(baristas_year[nb_services_par_barista < 3]),
    'plus_de_5_services': len(baristas_year[nb_services_par_barista > 5]),
    'plus_de_10_services': len(baristas_year[nb_services_par_barista > 10]),
    'plus_de_20_services': len(baristas_year[nb_services_par_barista > 20]),
    'plus_de_40_services': len(baristas_year[nb_services_par_barista > 40]),
    'plus_de_60_services': len(baristas_year[nb_services_par_barista > 60]),
    'nb_baristas': len(baristas_year),
    'moyenne_du_nb_de_services': nb_services_par_barista.mean(),
    'nb_plus_de_12_mois': len(baristas_year['Est là depuis plus d\'un an'].dropna()),
    'nb_moins_de_12_mois': len(baristas_year['A fait son premier service dans les 12 mois'].dropna())
}

# NOTE(review): despite the key name ("ecart_type" = standard deviation), the
# value stored is the service count of the row in the middle position of the
# frame, which is a median if baristas_year is still sorted by service count.
# Confirm which statistic is wanted before renaming the key or switching to .std().
baristas_std = baristas_year.reset_index()
res['ecart_type_du_nb_de_services'] = baristas_std['Nb de services'].iloc[len(baristas_std) // 2]
if dt.is_notebook():
    print(res)

In [None]:
# Work on an independent copy: columns are added below, and assigning on a
# filtered slice would otherwise raise pandas' SettingWithCopyWarning.
services_baristas_year = services_baristas_year.copy()

# A row "has a first service" when its timestamp is one at which some
# barista did their very first service.
dates_premier_service = services_baristas_year.loc[services_baristas_year['is premier service'], 'Date']
services_baristas_year['has_premier_service'] = services_baristas_year['Date'].isin(dates_premier_service)

# Attach each barista's service count over the window and flag those who
# did exactly one service.
t2nb = baristas_year.set_index('barista_hash')['Nb de services'].to_dict()
services_baristas_year['nb services'] = services_baristas_year['barista_hash'].map(t2nb)
services_baristas_year['is_unique_service'] = services_baristas_year['nb services'] == 1

# Per name: number of rows falling on a timestamp with a first service.
has_premier_service_year = services_baristas_year[services_baristas_year['has_premier_service']].copy()
n2nb1 = has_premier_service_year.groupby('nom')['has_premier_service'].count().to_dict()
has_premier_service_year['nb 1er services'] = has_premier_service_year['nom'].map(n2nb1)

# Keep the timestamps where at least one single-service barista was present,
# then compare, per (name, role), the rows on those timestamps with all the
# rows on first-service timestamps.
dates_service_unique = has_premier_service_year.loc[has_premier_service_year['is_unique_service'], 'Date']
has_premier_service = has_premier_service_year[has_premier_service_year['Date'].isin(dates_service_unique)]
has_premier_service = has_premier_service.groupby(['nom', 'role'])['nb 1er services'].aggregate(func=['first', 'count']).rename(columns={'first': 'nb avec 1er service', 'count': 'nb avec uniq service'})
has_premier_service['pc'] = has_premier_service['nb avec uniq service'] * 100 / has_premier_service['nb avec 1er service']

# Rows with exactly one such service are excluded.
has_premier_service = has_premier_service[has_premier_service['nb avec uniq service'] != 1]
has_premier_service.sort_values(by='pc', ascending=False)