In [2]:
import pandas as pd
from pathlib import Path
import numpy as np
from utils import get_tx_from_nb, plot_horizontal_barchart, highlight_corr, format_pct, format_round
import plotly.express as px
import plotly.graph_objects as go

import joblib 

In [3]:
# Folders for the pickled inputs and for the generated reports.
path_data = Path('../data/')
path_outputs = Path('../outputs/')

# Pickled inputs read by this notebook.
path_df_stagiaire = path_data / 'df_stagiaire.pkl'
path_df_intervenant = path_data / "df_intervenant.pkl"
path_labels = path_data / "labels.pkl"

# NOTE(review): unpickling runs arbitrary code; only load files you trust.
df = pd.read_pickle(path_df_stagiaire)
df_int = pd.read_pickle(path_df_intervenant)
with path_labels.open("rb") as labels_file:
    labels = joblib.load(labels_file)

## contexte

In [4]:
# Every row of `df` is counted as one student.
nb_fi = df.shape[0]
print(f"nombre d'étudiants en formation initiale: {nb_fi}")

nombre d'étudiants en formation initiale: 86


## analyses

### satisfaction globale

In [5]:
# For every metric, count the answers equal to each Likert level (1-4) over
# the five module evaluations (columns "<metric>.EV1" .. "<metric>.EV5").
likert_labels = labels["likert"]
df_sat = pd.DataFrame({"metric": ["Satisfaction globale", "Pédagogie", "Organisation", "motivation"]})
for level, level_label in zip(range(1, 5), likert_labels):
    counts = []
    for metric_name in df_sat["metric"]:
        nb = 0
        for i_mod in range(1, 6):
            nb += (df[f"{metric_name}.EV{i_mod}"] == level).sum()
        counts.append(nb)
    df_sat[level_label] = counts

# Row total of the four count columns; it does not change while the PCT_*
# columns are being added, so it is computed once.
nb_answers = df_sat[likert_labels].sum(axis=1)
for level_label in likert_labels:
    df_sat[f"PCT_{level_label}"] = (df_sat[level_label] / nb_answers).apply(format_pct)

# Mean level, weighting each count column by its level number (1..4).
weighted = sum([df_sat[level_label] * level for level, level_label in zip(range(1, 5), likert_labels)])
df_sat["note_moyenne"] = (weighted / nb_answers).apply(format_round)

In [6]:
def plot_hor_bc(d, y, traces, colors, pct=False):
    """Draw a stacked horizontal bar chart with a centred label on every segment.

    Args:
        d: DataFrame with one row per bar; it holds column ``y`` and every
            column listed in ``traces``.
        y: Name of the column whose values label the bars.
        traces: Column names to stack, in left-to-right order.
        colors: Mapping from trace name to a sequence whose first three items
            are the red, green and blue components of the segment colour.
        pct: When true, " %" is appended to the text of each segment label.

    Returns:
        The ``go.Figure`` with the bars, layout and annotations applied.
    """
    fig = go.Figure()
    for t in traces:
        cl = colors[t]
        fig.add_trace(go.Bar(
            y=d[y],
            x=d[t],
            name=t,
            orientation='h',
            marker=dict(
                # Fill at 70 % opacity, separated by an opaque light border.
                color=f"rgba({cl[0]}, {cl[1]}, {cl[2]}, 0.7)",
                line=dict(color="rgba(237, 231, 225, 1)", width=2)
            )
        ))

    suffix = " %" if pct else ""
    annot = []
    # Walk the rows once: each bar reads its own values instead of re-filtering
    # the frame with a boolean mask for every single label.
    for m, *values in zip(d[y], *(d[t] for t in traces)):
        space = 0  # left edge of the segment currently being labelled
        for v in values:
            annot.append(dict(
                xref="x", yref="y",
                x=space + v / 2, y=m,  # label sits at the middle of the segment
                text=f"{v}{suffix}",
                showarrow=False,
                font=dict(family='Arial', size=14, color='black')
            ))
            space += v

    fig.update_layout(
        barmode="stack",
        paper_bgcolor='rgb(255, 255, 255)',
        plot_bgcolor='rgb(255, 255, 255)',
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False
        ),
        annotations=annot,
        showlegend=False,
        font=dict(size=18)
    )
    return fig


In [7]:
# Display the per-metric table: answer counts, percentages and mean level.
df_sat

Unnamed: 0,metric,Très satisfait,Satisfait,Déçu,Très déçu,PCT_Très satisfait,PCT_Satisfait,PCT_Déçu,PCT_Très déçu,note_moyenne
0,Satisfaction globale,13,125,146,26,4,40,47,8,2.6
1,Pédagogie,84,71,82,73,27,23,26,24,2.46
2,Organisation,65,77,111,57,21,25,36,18,2.52
3,motivation,39,79,101,91,13,25,33,29,2.79


In [8]:
# RGB colour of each percentage column stacked in the chart.
colors = {
    "PCT_Très déçu": (228, 5, 19),
    "PCT_Déçu": (255, 148, 3),
    "PCT_Satisfait": (187, 235, 0),
    "PCT_Très satisfait": (37, 167, 56),
}
pct_columns = [f"PCT_{c}" for c in labels["likert"]]
fig = plot_hor_bc(df_sat, y="metric", traces=pct_columns, colors=colors, pct=True)
fig.show()

In [9]:
# Write the current `fig` to a PNG file in the outputs folder.
path_out = path_outputs / "sondage_stagiaire.png"
fig.write_image(path_out)

### par module

In [10]:
# Likert answer counts per measure and per module:
#   mod_likert_nb[measure][module] -> [number of answers equal to 1, 2, 3, 4]
modules = [f"module {i}" for i in range(1, 6)] + ["évaluation finale"]
mesures = ["Satisfaction globale", "motivation", "Pédagogie", "Organisation"]
# Measures for which the final-evaluation column ("<measure>.EV6") is counted too.
mesures_avec_ef = ("motivation", "Pédagogie")

mod_likert_nb = {}
for mes in mesures:
    nb_eval = 6 if mes in mesures_avec_ef else 5
    # modules[k] pairs with column "<measure>.EV{k+1}"; index 5 is the final evaluation.
    mod_likert_nb[mes] = {
        modules[k]: [(df[f"{mes}.EV{k + 1}"] == n).sum() for n in range(1, 5)]
        for k in range(nb_eval)
    }

In [11]:
# Chart categories: the five modules followed by the final evaluation.
cats = [f"module {i}" for i in range(1, 6)] + ["évaluation finale"]


# Display switches for the charts drawn below.
# include_EF: keep "évaluation finale" among the categories when True.
include_EF = False
# i is only referenced by the commented-out line inside the loop.
i = 2
# ic / iv / il / pct are forwarded to plot_horizontal_barchart as
# inv_cats / inv_values / inv_labels / pct.
ic = True
iv = False
il = False
pct = True

if not include_EF: cats = cats[:-1]

# NOTE(review): with include_EF False, `cats` has five entries while the
# printed rates for "motivation" and "Pédagogie" still contain
# "évaluation finale" — confirm plot_horizontal_barchart tolerates that.
for mes in mesures:
    # mes = mesures[i]  (disabled: would restrict the loop to one measure)
    top_labels = labels["likert"]
    # NOTE(review): this call does not depend on `mes`; presumably it can be
    # hoisted above the loop if get_tx_from_nb has no side effects — confirm.
    mod_likert_tx = get_tx_from_nb(mod_likert_nb)
    print(mes)
    print(mod_likert_tx[mes])
    title = f"{mes} par module"
    plot_horizontal_barchart(cats, mod_likert_tx[mes], top_labels, inv_cats=ic, inv_values=iv, inv_labels=il, pct=pct, title=title)


Satisfaction globale
{'module 1': [0, 35, 53, 11], 'module 2': [3, 40, 48, 8], 'module 3': [3, 39, 50, 8], 'module 4': [5, 37, 52, 6], 'module 5': [10, 50, 32, 8]}


motivation
{'module 1': [0, 34, 37, 29], 'module 2': [15, 23, 35, 27], 'module 3': [16, 19, 32, 32], 'module 4': [15, 23, 31, 32], 'module 5': [18, 29, 27, 26], 'évaluation finale': [18, 37, 39, 6]}


Pédagogie
{'module 1': [27, 18, 23, 32], 'module 2': [27, 23, 23, 27], 'module 3': [23, 21, 40, 16], 'module 4': [29, 27, 21, 23], 'module 5': [29, 26, 26, 19], 'évaluation finale': [24, 24, 31, 21]}


Organisation
{'module 1': [13, 39, 19, 29], 'module 2': [24, 18, 42, 16], 'module 3': [23, 16, 42, 19], 'module 4': [21, 24, 39, 16], 'module 5': [24, 27, 37, 11]}


In [12]:
## Per-module performance table
# Indicators listed for this table:
# - mean attendance rate
# - number of trainees who attended every in-person session of the module
# - number of drop-outs after the module
# - mean grade (students who attended at least one in-person day)
# - (instructors) student engagement
# - (instructors) overall satisfaction
# - (instructors) satisfaction with the organisation
data_module = {"module": range(1, 6)}

In [13]:
# Attendance rate per module. Each module reads three consecutive day columns
# (module 1 -> PJ1..PJ3, module 2 -> PJ4..PJ6, ...); the sum of those columns
# is divided by 3 * number of students, then rounded to two decimals.
tx_presence = []
for imod in range(1, 6):
    first_day = 3 * (imod - 1) + 1
    day_cols = [f"PJ{day}" for day in range(first_day, first_day + 3)]
    rate = df[day_cols].sum(axis=1).sum() / (3 * len(df))
    tx_presence.append(round(rate * 100) / 100)
data_module["tx_presence_module"] = tx_presence

In [14]:
# nombre de présents sur tout le module
#data_module["nb_present_tout_module"] = [df[f"present_tout_module_{i}"].sum() for i in range(1,6)]

In [15]:
## Number of drop-outs after each module
nb_abandons = [df[f"abandon_apres_module_{k}"].sum() for k in range(1, 6)]
data_module["nb_abandons_apres_module"] = nb_abandons
# Drop-outs after module 1, as a share of all rows in df.
print(f"% de la promotion abandonnant après la fin du premier module: {nb_abandons[0]/len(df):.0%}")

% de la promotion abandonnant après la fin du premier module: 13%


In [16]:
## Mean grade per module, limited to students not flagged absent for that module
note_moy = []
for k in range(1, 6):
    # assumes absent_module_<k> is a boolean column, so `~` negates it — TODO confirm
    present = ~df[f"absent_module_{k}"]
    mean_grade = df.loc[present, f"REV{k}"].mean()
    note_moy.append(round(mean_grade * 100) / 100)
data_module["note_moyenne"] = note_moy

In [17]:
## Instructor feedback, averaged per module:
# student involvement, organisation of the module, overall satisfaction
interv_engag = [df_int[f"Implication étudiants.EV{k}"].mean() for k in range(1, 6)]
interv_orga = [df_int[f"Organisation.EV{k}"].mean() for k in range(1, 6)]
interv_satis = [df_int[f"Satisfaction globale.EV{k}"].mean() for k in range(1, 6)]
data_module.update(
    interv_engag=interv_engag,
    interv_orga=interv_orga,
    interv_satis=interv_satis,
)

In [18]:
## Mean trainee answer per module, rounded to two decimals
# data_module key -> prefix of the survey columns "<prefix>.EV1" .. "<prefix>.EV5"
sta_prefixes = {
    "sta_motivation": "motivation",
    "sta_peda": "Pédagogie",
    "sta_orga": "Organisation",
    "sta_global": "Satisfaction globale",
}
for sta_key, prefix in sta_prefixes.items():
    data_module[sta_key] = [round(df[f"{prefix}.EV{k}"].mean() * 100) / 100 for k in range(1, 6)]

In [19]:
# Turn the per-module indicators into a table, save it as an Excel file in the
# outputs folder, then display it.
df_data_module = pd.DataFrame(data_module)
df_data_module.to_excel(path_outputs / "df_module.xlsx")
df_data_module

Unnamed: 0,module,tx_presence_module,nb_abandons_apres_module,note_moyenne,interv_engag,interv_orga,interv_satis,sta_motivation,sta_peda,sta_orga,sta_global
0,1,0.82,11,6.29,4.5,5.0,5.5,2.95,2.6,2.65,2.76
1,2,0.69,0,6.6,3.5,4.5,4.5,2.76,2.5,2.5,2.61
2,3,0.61,1,5.97,3.0,3.5,4.0,2.81,2.5,2.58,2.63
3,4,0.76,2,6.43,5.5,5.5,5.5,2.81,2.37,2.5,2.6
4,5,0.53,0,6.34,2.0,2.0,3.0,2.61,2.35,2.35,2.39


In [20]:
# Fresh copy of the table with the attendance rate scaled to percent;
# data_module itself keeps the original fractions.
df_data_module = pd.DataFrame(data_module).assign(
    tx_presence_module=lambda t: t["tx_presence_module"] * 100
)

axis_labels = {"tx_presence_module": "Taux de présence", "module": "Module"}
fig = px.line(data_frame=df_data_module, x="module", y="tx_presence_module", labels=axis_labels)
fig.update_layout(
    # One tick per module on x; ticks every 10 anchored at 50 on y.
    xaxis={"tickmode": "linear", "tick0": 1, "dtick": 1},
    yaxis={"tickmode": "linear", "tick0": 50, "dtick": 10},
    yaxis_ticksuffix="%",
)
fig.update_xaxes(tickfont_size=20, title_font_size=30)
fig.update_yaxes(tickfont_size=20, title_font_size=30)
fig.show()

In [21]:
# Mean of the PEF column, printed as the attendance rate on the final evaluation day.
tx_pres_pef = df["PEF"].mean()
print(f"taux de présence à la journée d'évaluation finale: {tx_pres_pef:.0%}")

taux de présence à la journée d'évaluation finale: 84%


In [22]:
# Spearman rank correlation between the per-module indicators. The cells are
# styled with highlight_corr (the rule also applied to the indicator
# correlation matrix) instead of a lambda that painted every cell red.
df_data_module.corr("spearman").style.applymap(highlight_corr)

Unnamed: 0,module,tx_presence_module,nb_abandons_apres_module,note_moyenne,interv_engag,interv_orga,interv_satis,sta_motivation,sta_peda,sta_orga,sta_global
module,1.0,-0.7,-0.46169,0.1,-0.4,-0.4,-0.564288,-0.666886,-0.974679,-0.820783,-0.9
tx_presence_module,-0.7,1.0,0.820783,0.1,0.9,0.9,0.974679,0.820783,0.666886,0.666886,0.6
nb_abandons_apres_module,-0.46169,0.820783,1.0,-0.410391,0.718185,0.718185,0.789474,0.947368,0.552632,0.763158,0.615587
note_moyenne,0.1,0.1,-0.410391,1.0,0.3,0.3,0.205196,-0.46169,-0.307794,-0.564288,-0.5
interv_engag,-0.4,0.9,0.718185,0.3,1.0,1.0,0.974679,0.666886,0.359092,0.410391,0.3
interv_orga,-0.4,0.9,0.718185,0.3,1.0,1.0,0.974679,0.666886,0.359092,0.410391,0.3
interv_satis,-0.564288,0.974679,0.789474,0.205196,0.974679,0.974679,1.0,0.763158,0.526316,0.552632,0.46169
sta_motivation,-0.666886,0.820783,0.947368,-0.46169,0.666886,0.666886,0.763158,1.0,0.763158,0.921053,0.820783
sta_peda,-0.974679,0.666886,0.552632,-0.307794,0.359092,0.359092,0.526316,0.763158,1.0,0.921053,0.974679
sta_orga,-0.820783,0.666886,0.763158,-0.564288,0.410391,0.410391,0.552632,0.921053,0.921053,1.0,0.974679


### performance académique

In [23]:
# One-row table of mean grades: one column per module, the final exam, the
# mean of the five module means, and the mean of all six.
col_modules = [f"module {i}" for i in range(1, 6)]
df_perf = pd.DataFrame({"métrique": ["note moyenne"]})
for k, module_col in enumerate(col_modules, start=1):
    df_perf[module_col] = df[f"REV{k}"].mean()
df_perf["examen final"] = df["REV6"].mean()
df_perf["moyenne sur les 5 modules"] = df_perf[col_modules].mean(axis=1)
df_perf["moyenne globale"] = df_perf[[*col_modules, "examen final"]].mean(axis=1)

# Format every numeric column; the label column is left untouched.
value_cols = [c for c in df_perf.columns if c != "métrique"]
for c in value_cols:
    df_perf[c] = df_perf[c].apply(format_round)
df_perf.to_csv(path_outputs / "perf_academique.csv")
df_perf

Unnamed: 0,métrique,module 1,module 2,module 3,module 4,module 5,examen final,moyenne sur les 5 modules,moyenne globale
0,note moyenne,5.87,5.94,5.06,5.45,5.31,5.89,5.53,5.59


In [24]:
# Share of MOYENNE_EV values at or above 5 (success) and below 5 (failure).
moyennes = df["MOYENNE_EV"]
pct_reussite = format_pct((moyennes >= 5).mean())
pct_echec = format_pct((moyennes < 5).mean())
print(f"Taux de réussite: {pct_reussite}%")
print(f"Taux d'échec: {pct_echec}%")

Taux de réussite: 74%
Taux d'échec: 26%


In [25]:
# Share of rows per academic_lab value, in value_counts() order.
profil_counts = df["academic_lab"].value_counts()
profil_share = profil_counts / profil_counts.sum()
df_profil_aca = pd.DataFrame({"profil": profil_share.index, "pct": profil_share.values})
df_profil_aca["pct"] = df_profil_aca["pct"].apply(format_pct)
df_profil_aca

Unnamed: 0,profil,pct
0,bon,29
1,excellent,28
2,échec,26
3,moyen,17


### persona

In [26]:
## Personas
# Draft criteria noted for this section: academic results, attendance profile,
# motivation, promoter status. Only the first two appear in the thresholds.
# A row matches a persona when both half-open ranges hold:
#   th_aca_1 <= MOYENNE_EV < th_aca_2   and   th_pre_1 <= pre_tx < th_pre_2
personas = {
    "tête de classe": {"th_aca_1": 8, "th_aca_2": 11, "th_pre_1": 1, "th_pre_2": 2},
    "sérieux": {"th_aca_1": 6, "th_aca_2": 8, "th_pre_1": 0.8, "th_pre_2": 2},
    "intermittent": {"th_aca_1": 5, "th_aca_2": 6, "th_pre_1": 0.4, "th_pre_2": 0.6},
    "démissionnaire": {"th_aca_1": 0, "th_aca_2": 5, "th_pre_1": 0, "th_pre_2": 0.4}
}

for p_lab, th in personas.items():
    moyenne = df["MOYENNE_EV"]
    presence = df["pre_tx"]
    in_aca = (moyenne >= th["th_aca_1"]) & (moyenne < th["th_aca_2"])
    in_pre = (presence >= th["th_pre_1"]) & (presence < th["th_pre_2"])
    is_persona = in_aca & in_pre
    # Stored on df as a boolean column that later cells use as a mask.
    df[f"persona_{p_lab}"] = is_persona
    print(f"{p_lab}: {is_persona.sum()} ({is_persona.mean():.0%})")

tête de classe: 21 (24%)
sérieux: 23 (27%)
intermittent: 9 (10%)
démissionnaire: 15 (17%)


In [27]:
# Survey response rate and number of respondents within each persona.
for p_lab in personas:
    answered = df.loc[df[f"persona_{p_lab}"], "repondu_sondage"]
    tx = answered.mean()
    tot = answered.sum()
    print(f"taux de réponse pour '{p_lab}': {tx:.0%} (nb personnes: {tot})")

taux de réponse pour 'tête de classe': 76% (nb personnes: 16)
taux de réponse pour 'sérieux': 83% (nb personnes: 19)
taux de réponse pour 'intermittent': 67% (nb personnes: 6)
taux de réponse pour 'démissionnaire': 73% (nb personnes: 11)


In [28]:
# Overall-satisfaction answers broken down by persona: counts per Likert
# level over the five module evaluations, percentages, and mean level.
metric = "Satisfaction globale"
likert_labels = labels["likert"]
df_persona_sat = pd.DataFrame({"persona": personas.keys()})
df_plot = df_persona_sat  # same object: columns below are added to df_persona_sat

for level, level_label in zip(range(1, 5), likert_labels):
    counts = []
    for pl in df_plot["persona"]:
        in_persona = df[f"persona_{pl}"]
        counts.append(sum([(df.loc[in_persona, f"{metric}.EV{i_mod}"] == level).sum() for i_mod in range(1, 6)]))
    df_plot[level_label] = counts

# Row total of the count columns; unchanged while PCT_* columns are added.
nb_answers = df_plot[likert_labels].sum(axis=1)
for level_label in likert_labels:
    df_plot[f"PCT_{level_label}"] = (df_plot[level_label] / nb_answers).apply(format_pct)

weighted = sum([df_plot[level_label] * level for level, level_label in zip(range(1, 5), likert_labels)])
df_plot["note_moyenne"] = (weighted / nb_answers).apply(format_round)
df_persona_sat

Unnamed: 0,persona,Très satisfait,Satisfait,Déçu,Très déçu,PCT_Très satisfait,PCT_Satisfait,PCT_Déçu,PCT_Très déçu,note_moyenne
0,tête de classe,1,39,32,8,1,49,40,10,2.59
1,sérieux,1,44,43,7,1,46,45,7,2.59
2,intermittent,5,15,10,0,17,50,33,0,2.17
3,démissionnaire,0,13,36,6,0,24,65,11,2.87


In [29]:
# RGB colour of each satisfaction percentage column.
colors = {
    "PCT_Très déçu": (228, 5, 19),
    "PCT_Déçu": (255, 148, 3),
    "PCT_Satisfait": (187, 235, 0),
    "PCT_Très satisfait": (37, 167, 56),
}
pct_columns = [f"PCT_{c}" for c in labels["likert"]]
fig = plot_hor_bc(df_persona_sat, y="persona", traces=pct_columns, colors=colors, pct=True)
fig.show()

In [30]:
# Motivation answers broken down by persona: counts per level over the five
# module evaluations, percentages, and mean level.
metric = "motivation"
labels["likert_motiv"] = ["Très motivé", "Motivé", "Démotivé", "Très démotivé"]
motiv_labels = labels["likert_motiv"]
df_persona_motiv = pd.DataFrame({"persona": personas.keys()})
df_plot = df_persona_motiv  # same object: columns below are added to df_persona_motiv

for level, level_label in zip(range(1, 5), motiv_labels):
    counts = []
    for pl in df_plot["persona"]:
        in_persona = df[f"persona_{pl}"]
        counts.append(sum([(df.loc[in_persona, f"{metric}.EV{i_mod}"] == level).sum() for i_mod in range(1, 6)]))
    df_plot[level_label] = counts

# Row total of the count columns; unchanged while PCT_* columns are added.
nb_answers = df_plot[motiv_labels].sum(axis=1)
for level_label in motiv_labels:
    df_plot[f"PCT_{level_label}"] = (df_plot[level_label] / nb_answers).apply(format_pct)

weighted = sum([df_plot[level_label] * level for level, level_label in zip(range(1, 5), motiv_labels)])
df_plot["note_moyenne"] = (weighted / nb_answers).apply(format_round)
df_persona_motiv

Unnamed: 0,persona,Très motivé,Motivé,Démotivé,Très démotivé,PCT_Très motivé,PCT_Motivé,PCT_Démotivé,PCT_Très démotivé,note_moyenne
0,tête de classe,10,21,24,25,12,26,30,31,2.8
1,sérieux,10,16,29,40,11,17,31,42,3.04
2,intermittent,8,17,5,0,27,57,17,0,1.9
3,démissionnaire,1,9,27,18,2,16,49,33,3.13


In [31]:
# RGB colour of each motivation percentage column.
colors = {
    "PCT_Très démotivé": (228, 5, 19),
    "PCT_Démotivé": (255, 148, 3),
    "PCT_Motivé": (187, 235, 0),
    "PCT_Très motivé": (37, 167, 56),
}
pct_columns = [f"PCT_{c}" for c in labels["likert_motiv"]]
fig = plot_hor_bc(df_persona_motiv, y="persona", traces=pct_columns, colors=colors, pct=True)
fig.show()

### score NPS

[Net Promoter Score](https://fr.wikipedia.org/wiki/Net_Promoter_Score) : estime la probabilité que les apprenants recommandent le cours à un ami ou à un collègue
- Les détracteurs (score de 0 à 6)
- Les passifs (score de 7 à 8)
- Les promoteurs (score de 9 à 10)


In [32]:
# Mean of the NPS.EV6 column, printed with one decimal.
print(f"Score NPS moyen: {df['NPS.EV6'].mean():.1f}")

Score NPS moyen: 6.4


In [33]:
# Number of rows per NPS profile. Shares are relative to the rows whose
# NPS_LABEL is one of the profiles listed in labels["NPS"].
profils_nps = {}
nb_scores = df["NPS_LABEL"].isin(labels["NPS"]).sum()
for pr in labels["NPS"]:
    nb_profil = (df["NPS_LABEL"] == pr).sum()
    profils_nps[pr] = nb_profil
    print(f"nombre de {pr}: {nb_profil} ({nb_profil / nb_scores:.0%})")


nombre de detracteurs: 30 (48%)
nombre de passifs: 19 (31%)
nombre de promoteurs: 13 (21%)


In [34]:
# NPS profile counts and percentages broken down by persona.
metric = "score NPS"
# NOTE(review): this replaces labels["NPS"] (promoters first); any cell that
# iterates labels["NPS"] and is re-run afterwards will use this order.
labels["NPS"] = ['promoteurs', 'passifs', 'detracteurs']
labs = labels["NPS"]
df_persona_nps = pd.DataFrame({"persona": personas.keys()})
df_plot = df_persona_nps  # same object: columns below are added to df_persona_nps

for lab in labs:
    df_plot[lab] = [((df["NPS_LABEL"] == lab) & (df[f"persona_{pl}"])).sum() for pl in df_plot["persona"]]

# Row total of the three count columns; unchanged while PCT_* columns are added.
nb_labelled = df_plot[labs].sum(axis=1)
for lab in labs:
    df_plot[f"PCT_{lab}"] = (df_plot[lab] / nb_labelled).apply(format_pct)
df_persona_nps

Unnamed: 0,persona,promoteurs,passifs,detracteurs,PCT_promoteurs,PCT_passifs,PCT_detracteurs
0,tête de classe,1,9,6,6,56,38
1,sérieux,5,4,10,26,21,53
2,intermittent,1,3,2,17,50,33
3,démissionnaire,1,2,8,9,18,73


In [35]:
# RGB colour of each NPS percentage column.
colors = {
    "PCT_detracteurs": (228, 5, 19),
    "PCT_passifs": (255, 255, 0),
    "PCT_promoteurs": (37, 167, 56),
}
pct_columns = [f"PCT_{c}" for c in labels["NPS"]]
fig = plot_hor_bc(df_persona_nps, y="persona", traces=pct_columns, colors=colors, pct=True)
fig.show()

In [36]:
## Mean NPS score per persona
for pl in personas:
    in_persona = df[f"persona_{pl}"]
    sc = df.loc[in_persona, "NPS.EV6"].mean()
    print(f"Score NPS moyen pour '{pl}': {sc:.2f}")

Score NPS moyen pour 'tête de classe': 6.31
Score NPS moyen pour 'sérieux': 6.74
Score NPS moyen pour 'intermittent': 6.83
Score NPS moyen pour 'démissionnaire': 5.09


### engagement sur la plateforme en ligne

In [59]:
# engagement moyen
# fréquence d'utilisation moyenne
# pas possible d'investiguer quels cours en ligne + engageants car moyenne globale
# taux d'engagement sur plateforme par persona
# fréquence d'utilisation par persona
# hypothèses sur explication des chiffres

In [62]:
# Mean of OC_engag_tx (printed as a percentage) and mean of OC_JC
# (printed as a number of days out of 15).
moy_engag = df['OC_engag_tx'].mean()
moy_jr_connexion = df['OC_JC'].mean()
print(f"Taux d'engagement moyen sur la plateforme: {moy_engag:.0%} ({moy_jr_connexion:.1f} jours / 15)")

Taux d'engagement moyen sur la plateforme: 56% (8.4 jours / 15)


In [63]:
# Mean of OC_F, printed on a scale of 3.
moy_freq = df["OC_F"].mean()
print(f"fréquence moyenne de connection à la plateforme: {moy_freq:.2f} / 3")


fréquence moyenne de connection à la plateforme: 1.65 / 3


In [74]:
# Spearman rank correlation between platform engagement and the overall average.
(
    df[["OC_engag_tx", "MOYENNE_EV"]]
    .rename(columns={"OC_engag_tx": "engagement", "MOYENNE_EV": "succès académique"})
    .corr(method="spearman")
)

Unnamed: 0,engagement,succès académique
engagement,1.0,0.890724
succès académique,0.890724,1.0


In [75]:
# Spearman rank correlation between connection frequency and the overall average.
(
    df[["OC_F", "MOYENNE_EV"]]
    .rename(columns={"OC_F": "fréquence de connection", "MOYENNE_EV": "succès académique"})
    .corr(method="spearman")
)

Unnamed: 0,fréquence de connection,succès académique
fréquence de connection,1.0,0.441762
succès académique,0.441762,1.0


In [65]:
# One row per persona; the engagement columns are added by the following cells.
df_persona_engag = pd.DataFrame({"persona": personas.keys()})

In [69]:
# Mean engagement rate and mean connection frequency within each persona.
taux_engagement = []
freq_connection = []
for pl in df_persona_engag["persona"]:
    in_persona = df[f"persona_{pl}"]
    taux_engagement.append(df.loc[in_persona, "OC_engag_tx"].mean())
    freq_connection.append(df.loc[in_persona, "OC_F"].mean())
df_persona_engag["taux_engagement"] = taux_engagement
df_persona_engag["freq_connection"] = freq_connection

In [70]:
# Display the per-persona engagement table.
df_persona_engag

Unnamed: 0,persona,taux_engagement,freq_connection
0,tête de classe,0.828571,2.095238
1,sérieux,0.713043,1.304348
2,intermittent,0.437037,2.666667
3,démissionnaire,0.12,1.0


In [73]:
# Platform usage frequency per persona: OC_F values 1, 2 and 3 are counted
# under the labels 'faible', 'moyenne' and 'haute' respectively.
metric = "Fréquence utilisation plateforme"
labels["fréquence plateforme"] = ['faible', 'moyenne', 'haute']
labs = labels["fréquence plateforme"]
df_plot = df_persona_engag  # same object: columns below are added to df_persona_engag

for freq_level, lab in enumerate(labs, start=1):
    df_plot[lab] = [((df["OC_F"] == freq_level) & (df[f"persona_{pl}"])).sum() for pl in df_plot["persona"]]

# Row total of the three count columns; unchanged while PCT_* columns are added.
nb_users = df_plot[labs].sum(axis=1)
for lab in labs:
    df_plot[f"PCT_{lab}"] = (df_plot[lab] / nb_users).apply(format_pct)
df_plot

Unnamed: 0,persona,taux_engagement,freq_connection,faible,moyenne,haute,PCT_faible,PCT_moyenne,PCT_haute
0,tête de classe,0.828571,2.095238,5,9,7,24,43,33
1,sérieux,0.713043,1.304348,16,7,0,70,30,0
2,intermittent,0.437037,2.666667,0,3,6,0,33,67
3,démissionnaire,0.12,1.0,15,0,0,100,0,0


In [79]:
# RGB colour of each usage-frequency percentage column.
colors = {
    "PCT_haute": (12, 19, 92),
    "PCT_moyenne": (0, 184, 199),
    "PCT_faible": (189, 238, 176),
}
pct_columns = [f"PCT_{c}" for c in labs]
fig = plot_hor_bc(df_plot, y="persona", traces=pct_columns, colors=colors, pct=True)
fig.show()

### correlation des indicateurs

In [43]:
# Indicators entering the correlation matrix: every "MOYENNE_*" column, the
# final-evaluation survey columns, and attendance / success / engagement.
col_corr = [c for c in df.columns if c.startswith("MOYENNE_")]
col_corr += ["motivation.EV6", "Pédagogie.EV6","Insertionpro.EV6","NPS.EV6", "PEF"]
col_corr += ["pre_tx", "SUCCES", "OC_engag_tx"]
df_corr = df[col_corr].corr(method="spearman")

# Export the raw matrix first: a bare expression is only rendered when it is
# the last statement of the cell, so the styled table must come last.
df_corr.to_excel(path_outputs / "correlation.xlsx")
s = df_corr.style.applymap(highlight_corr)
s