# Import du dataset

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import itertools

In [None]:
# Read the "dataset_clean" sheet as CSV straight from the Google Sheets URL.
# Plain string literal: the URL has no placeholders, so no f-string is needed.
url = "https://docs.google.com/spreadsheets/d/1PIMO76csrwWa7eRCZHQziJQvqGMy7M_EpZLblZjRFMU/gviz/tq?tqx=out:csv&sheet=dataset_clean"
df = pd.read_csv(url)
df

Unnamed: 0,age,attrition,deplacements_pro,service,trajet_quotidien_en_miles,num_niveau_detude,domaine_detude,identifiant_employe,num_satisfaction_environnement,genre,...,pourcentage_augmentation,niveau_carriere,temps_de_formation,anciennete,anciennete_poste,derniere_promotion,annees_avec_manager,prenom,nom,email
0,41,Oui,Rare,Ventes,1,2,Sciences,1,2,Femme,...,11 - 12,06 - 10,0,06 - 10,03 - 05,0 - 02,03 - 05,Rose,Atkins,r.atkins@ibm-data.com
1,49,Non,Fréquent,R&D,8,1,Sciences,2,3,Homme,...,21 +,06 - 10,01 - 03,06 - 10,06 - 10,0 - 02,06 - 10,Timothy,Davis,t.davis@ibm-data.com
2,37,Oui,Rare,R&D,2,2,Autre,4,4,Homme,...,15 - 20,06 - 10,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,Harold,Newman,h.newman@ibm-data.com
3,33,Non,Fréquent,R&D,3,4,Sciences,5,4,Femme,...,11 - 12,06 - 10,01 - 03,06 - 10,06 - 10,03 - 05,0 - 02,Michelle,Olson,m.olson@ibm-data.com
4,27,Non,Rare,R&D,2,1,Médecine,7,1,Homme,...,11 - 12,06 - 10,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,Richard,Riley,r.riley@ibm-data.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,Non,Fréquent,R&D,23,2,Médecine,2061,3,Homme,...,15 - 20,10 - 20,01 - 03,03 - 05,0 - 02,0 - 02,03 - 05,Ricky,Johnson,r.johnson1@ibm-data.com
1466,39,Non,Rare,R&D,6,1,Médecine,2062,4,Homme,...,15 - 20,06 - 10,04 +,06 - 10,06 - 10,0 - 02,06 - 10,Carlos,Holmes,c.holmes@ibm-data.com
1467,27,Non,Rare,R&D,4,3,Sciences,2064,2,Homme,...,15 - 20,06 - 10,0,06 - 10,0 - 02,0 - 02,03 - 05,Alejandro,Mclaughlin,a.mclaughlin@ibm-data.com
1468,49,Non,Fréquent,Ventes,2,3,Médecine,2065,4,Homme,...,13 - 14,10 - 20,01 - 03,06 - 10,06 - 10,0 - 02,06 - 10,Roger,Green,r.green@ibm-data.com


# Analyse attrition vs autres variables

In [None]:
# Attrition vs. "categorie_age": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "categorie_age"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par catégorie d'âge", "Attrition (%) par catégorie d'âge"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "deplacements_pro": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "deplacements_pro"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par déplacements professionnels", "Attrition (%) par déplacements professionnels"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "service": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "service"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par services", "Attrition (%) par services"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "categorie_trajet": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "categorie_trajet"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par trajets quotidiens en miles", "Attrition (%) par trajets quotidiens"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "niveau_detude": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "niveau_detude"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par niveau d'études", "Attrition (%) par niveau d'études"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "domaine_detude": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "domaine_detude"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par domaine d'études", "Attrition (%) par domaine d'études"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "satisfaction_environnement": stacked head counts (left subplot)
# and share of each attrition value within a category (right subplot).
group_col = "satisfaction_environnement"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par satisfaction de l'environnement", "Attrition (%) par satisfaction de l'environnement"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "genre": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "genre"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par genre", "Attrition (%) par genre"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "implication": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "implication"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par implication", "Attrition (%) par implication"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "niveau_hierarchique": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "niveau_hierarchique"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par niveau hiérarchique", "Attrition (%) par niveau hiérarchique"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "poste": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "poste"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par postes", "Attrition (%) par postes"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "satisfaction_travail": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "satisfaction_travail"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par satisfaction au travail", "Attrition (%) par satisfaction au travail"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "situation": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "situation"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par situation personnelle", "Attrition (%) par situation personnelle"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "categorie_salaire": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "categorie_salaire"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par catégorie de salaire", "Attrition (%) par catégorie de salaire"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "nb_entreprises": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "nb_entreprises"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par nombre d'entreprises travaillées", "Attrition (%) par nombre d'entreprises travaillées"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "heures_sup": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "heures_sup"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par heures supplémentaires", "Attrition (%) par heures supplémentaires"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "pourcentage_augmentation": stacked head counts (left subplot)
# and share of each attrition value within a category (right subplot).
group_col = "pourcentage_augmentation"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par pourcentage d'augmentation", "Attrition (%) par pourcentage d'augmentation"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "performance": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "performance"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par niveau de performance", "Attrition (%) par niveau de performance"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "satisfaction_relation": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "satisfaction_relation"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par niveau de satisfaction des relations", "Attrition (%) par niveau de satisfaction des relations"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "taux_daction": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "taux_daction"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par taux d'actions", "Attrition (%) par taux d'actions"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "niveau_carriere": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "niveau_carriere"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par nombre d'années travaillées", "Attrition (%) par nombre d'années travaillées"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition vs. "temps_de_formation": stacked head counts (left subplot) and
# share of each attrition value within a category (right subplot).
group_col = "temps_de_formation"

# Aggregation: one row per (category, attrition) pair with its head count.
df_dp = df.groupby([group_col, "attrition"], as_index=False).agg(count=("attrition", "count"))

# Proportions: each count divided by the total of its category.
df_dp["proportion"] = df_dp["count"] / df_dp.groupby(group_col)["count"].transform("sum")

# Colormaps. `colormap` (one colour per category) is not read by this figure;
# it is still built so the names defined by this cell stay the same.
colormap = dict(zip(df[group_col].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]))
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=["Attrition par temps de formation", "Attrition (%) par temps de formation"],
)

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]
    # Both traces of one attrition value share a legend group and only the
    # first one is listed, so the legend shows each value once instead of twice.
    shared = dict(x=df_f[group_col], name=att, legendgroup=att, marker_color=colormap_attr[att])

    # Chart 1: head counts (stacked, since barmode="stack" is set on the layout below).
    fig.add_trace(go.Bar(y=df_f["count"], text=df_f["count"], textposition="outside", **shared), row=1, col=1)

    # Chart 2: 100% stacked bars, labelled with the percentage.
    pct_labels = (df_f["proportion"] * 100).round(1).astype(str) + "%"
    fig.add_trace(go.Bar(y=df_f["proportion"], text=pct_labels, textposition="inside", showlegend=False, **shared), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
fig.show()

In [None]:
# Attrition by work/life-balance category: absolute counts (left subplot)
# and within-category shares (right subplot).

# Aggregation: number of employees per (category, attrition) pair.
df_dp = df.groupby(["equilibre_pro_perso", "attrition"], as_index=False).agg(count=("attrition", "count"))

# Share of each attrition value inside its category (sums to 1 per category).
df_dp["proportion"] = df_dp["count"] / df_dp.groupby("equilibre_pro_perso")["count"].transform("sum")

# Per-category palette. NOTE(review): not read by the traces below, which are
# coloured by attrition; kept because notebook globals may be read elsewhere.
colormap = {s: c for s, c in zip(df["equilibre_pro_perso"].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"])}
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "bar"}]], subplot_titles=["Attrition par équilibre vie pro/perso", "Attrition (%) par équilibre vie pro/perso"])

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]

    # Left: stacked counts. This trace owns the legend entry for `att`.
    fig.add_trace(go.Bar(x=df_f["equilibre_pro_perso"], y=df_f["count"], name=att, legendgroup=att, marker_color=colormap_attr[att], text=df_f["count"], textposition="outside"), row=1, col=1)

    # Right: stacked shares. Same legend group with a hidden entry, so each
    # attrition value is listed once and toggles both subplots together.
    fig.add_trace(go.Bar(x=df_f["equilibre_pro_perso"], y=df_f["proportion"], name=att, legendgroup=att, showlegend=False, marker_color=colormap_attr[att], text=(df_f["proportion"]*100).round(1).astype(str) + "%", textposition="inside"), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
# The right-hand axis holds fractions; show them as percentages like the bar labels.
fig.update_yaxes(tickformat=".0%", row=1, col=2)
fig.show()

In [None]:
# Attrition by seniority category: absolute counts (left subplot)
# and within-category shares (right subplot).

# Aggregation: number of employees per (category, attrition) pair.
df_dp = df.groupby(["anciennete", "attrition"], as_index=False).agg(count=("attrition", "count"))

# Share of each attrition value inside its category (sums to 1 per category).
df_dp["proportion"] = df_dp["count"] / df_dp.groupby("anciennete")["count"].transform("sum")

# Per-category palette. NOTE(review): not read by the traces below, which are
# coloured by attrition; kept because notebook globals may be read elsewhere.
colormap = {s: c for s, c in zip(df["anciennete"].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"])}
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "bar"}]], subplot_titles=["Attrition par ancienneté", "Attrition (%) par ancienneté"])

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]

    # Left: stacked counts. This trace owns the legend entry for `att`.
    fig.add_trace(go.Bar(x=df_f["anciennete"], y=df_f["count"], name=att, legendgroup=att, marker_color=colormap_attr[att], text=df_f["count"], textposition="outside"), row=1, col=1)

    # Right: stacked shares. Same legend group with a hidden entry, so each
    # attrition value is listed once and toggles both subplots together.
    fig.add_trace(go.Bar(x=df_f["anciennete"], y=df_f["proportion"], name=att, legendgroup=att, showlegend=False, marker_color=colormap_attr[att], text=(df_f["proportion"]*100).round(1).astype(str) + "%", textposition="inside"), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
# The right-hand axis holds fractions; show them as percentages like the bar labels.
fig.update_yaxes(tickformat=".0%", row=1, col=2)
fig.show()

In [None]:
# Attrition by time-in-current-role category: absolute counts (left subplot)
# and within-category shares (right subplot).

# Aggregation: number of employees per (category, attrition) pair.
df_dp = df.groupby(["anciennete_poste", "attrition"], as_index=False).agg(count=("attrition", "count"))

# Share of each attrition value inside its category (sums to 1 per category).
df_dp["proportion"] = df_dp["count"] / df_dp.groupby("anciennete_poste")["count"].transform("sum")

# Per-category palette. NOTE(review): not read by the traces below, which are
# coloured by attrition; kept because notebook globals may be read elsewhere.
colormap = {s: c for s, c in zip(df["anciennete_poste"].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"])}
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "bar"}]], subplot_titles=["Attrition par ancienneté au poste", "Attrition (%) par ancienneté au poste"])

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]

    # Left: stacked counts. This trace owns the legend entry for `att`.
    fig.add_trace(go.Bar(x=df_f["anciennete_poste"], y=df_f["count"], name=att, legendgroup=att, marker_color=colormap_attr[att], text=df_f["count"], textposition="outside"), row=1, col=1)

    # Right: stacked shares. Same legend group with a hidden entry, so each
    # attrition value is listed once and toggles both subplots together.
    fig.add_trace(go.Bar(x=df_f["anciennete_poste"], y=df_f["proportion"], name=att, legendgroup=att, showlegend=False, marker_color=colormap_attr[att], text=(df_f["proportion"]*100).round(1).astype(str) + "%", textposition="inside"), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
# The right-hand axis holds fractions; show them as percentages like the bar labels.
fig.update_yaxes(tickformat=".0%", row=1, col=2)
fig.show()

In [None]:
# Attrition by time-since-last-promotion category: absolute counts (left
# subplot) and within-category shares (right subplot).

# Aggregation: number of employees per (category, attrition) pair.
df_dp = df.groupby(["derniere_promotion", "attrition"], as_index=False).agg(count=("attrition", "count"))

# Share of each attrition value inside its category (sums to 1 per category).
df_dp["proportion"] = df_dp["count"] / df_dp.groupby("derniere_promotion")["count"].transform("sum")

# Per-category palette. NOTE(review): not read by the traces below, which are
# coloured by attrition; kept because notebook globals may be read elsewhere.
colormap = {s: c for s, c in zip(df["derniere_promotion"].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"])}
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "bar"}]], subplot_titles=["Attrition par dernière promotion", "Attrition (%) par dernière promotion"])

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]

    # Left: stacked counts. This trace owns the legend entry for `att`.
    fig.add_trace(go.Bar(x=df_f["derniere_promotion"], y=df_f["count"], name=att, legendgroup=att, marker_color=colormap_attr[att], text=df_f["count"], textposition="outside"), row=1, col=1)

    # Right: stacked shares. Same legend group with a hidden entry, so each
    # attrition value is listed once and toggles both subplots together.
    fig.add_trace(go.Bar(x=df_f["derniere_promotion"], y=df_f["proportion"], name=att, legendgroup=att, showlegend=False, marker_color=colormap_attr[att], text=(df_f["proportion"]*100).round(1).astype(str) + "%", textposition="inside"), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
# The right-hand axis holds fractions; show them as percentages like the bar labels.
fig.update_yaxes(tickformat=".0%", row=1, col=2)
fig.show()

In [None]:
# Attrition by years-with-current-manager category: absolute counts (left
# subplot) and within-category shares (right subplot).

# Aggregation: number of employees per (category, attrition) pair.
df_dp = df.groupby(["annees_avec_manager", "attrition"], as_index=False).agg(count=("attrition", "count"))

# Share of each attrition value inside its category (sums to 1 per category).
df_dp["proportion"] = df_dp["count"] / df_dp.groupby("annees_avec_manager")["count"].transform("sum")

# Per-category palette. NOTE(review): not read by the traces below, which are
# coloured by attrition; kept because notebook globals may be read elsewhere.
colormap = {s: c for s, c in zip(df["annees_avec_manager"].unique(), ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"])}
colormap_attr = {"Oui": "#1d70c2", "Non": "#513b56"}

# Subplots
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "bar"}, {"type": "bar"}]], subplot_titles=["Attrition par nombre d'années avec le manager actuel", "Attrition (%) par nombre d'années avec le manager actuel"])

for att in df_dp["attrition"].unique():
    df_f = df_dp[df_dp["attrition"] == att]

    # Left: stacked counts. This trace owns the legend entry for `att`.
    fig.add_trace(go.Bar(x=df_f["annees_avec_manager"], y=df_f["count"], name=att, legendgroup=att, marker_color=colormap_attr[att], text=df_f["count"], textposition="outside"), row=1, col=1)

    # Right: stacked shares. Same legend group with a hidden entry, so each
    # attrition value is listed once and toggles both subplots together.
    fig.add_trace(go.Bar(x=df_f["annees_avec_manager"], y=df_f["proportion"], name=att, legendgroup=att, showlegend=False, marker_color=colormap_attr[att], text=(df_f["proportion"]*100).round(1).astype(str) + "%", textposition="inside"), row=1, col=2)

# Layout
fig.update_layout(height=500, width=1200, barmode="stack", showlegend=True)
# The right-hand axis holds fractions; show them as percentages like the bar labels.
fig.update_yaxes(tickformat=".0%", row=1, col=2)
fig.show()

# Conclusions analyses attrition vs autres variables

Mise en lumière des catégories qui semblent les plus impactantes :
* Salaire
* Âge
* Niveau hiérarchique

=> le genre ne nous montre rien pour le moment, mais nous souhaitons approfondir


Catégories impactantes dans une moindre mesure :
* voyage d’affaire
* Service
* Education
* Satisfaction d’environnement
* Satisfaction du travail
* Satisfaction des relations
* Implication au travail
* Job Roles, en fonction du secteur
* Statut Marital
* Nombre d’entreprise
* Overtime
* StockOptionLevel
* TotalWorkingYears
* Work Life Balance
* YearsAtCompany
* YearsInCurrentRole
* YearsSinceLastPromotion
* Distance From Home
   


Catégories peu impactantes
* Domaine d’études
* Percent Salary Hike
* Performance



=> Recherche d'un profil type de personne qui part et de personne qui reste

# Création des datasets des personnes restées et des personnes ayant quitté l'entreprise

In [None]:
# Split the dataset by attrition outcome: employees who stayed ("Non") and
# employees who left ("Oui").
# .copy() makes each subset an independent DataFrame, so columns can be added
# to it later without pandas raising SettingWithCopyWarning.
df_non = df[df["attrition"] == "Non"].copy()
df_oui = df[df["attrition"] == "Oui"].copy()

In [None]:
# Display the subset of employees whose attrition value is "Non".
df_non

Unnamed: 0,age,attrition,deplacements_pro,service,trajet_quotidien_en_miles,num_niveau_detude,domaine_detude,identifiant_employe,num_satisfaction_environnement,genre,...,pourcentage_augmentation,niveau_carriere,temps_de_formation,anciennete,anciennete_poste,derniere_promotion,annees_avec_manager,prenom,nom,email
1,49,Non,Fréquent,R&D,8,1,Sciences,2,3,Homme,...,21 +,06 - 10,01 - 03,06 - 10,06 - 10,0 - 02,06 - 10,Timothy,Davis,t.davis@ibm-data.com
3,33,Non,Fréquent,R&D,3,4,Sciences,5,4,Femme,...,11 - 12,06 - 10,01 - 03,06 - 10,06 - 10,03 - 05,0 - 02,Michelle,Olson,m.olson@ibm-data.com
4,27,Non,Rare,R&D,2,1,Médecine,7,1,Homme,...,11 - 12,06 - 10,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,Richard,Riley,r.riley@ibm-data.com
5,32,Non,Fréquent,R&D,2,2,Sciences,8,4,Homme,...,13 - 14,06 - 10,01 - 03,06 - 10,06 - 10,03 - 05,06 - 10,Richard,Brown,r.brown@ibm-data.com
6,59,Non,Rare,R&D,3,3,Médecine,10,3,Femme,...,15 - 20,10 - 20,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,Kristy,Wilson,k.wilson@ibm-data.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,Non,Fréquent,R&D,23,2,Médecine,2061,3,Homme,...,15 - 20,10 - 20,01 - 03,03 - 05,0 - 02,0 - 02,03 - 05,Ricky,Johnson,r.johnson1@ibm-data.com
1466,39,Non,Rare,R&D,6,1,Médecine,2062,4,Homme,...,15 - 20,06 - 10,04 +,06 - 10,06 - 10,0 - 02,06 - 10,Carlos,Holmes,c.holmes@ibm-data.com
1467,27,Non,Rare,R&D,4,3,Sciences,2064,2,Homme,...,15 - 20,06 - 10,0,06 - 10,0 - 02,0 - 02,03 - 05,Alejandro,Mclaughlin,a.mclaughlin@ibm-data.com
1468,49,Non,Fréquent,Ventes,2,3,Médecine,2065,4,Homme,...,13 - 14,10 - 20,01 - 03,06 - 10,06 - 10,0 - 02,06 - 10,Roger,Green,r.green@ibm-data.com


In [None]:
# Display the subset of employees whose attrition value is "Oui".
df_oui

Unnamed: 0,age,attrition,deplacements_pro,service,trajet_quotidien_en_miles,num_niveau_detude,domaine_detude,identifiant_employe,num_satisfaction_environnement,genre,...,pourcentage_augmentation,niveau_carriere,temps_de_formation,anciennete,anciennete_poste,derniere_promotion,annees_avec_manager,prenom,nom,email
0,41,Oui,Rare,Ventes,1,2,Sciences,1,2,Femme,...,11 - 12,06 - 10,0,06 - 10,03 - 05,0 - 02,03 - 05,Rose,Atkins,r.atkins@ibm-data.com
2,37,Oui,Rare,R&D,2,2,Autre,4,4,Homme,...,15 - 20,06 - 10,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,Harold,Newman,h.newman@ibm-data.com
14,28,Oui,Rare,R&D,24,3,Sciences,19,3,Homme,...,13 - 14,06 - 10,04 +,03 - 05,0 - 02,0 - 02,03 - 05,Robert,Scott,r.scott@ibm-data.com
21,36,Oui,Rare,Ventes,9,4,Sciences,27,3,Homme,...,21 +,06 - 10,04 +,03 - 05,03 - 05,0 - 02,03 - 05,Tracy,Fisher,t.fisher@ibm-data.com
24,34,Oui,Rare,R&D,6,1,Médecine,31,2,Homme,...,11 - 12,06 - 10,01 - 03,03 - 05,0 - 02,0 - 02,03 - 05,Casey,Schwartz,c.schwartz@ibm-data.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1438,23,Oui,Fréquent,Ventes,9,3,Marketing,2023,4,Homme,...,15 - 20,0 - 02,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,William,Graham,w.graham@ibm-data.com
1442,29,Oui,Rare,R&D,1,4,Médecine,2027,1,Homme,...,13 - 14,03 - 05,01 - 03,0 - 02,0 - 02,0 - 02,0 - 02,Henry,Sanchez,h.sanchez@ibm-data.com
1444,56,Oui,Rare,R&D,7,2,Techonologies,2032,4,Homme,...,11 - 12,10 - 20,04 +,06 - 10,06 - 10,06 - 10,06 - 10,Grant,Martinez,g.martinez@ibm-data.com
1452,50,Oui,Fréquent,Ventes,1,4,Sciences,2044,2,Homme,...,11 - 12,10 - 20,01 - 03,06 - 10,03 - 05,0 - 02,0 - 02,Corey,Martin,c.martin@ibm-data.com


In [None]:
# Reminder chart: overall headcount split by attrition value.
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Two-column frame: one row per attrition value with its number of employees.
attrition_counts = df["attrition"].value_counts().reset_index()
attrition_counts.columns = ["attrition", "Count"]

pie_attrition = px.pie(
    attrition_counts,
    names="attrition",
    values="Count",
    color="attrition",
    color_discrete_sequence=colormap,
)

# Each sector shows its raw count, its label and its percentage.
pie_attrition.update_traces(textinfo="value+label+percent")

layout_options = {
    "title": "Pour rappel : distribution de l'attrition en nombre d'employés",
    "showlegend": False,
    "width": 1000,
    "height": 500,
}
pie_attrition.update_layout(**layout_options)
pie_attrition.show()

# Comparaison des variables

In [None]:
# Side-by-side donut charts of business-travel frequency for employees who
# left (left subplot) and employees who stayed (right subplot).
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Category labels ranked from most to least frequent within each subset.
freq_oui = df_oui["deplacements_pro"].value_counts()
count_oui = freq_oui.sort_values(ascending=False).index.tolist()
freq_non = df_non["deplacements_pro"].value_counts()
count_non = freq_non.sort_values(ascending=False).index.tolist()

# One row, two "domain" cells (the subplot type pie traces require).
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Attrition=oui", "Attrition=non"),
    specs=[[{"type": "domain"}, {"type": "domain"}]],
)

# Arguments shared by both donuts.
pie_kwargs = dict(names="deplacements_pro", color="deplacements_pro", color_discrete_sequence=colormap, hole=0.4)
pie1 = px.pie(df_oui, category_orders={"deplacements_pro": count_oui}, **pie_kwargs)
pie2 = px.pie(df_non, category_orders={"deplacements_pro": count_non}, **pie_kwargs)

# Copy each express figure's traces into its subplot column.
for subplot_col, pie in enumerate((pie1, pie2), start=1):
    pie.update_traces(textinfo="percent+label")
    for trace in pie.data:
        fig.add_trace(trace, row=1, col=subplot_col)

# Layout
fig.update_layout(height=500, showlegend=False, title="Déplacements professionnels")
fig.show()

In [None]:
# Side-by-side donut charts of the department split for employees who left
# (left subplot) and employees who stayed (right subplot).
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Category labels ranked from most to least frequent within each subset.
freq_oui = df_oui["service"].value_counts()
count_oui = freq_oui.sort_values(ascending=False).index.tolist()
freq_non = df_non["service"].value_counts()
count_non = freq_non.sort_values(ascending=False).index.tolist()

# One row, two "domain" cells (the subplot type pie traces require).
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Attrition=oui", "Attrition=non"),
    specs=[[{"type": "domain"}, {"type": "domain"}]],
)

# Arguments shared by both donuts.
pie_kwargs = dict(names="service", color="service", color_discrete_sequence=colormap, hole=0.4)
pie1 = px.pie(df_oui, category_orders={"service": count_oui}, **pie_kwargs)
pie2 = px.pie(df_non, category_orders={"service": count_non}, **pie_kwargs)

# Copy each express figure's traces into its subplot column.
for subplot_col, pie in enumerate((pie1, pie2), start=1):
    pie.update_traces(textinfo="percent+label")
    for trace in pie.data:
        fig.add_trace(trace, row=1, col=subplot_col)

# Layout
fig.update_layout(height=500, showlegend=False, title="Services")
fig.show()

In [None]:
# Side-by-side donut charts of the commute category for employees who left
# (left subplot) and employees who stayed (right subplot).
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Category labels ranked from most to least frequent within each subset.
count_oui = (df_oui["categorie_trajet"].value_counts().sort_values(ascending=False).index.tolist())

count_non = (df_non["categorie_trajet"].value_counts().sort_values(ascending=False).index.tolist())

# One row, two "domain" cells (the subplot type pie traces require).
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="categorie_trajet", category_orders={"categorie_trajet": count_oui}, color="categorie_trajet", color_discrete_sequence=colormap, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="categorie_trajet", category_orders={"categorie_trajet": count_non}, color="categorie_trajet", color_discrete_sequence=colormap, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

# Layout. The title previously read "Services", a leftover from the cell this
# one was copied from; it now names the variable actually plotted.
fig.update_layout(height=500, showlegend=False, title="Catégories de trajet quotidien")

fig.show()

In [None]:
# Side-by-side donut charts of the education level for employees who left
# (left subplot) and employees who stayed (right subplot).
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Category labels ranked from most to least frequent within each subset.
freq_oui = df_oui["niveau_detude"].value_counts()
count_oui = freq_oui.sort_values(ascending=False).index.tolist()
freq_non = df_non["niveau_detude"].value_counts()
count_non = freq_non.sort_values(ascending=False).index.tolist()

# One row, two "domain" cells (the subplot type pie traces require).
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Attrition=oui", "Attrition=non"),
    specs=[[{"type": "domain"}, {"type": "domain"}]],
)

# Arguments shared by both donuts.
pie_kwargs = dict(names="niveau_detude", color="niveau_detude", color_discrete_sequence=colormap, hole=0.4)
pie1 = px.pie(df_oui, category_orders={"niveau_detude": count_oui}, **pie_kwargs)
pie2 = px.pie(df_non, category_orders={"niveau_detude": count_non}, **pie_kwargs)

# Copy each express figure's traces into its subplot column.
for subplot_col, pie in enumerate((pie1, pie2), start=1):
    pie.update_traces(textinfo="percent+label")
    for trace in pie.data:
        fig.add_trace(trace, row=1, col=subplot_col)

# Layout
fig.update_layout(height=500, showlegend=False, title="Niveau d'études")
fig.show()

In [None]:
# Side-by-side donut charts of the field of study for employees who left
# (left subplot) and employees who stayed (right subplot).
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Category labels ranked from most to least frequent within each subset.
freq_oui = df_oui["domaine_detude"].value_counts()
count_oui = freq_oui.sort_values(ascending=False).index.tolist()
freq_non = df_non["domaine_detude"].value_counts()
count_non = freq_non.sort_values(ascending=False).index.tolist()

# One row, two "domain" cells (the subplot type pie traces require).
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Attrition=oui", "Attrition=non"),
    specs=[[{"type": "domain"}, {"type": "domain"}]],
)

# Arguments shared by both donuts.
pie_kwargs = dict(names="domaine_detude", color="domaine_detude", color_discrete_sequence=colormap, hole=0.4)
pie1 = px.pie(df_oui, category_orders={"domaine_detude": count_oui}, **pie_kwargs)
pie2 = px.pie(df_non, category_orders={"domaine_detude": count_non}, **pie_kwargs)

# Copy each express figure's traces into its subplot column.
for subplot_col, pie in enumerate((pie1, pie2), start=1):
    pie.update_traces(textinfo="percent+label")
    for trace in pie.data:
        fig.add_trace(trace, row=1, col=subplot_col)

# Layout
fig.update_layout(height=500, showlegend=False, title="Domaine d'études")
fig.show()

In [None]:
# Side-by-side donut charts of environment satisfaction for employees who
# left (left subplot) and employees who stayed (right subplot), with one
# fixed colour per category shared by both charts.
categories = sorted(set(df_oui["satisfaction_environnement"]).union(df_non["satisfaction_environnement"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["satisfaction_environnement"].map(color_map))
df_non = df_non.assign(color=df_non["satisfaction_environnement"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["satisfaction_environnement"].value_counts().index.tolist()
count_non = df_non["satisfaction_environnement"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="satisfaction_environnement", category_orders={"satisfaction_environnement": count_oui}, color="satisfaction_environnement", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="satisfaction_environnement", category_orders={"satisfaction_environnement": count_non}, color="satisfaction_environnement", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Niveau de satisfaction de l'environnement")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of the gender split for employees who left
# (left subplot) and employees who stayed (right subplot).
colormap = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Category labels ranked from most to least frequent within each subset.
freq_oui = df_oui["genre"].value_counts()
count_oui = freq_oui.sort_values(ascending=False).index.tolist()
freq_non = df_non["genre"].value_counts()
count_non = freq_non.sort_values(ascending=False).index.tolist()

# One row, two "domain" cells (the subplot type pie traces require).
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Attrition=oui", "Attrition=non"),
    specs=[[{"type": "domain"}, {"type": "domain"}]],
)

# Arguments shared by both donuts.
pie_kwargs = dict(names="genre", color="genre", color_discrete_sequence=colormap, hole=0.4)
pie1 = px.pie(df_oui, category_orders={"genre": count_oui}, **pie_kwargs)
pie2 = px.pie(df_non, category_orders={"genre": count_non}, **pie_kwargs)

# Copy each express figure's traces into its subplot column.
for subplot_col, pie in enumerate((pie1, pie2), start=1):
    pie.update_traces(textinfo="percent+label")
    for trace in pie.data:
        fig.add_trace(trace, row=1, col=subplot_col)

# Layout
fig.update_layout(height=500, showlegend=False, title="Genre")
fig.show()

In [None]:
# Side-by-side donut charts of job involvement for employees who left (left
# subplot) and employees who stayed (right subplot), with one fixed colour
# per category shared by both charts.
categories = sorted(set(df_oui["implication"]).union(df_non["implication"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["implication"].map(color_map))
df_non = df_non.assign(color=df_non["implication"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["implication"].value_counts().index.tolist()
count_non = df_non["implication"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="implication", category_orders={"implication": count_oui}, color="implication", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="implication", category_orders={"implication": count_non}, color="implication", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Implication")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of the hierarchy level for employees who left
# (left subplot) and employees who stayed (right subplot), with one fixed
# colour per category shared by both charts.
categories = sorted(set(df_oui["niveau_hierarchique"]).union(df_non["niveau_hierarchique"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["niveau_hierarchique"].map(color_map))
df_non = df_non.assign(color=df_non["niveau_hierarchique"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["niveau_hierarchique"].value_counts().index.tolist()
count_non = df_non["niveau_hierarchique"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="niveau_hierarchique", category_orders={"niveau_hierarchique": count_oui}, color="niveau_hierarchique", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="niveau_hierarchique", category_orders={"niveau_hierarchique": count_non}, color="niveau_hierarchique", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Niveau hiérarchique")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of the job role for employees who left (left
# subplot) and employees who stayed (right subplot), with one fixed colour
# per category shared by both charts.
categories = sorted(set(df_oui["poste"]).union(df_non["poste"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["poste"].map(color_map))
df_non = df_non.assign(color=df_non["poste"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["poste"].value_counts().index.tolist()
count_non = df_non["poste"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="poste", category_orders={"poste": count_oui}, color="poste", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="poste", category_orders={"poste": count_non}, color="poste", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Poste")
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of job satisfaction for employees who left (left
# subplot) and employees who stayed (right subplot), with one fixed colour
# per category shared by both charts.
categories = sorted(set(df_oui["satisfaction_travail"]).union(df_non["satisfaction_travail"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["satisfaction_travail"].map(color_map))
df_non = df_non.assign(color=df_non["satisfaction_travail"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["satisfaction_travail"].value_counts().index.tolist()
count_non = df_non["satisfaction_travail"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="satisfaction_travail", category_orders={"satisfaction_travail": count_oui}, color="satisfaction_travail", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="satisfaction_travail", category_orders={"satisfaction_travail": count_non}, color="satisfaction_travail", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Niveau de satisfaction au travail")
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of the `situation` column for employees who left
# (left subplot) and employees who stayed (right subplot), with one fixed
# colour per category shared by both charts.
categories = sorted(set(df_oui["situation"]).union(df_non["situation"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["situation"].map(color_map))
df_non = df_non.assign(color=df_non["situation"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["situation"].value_counts().index.tolist()
count_non = df_non["situation"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="situation", category_orders={"situation": count_oui}, color="situation", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="situation", category_orders={"situation": count_non}, color="situation", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Situation personnelle")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of the salary category for employees who left
# (left subplot) and employees who stayed (right subplot), with one fixed
# colour per category shared by both charts.
categories = sorted(set(df_oui["categorie_salaire"]).union(df_non["categorie_salaire"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["categorie_salaire"].map(color_map))
df_non = df_non.assign(color=df_non["categorie_salaire"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["categorie_salaire"].value_counts().index.tolist()
count_non = df_non["categorie_salaire"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="categorie_salaire", category_orders={"categorie_salaire": count_oui}, color="categorie_salaire", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="categorie_salaire", category_orders={"categorie_salaire": count_non}, color="categorie_salaire", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Catégories de salaire")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of the number of previous employers for
# employees who left (left subplot) and employees who stayed (right
# subplot), with one fixed colour per category shared by both charts.
categories = sorted(set(df_oui["nb_entreprises"]).union(df_non["nb_entreprises"]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# Palette is cycled if there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# assign() returns a new DataFrame, so the colour column is added without
# writing into a slice of df (the cause of SettingWithCopyWarning).
df_oui = df_oui.assign(color=df_oui["nb_entreprises"].map(color_map))
df_non = df_non.assign(color=df_non["nb_entreprises"].map(color_map))

# Category labels ranked by frequency within each subset (value_counts sorts
# in descending order). Computed here for this column instead of reusing the
# lists left behind by an earlier cell for a different column.
count_oui = df_oui["nb_entreprises"].value_counts().index.tolist()
count_non = df_non["nb_entreprises"].value_counts().index.tolist()

# Subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# Donut for attrition = "Oui"
pie1 = px.pie(df_oui, names="nb_entreprises", category_orders={"nb_entreprises": count_oui}, color="nb_entreprises", color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Donut for attrition = "Non"
pie2 = px.pie(df_non, names="nb_entreprises", category_orders={"nb_entreprises": count_non}, color="nb_entreprises", color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Nombre d'entreprises travaillées")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "heures_sup" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "heures_sup"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Heures supplémentaires")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "pourcentage_augmentation" for the two attrition
# groups (df_oui on the left, df_non on the right).
column = "pourcentage_augmentation"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Pourcentages d'augmentation")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "performance" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "performance"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Performance")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "satisfaction_relation" for the two attrition
# groups (df_oui on the left, df_non on the right).
column = "satisfaction_relation"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Satisfaction des relations au travail")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "taux_daction" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "taux_daction"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Taux d'actions")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "niveau_carriere" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "niveau_carriere"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Nombre d'années travaillées")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "temps_de_formation" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "temps_de_formation"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Temps de formation")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "equilibre_pro_perso" for the two attrition
# groups (df_oui on the left, df_non on the right).
column = "equilibre_pro_perso"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Equilibre vie pro/perso")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "anciennete" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "anciennete"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Ancienneté")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "anciennete_poste" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "anciennete_poste"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Ancienneté au poste actuel")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "derniere_promotion" for the two attrition groups
# (df_oui on the left, df_non on the right).
column = "derniere_promotion"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Dernière promotion")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Side-by-side donut charts of "annees_avec_manager" for the two attrition
# groups (df_oui on the left, df_non on the right).
column = "annees_avec_manager"
categories = sorted(set(df_oui[column]).union(df_non[column]))
colors = ["#1d70c2", "#f87575", "#513b56", "#6a0136", "#f0ebd8", "#3D315B", "#BADEFC", "#C49BBB", "#FFC6D9"]

# One colour per category, shared by both charts; the palette wraps around when
# there are more categories than colours.
color_map = {cat: colors[i % len(colors)] for i, cat in enumerate(categories)}

# DataFrame.assign returns a new frame, so the "color" column is added without
# the SettingWithCopyWarning that direct item assignment raised on these frames.
# The pies below take their colours from color_discrete_map, not this column.
df_oui = df_oui.assign(color=df_oui[column].map(color_map))
df_non = df_non.assign(color=df_non[column].map(color_map))

# Subplots: "domain" cells are required to host pie traces.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Attrition=oui", "Attrition=non"), specs=[[{"type": "domain"}, {"type": "domain"}]])

# NOTE(review): count_oui / count_non are not defined in this cell; they come
# from earlier in the notebook - confirm they describe this column's order.

# Pie for attrition = yes
pie1 = px.pie(df_oui, names=column, category_orders={column: count_oui}, color=column, color_discrete_map=color_map, hole=0.4)
pie1.update_traces(textinfo="percent+label")
for trace in pie1.data:
    fig.add_trace(trace, row=1, col=1)

# Pie for attrition = no
pie2 = px.pie(df_non, names=column, category_orders={column: count_non}, color=column, color_discrete_map=color_map, hole=0.4)
pie2.update_traces(textinfo="percent+label")
for trace in pie2.data:
    fig.add_trace(trace, row=1, col=2)

fig.update_layout(height=500, showlegend=True, title="Nombre d'années avec le manager")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Conclusions de l'analyse des profils types

Certaines catégories se démarquent encore :
* salaire
* heures supp
* situation
* âge
* niveau hiérarchique
* années avec manager
* type de postes
* taux d'actions
* carrière
* anciennetés

=> une catégorisation par type de poste sera peut-être nécessaire pour creuser ces tendances

D'autres catégories à garder en tête :
* genre
* satisfactions
* implication
* trajets quotidiens

Nous passons à l'analyse bivariée