In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import load_data as ld
import dataReferences as dr
import seaborn as sns

# Data folder (relative path, Windows-style separators).
dir_data = ".\\data\\data_gouv_fr\\"

# Both loaders receive the same folder and the same year range.
load_kwargs = dict(folder_path=dir_data, start_year=2020, end_year=2021)

# Road users ("usagers") and accident characteristics ("caractéristiques").
dic_usagers = ld.load_usagers(**load_kwargs)
dic_caract = ld.load_caract(**load_kwargs)

# Both results are indexed by year; pick the 2020 entry of each.
df_usagers = dic_usagers[2020]
df_caract = dic_caract[2020]

## Fusion des données et suppression des colonnes inutiles

In [None]:
# Left-join the characteristics table onto the users table on 'Num_Acc'
# (every row of df_usagers is kept), then discard the columns that are not
# used afterwards: 'id_vehicule', 'num_veh', 'adr', 'lat' and 'long'.
df = (
    df_usagers
    .merge(df_caract, how='left', on='Num_Acc')
    .drop(columns=['id_vehicule', 'num_veh', 'adr', 'lat', 'long'])
)

## Encodages

In [None]:
# Four age classes whose bounds are based on the quartiles of the age column
# (see df['age'].describe()).
def get_cl_age(age):
    """Return the age-class label for ``age``.

    Classes: '0-25' (age <= 25), '26-37' (25 < age <= 37),
    '38-53' (37 < age <= 53) and '>53' (age > 53).  A value for which every
    comparison is false (e.g. NaN) yields ``None``.
    """
    upper_bounds = ((25, '0-25'), (37, '26-37'), (53, '38-53'))
    for bound, label in upper_bounds:
        if age <= bound:
            return label
    # Explicit comparison (instead of a bare fallback) so that NaN maps to None.
    return '>53' if age > 53 else None

# Readable labels for the 'lum' (lighting) and 'grav' (severity) codes,
# looked up through dr.get_labels.
df['luminosité'] = df['lum'].apply(lambda code: dr.get_labels('lum', code))
df['gravité'] = df['grav'].apply(lambda code: dr.get_labels('grav', code))

# Age class derived from the birth year, without storing the intermediate age.
# NOTE(review): ages are computed relative to 2022 although the table selected
# earlier is the 2020 one — confirm the intended reference year.
df["classe d'age"] = list(map(get_cl_age, 2022 - df['an_nais']))

# The birth year is no longer needed once the class has been assigned.
df = df.drop(columns=['an_nais'])

## Relation Gravité - Age

In [None]:
# Share of each severity label within every age class: normalize='index'
# makes each row of the crosstab sum to 1.
df_grav_age = pd.crosstab(df["classe d'age"], df['gravité'], normalize='index')

# Column order used for the stacked bars (killed, hospitalised, lightly
# injured, unharmed). reindex yields an all-NaN column for any label that is
# absent from the crosstab.
columns_titles = ['Tué', 'Blessé hospitalisé', 'Blessé léger', 'Indemne']
df_grav_age = df_grav_age.reindex(columns=columns_titles)

sns.set_theme(style="ticks")
f, ax = plt.subplots(figsize=(7, 5))
# Rotate the tick labels directly through `rot` instead of drawing them
# horizontally and re-setting them afterwards.
chart = df_grav_age.plot(kind="bar", stacked=True, rot=80, ax=ax,
                         title="Distribution de la gravité en fonction de l'âge")
# The bars show row-normalised shares, so label the y axis accordingly.
ax.set_ylabel("Proportion")
sns.move_legend(ax, "upper right")


## Relation Gravité - Luminosité

In [None]:
# Share of each severity label within every lighting condition:
# normalize='index' makes each row of the crosstab sum to 1.
df_grav_lum = pd.crosstab(df['luminosité'], df['gravité'], normalize='index')

# Same column order as the age chart; `columns_titles` is defined in the
# severity-vs-age cell, which must have been run first.
df_grav_lum = df_grav_lum.reindex(columns=columns_titles)

sns.set_theme(style="ticks")
f, ax = plt.subplots(figsize=(7, 5))
# Rotate the tick labels directly through `rot` instead of drawing them
# horizontally and re-setting them afterwards.
chart = df_grav_lum.plot(kind="bar", stacked=True, rot=80, ax=ax,
                         title='Distribution de la gravité en fonction de la luminosité')
# The bars show row-normalised shares, so label the y axis accordingly.
ax.set_ylabel("Proportion")
sns.move_legend(ax, "upper right")


