# Importation des Librairies 

In [1]:
import pandas as pd
import os
import pyAgrum as gum
from pyagrum_extra import gum
import dash
from dash import dcc
from dash import html, dash_table
from dash.dependencies import Input, Output
import plotly.express as px

## Import Data

In [2]:
# Location of the bz2-compressed, semicolon-separated work-order export.
ot_odr_filename = os.path.join("./data", "OT_ODR.csv.bz2")

# Read the whole file into memory; decompression is handled by pandas.
df = pd.read_csv(ot_odr_filename, sep=";", compression="bz2")

In [3]:
# Display the loaded frame; the rendered output elides the middle rows.
df

Unnamed: 0,OT_ID,ODR_ID,ODR_LIBELLE,TYPE_TRAVAIL,DUREE_TRAVAIL,SYSTEM_N1,SYSTEM_N2,SYSTEM_N3,EQU_ID,DATE_OT,KILOMETRAGE,SIG_ORGANE,SIG_CONTEXTE,SIG_OBS,LIGNE
0,OT000000000,OM000000000,REMPLACEMENT D'UNE GLACE LAT VOYAGEUR,CARROSSERIE,4.00,EQUIPEMENT DE CARROSSERIE,VITRAGE,VITRAGE LAT,E00005934,2011-03-29 19:26:06,149698.557783,GLACE/BAIE,INTERIEUR/GAUCHE/ARRIERE,DEBOITE,L0482
1,OT000000001,OM000000001,REMPLACEMENT D'UN COMMODO DE SIGNALISATION,ELECTRICITE,0.50,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00004713,2011-05-03 20:01:31,225035.016000,KLAXON/GONG,AVANT,ABSENT,L0147
2,OT000000002,OM000000002,REMPLACEMENT D'UN CARDAN DE LIAISON SUR CREMAI...,MECANIQUE,1.50,EQUIPEMENT CHASSIS,EQUIPEMENT DE DIRECTION,COMMANDE DE DIRECTION,E00006037,2011-05-05 14:40:22,71148.834963,VOITURE,A L'ACCELERATION,VIBRE,L0368
3,OT000000003,OM000000003,REMPLACEMENT D'UN PARE-CHOCS AVG,CARROSSERIE,0.50,EQUIPEMENT DE CARROSSERIE,ELEMENT CARROSSERIE EXT,PROTECTION AV,E00005670,2011-05-07 07:43:27,116441.657358,PARE-CHOCS,AVANT/GAUCHE,CASSE,L0066
4,OT000000004,OM000000004,REMPLACEMENT D'UN POTENTIOMETRE DE PORTE NUMERO 1,ELECTRICITE,0.50,EQUIPEMENT DE CARROSSERIE,PORTE,COMMANDE PORTE,E00004009,2011-05-18 10:56:50,0.000000,SECURITE PORTE,ARRIERE,BLOQUE,L0247
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
506553,OT000405952,OM000506539,REMPLACEMENT D'UN FEU DE RECUL,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00040793,2019-09-30 20:38:09,296005.373397,ECLAIRAGE FEUX EXTERIEURS,ARRIERE/EXTERIEUR/DROIT,CASSE,L0283
506554,OT000405953,OM000506546,REMPLACEMENT D'UN ECLAIRAGE EXT,MECANIQUE,0.75,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00274690,2019-09-30 21:21:20,153050.080050,ECLAIRAGE FEUX EXTERIEURS,ARRIERE/LATERAL,CASSE,L0116
506555,OT000405954,OM000506536,REMPLACEMENT D'UN PASSE SANS CONTACT,EQUIPEMENT EMBARQUE,0.03,EQUIPEMENT EMBARQUE,TELEBILLETIQUE,PASSE SANS CONTACT,E00256452,2019-09-30 21:39:29,175063.182439,AVTT,AVANT/PORTE,INTERMITTENT,L0134
506556,OT000405955,OM000506545,REMPLACEMENT D'UNE LAMPE DE FEU DE GABARIT,ELECTRICITE,0.04,EQUIPEMENT ELECTRIQUE,ECLAIRAGE-SIGNALISATION,ECLAIRAGE-SIGNALISATION EXT,E00006122,2019-09-30 21:55:28,437053.614263,ECLAIRAGE FEUX EXTERIEURS,LATERAL/HAUT/GAUCHE,NE FONCTIONNE PAS,L0270


# MODELE 1 : SIG_OBS, N1

In [4]:
# Columns that hold discrete labels and are stored with the pandas
# 'category' dtype from here on.
var_cat = [
    'ODR_LIBELLE',
    'TYPE_TRAVAIL',
    'SYSTEM_N1',
    'SYSTEM_N2',
    'SYSTEM_N3',
    'SIG_ORGANE',
    'SIG_CONTEXTE',
    'SIG_OBS',
    'LIGNE',
]

# Convert every listed column in a single pass; other columns keep their dtype.
df = df.astype({column: 'category' for column in var_cat})

# Summary of columns, non-null counts and dtypes after the conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506558 entries, 0 to 506557
Data columns (total 15 columns):
 #   Column         Non-Null Count   Dtype   
---  ------         --------------   -----   
 0   OT_ID          506558 non-null  object  
 1   ODR_ID         506558 non-null  object  
 2   ODR_LIBELLE    506558 non-null  category
 3   TYPE_TRAVAIL   506558 non-null  category
 4   DUREE_TRAVAIL  506558 non-null  float64 
 5   SYSTEM_N1      506558 non-null  category
 6   SYSTEM_N2      506558 non-null  category
 7   SYSTEM_N3      506558 non-null  category
 8   EQU_ID         506558 non-null  object  
 9   DATE_OT        506558 non-null  object  
 10  KILOMETRAGE    506557 non-null  float64 
 11  SIG_ORGANE     506558 non-null  category
 12  SIG_CONTEXTE   506558 non-null  category
 13  SIG_OBS        506558 non-null  category
 14  LIGNE          506558 non-null  category
dtypes: category(9), float64(2), object(4)
memory usage: 29.7+ MB


In [5]:
# Columns that become nodes of the Bayesian network.
var_to_model = ["SYSTEM_N1", "SIG_OBS"]

# One labelized variable per column, sized to the number of categories
# observed in that column (name and description are both the column name).
var_bn = {
    column: gum.LabelizedVariable(column, column, len(df[column].cat.categories))
    for column in var_to_model
}

### Ajout des labels

In [6]:
# Replace each variable's labels with the actual category names, keeping the
# category order of the corresponding DataFrame column.
for column, variable in var_bn.items():
    for position, label in enumerate(df[column].cat.categories):
        variable.changeLabel(position, label)

### Création du RB

In [20]:
# Start from an empty network and register one node per modelled variable.
bn = gum.BayesNet("Model 1")
for node_variable in var_bn.values():
    bn.add(node_variable)

# Single arc SIG_OBS -> SYSTEM_N1: the observation is the parent of the system.
bn.addArc("SIG_OBS", "SYSTEM_N1")

# Leave the network as the cell's last expression so its summary is displayed.
bn

(pyAgrum.BayesNet<double>@0x149734000) BN{nodes: 2, arcs: 1, domainSize: 590, dim: 589, mem: 5Ko 72o}

### Entraînement du RB pour calculer les probabilités

In [8]:
# Learn the CPTs of `bn` from the full frame, logging each CPT as it is learned.
# NOTE(review): `fit_bis` is not defined in this notebook — presumably it is
# added to BayesNet by the `pyagrum_extra` import; verify.
bn.fit_bis(df, verbose_mode=True)

- Learn CPT SYSTEM_N1
- Learn CPT SIG_OBS


In [9]:
# Display the conditional probability table of SYSTEM_N1; its only parent in
# this network is SIG_OBS.
bn.cpt("SYSTEM_N1")

(pyAgrum.Potential<double>@0x119a70b00) 
      ||  SYSTEM_N1                                                                                        |
SIG_OB||DIVERS   |EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|EQUIPEMEN|
------||---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|
A-COUP|| 0.0006  | 0.3728  | 0.0075  | 0.0752  | 0.0827  | 0.1548  | 0.1529  | 0.1222  | 0.0013  | 0.0301  |
ABSENT|| 0.0000  | 0.0009  | 0.0362  | 0.0704  | 0.0012  | 0.0070  | 0.0004  | 0.8435  | 0.0399  | 0.0006  |
ALLUME|| 0.0012  | 0.1110  | 0.0012  | 0.4402  | 0.0012  | 0.0012  | 0.0012  | 0.4402  | 0.0012  | 0.0012  |
ARRACH|| 0.0002  | 0.0253  | 0.0005  | 0.7252  | 0.0002  | 0.0103  | 0.0002  | 0.2348  | 0.0007  | 0.0026  |
ASPIRE|| 0.0000  | 0.0000  | 0.0000  | 0.9744  | 0.0000  | 0.0000  | 0.0000  | 0.0256  | 0.0000  | 0.0000  |
BALANC|| 0.0000  | 0.6828  | 0.0050  | 0.2020  | 0.0211  | 0.0136  | 0.0012  | 0.0310  

# Performance du modèle

In [10]:
def evaluation(list_var_obsr: list, var_target: str, test_size: int = 10000):
    """Refit the global network on a hold-out split and report its accuracy.

    The last ``test_size`` rows of the global ``df`` form the test set and all
    preceding rows form the training set (positional split, no shuffling).
    The global ``bn`` is refitted on the training rows, so calling this
    function changes the network used by later cells.

    Args:
        list_var_obsr: Names of the observed columns passed to ``bn.predict``.
        var_target: Name of the column to predict.
        test_size: Number of trailing rows held out for testing. Defaults to
            10000, the value that was previously hard-coded. The original
            author's note says 151966 corresponds to about 30% of the rows.

    Returns:
        The fraction (between 0 and 1) of test rows whose prediction equals
        the true value of ``var_target``.

    Raises:
        ValueError: If ``test_size`` is not strictly between 0 and the number
            of rows in ``df``.
    """
    # Guard the slice bounds: with 0 the negative slices would silently yield
    # an empty training set and the whole frame as test set.
    if not 0 < test_size < len(df):
        raise ValueError(
            f"test_size must be between 1 and {len(df) - 1}, got {test_size}"
        )

    ot_odr_df_train = df.iloc[:-test_size]
    ot_odr_df_test = df.iloc[-test_size:]

    bn.fit_bis(ot_odr_df_train, verbose_mode=True)
    pred = bn.predict(ot_odr_df_test[list_var_obsr], var_target=var_target, show_progress=True)

    # Named `accuracy` rather than `evaluation` so the local does not shadow
    # the function's own name.
    accuracy = (ot_odr_df_test[var_target] == pred).mean()
    print(f"Pour la target '{var_target}' avec les variables observés : {list_var_obsr} \nCe modèle on obtiens une performance de {accuracy*100} %")
    return accuracy

In [11]:
# Positional hold-out: the last 10000 rows are the test set, everything
# before them is the training set.
ot_odr_df_train, ot_odr_df_test = df.iloc[:-10000], df.iloc[-10000:]

In [12]:
# Fit the network again, this time on the training rows only, so the held-out
# rows are not used for learning.
# NOTE(review): presumably this replaces the CPTs learned earlier from the
# full frame — confirm against `fit_bis`.
bn.fit_bis(ot_odr_df_train, verbose_mode=True)

- Learn CPT SYSTEM_N1
- Learn CPT SIG_OBS


In [13]:
# Predict SYSTEM_N1 for every test row, passing only the SIG_OBS column as
# observed data.
# NOTE(review): `predict` is not defined in this notebook — presumably provided
# by `pyagrum_extra`; verify its return type before comparing element-wise.
pred = bn.predict(ot_odr_df_test[["SIG_OBS"]], var_target="SYSTEM_N1", show_progress=True)

predict progress: 100%

In [14]:
# Element-wise comparison of the true labels with the predictions.
prediction_hits = ot_odr_df_test["SYSTEM_N1"] == pred

# Share of correct predictions on the test rows.
# NOTE(review): this rebinds the name `evaluation`, which earlier referred to
# the function `evaluation(list_var_obsr, var_target)`; the function is no
# longer callable by that name after this cell runs.
evaluation = prediction_hits.mean()
print(f"Avec ce modèle on obtiens une performance de {evaluation*100} %")

Avec ce modèle on obtiens une performance de 63.24999999999999 %


## Application Dash

In [15]:
# Explanatory (observed) variables; each one gets a dropdown in the app.
var_features = ["SIG_OBS"]
# Variables to predict; each one gets a graph in the app.
var_targets = ["SYSTEM_N1"]

# Column-oriented data for the summary table: one row per target with the
# accuracy expressed as a percentage.
performance_data = dict(
    zip(
        ('Target', 'Pourcentage de performance'),
        (var_targets, [evaluation * 100]),
    )
)

performance_df = pd.DataFrame(performance_data)

In [16]:
app = dash.Dash(__name__)

# One labelled dropdown per explanatory variable, listing every category of
# the column and preselecting the first one.
feature_selectors = [
    html.Div([
        html.Label(feature),
        dcc.Dropdown(
            id=f'{feature}-dropdown',
            options=[
                {'label': category, 'value': category}
                for category in df[feature].cat.categories
            ],
            value=df[feature].cat.categories[0],
        ),
    ])
    for feature in var_features
]

# Static table showing the accuracy computed earlier.
performance_table = dash_table.DataTable(
    id='performance-table',
    columns=[{"name": column, "id": column} for column in performance_df.columns],
    data=performance_df.to_dict('records'),
)

# One graph placeholder per target; the figures are filled in by the callback.
target_graphs = [
    dcc.Graph(id=f'{target}-graph', config={'displayModeBar': False})
    for target in var_targets
]

# Page layout: title, dropdowns, performance table, then the graphs.
app.layout = html.Div([
    html.H1("Modèle 1 : Prédiction de la variable 'SYSTEM_N1' avec la variable 'SIG_OBS'"),
    html.Div(feature_selectors, style={'width': '30%', 'display': 'inline-block'}),
    html.Div([performance_table], style={'margin': '20px'}),
    html.Div(target_graphs, style={'width': '65%', 'float': 'right', 'display': 'inline-block'}),
])

### Update graph with selected options

In [18]:
@app.callback(
    [Output(f'{var}-graph', 'figure') for var in var_targets],
    [Input(f'{var}-dropdown', 'value') for var in var_features]
)
def update_graph(*var_features_values):
    """Recompute the target posteriors for the current dropdown selection.

    Args:
        *var_features_values: Selected value of each feature dropdown, in the
            same order as ``var_features``. A value is ``None`` when the user
            has cleared the corresponding dropdown.

    Returns:
        A tuple with one bar figure per entry of ``var_targets``, each showing
        the five most probable categories of that target given the evidence.
    """
    bn_ie = gum.LazyPropagation(bn)

    # A cleared dropdown reports None, which is not a valid label. Leave that
    # variable unobserved instead of passing None as evidence.
    ev = {
        var: value
        for var, value in zip(var_features, var_features_values)
        if value is not None
    }
    bn_ie.setEvidence(ev)
    bn_ie.makeInference()

    prob_target = []
    for var in var_targets:
        # Posterior as a pandas object indexed by category label.
        # NOTE(review): `topandas` is presumably provided by `pyagrum_extra`;
        # the droplevel(0) assumes a two-level index — verify.
        prob_target_var = bn_ie.posterior(var).topandas().droplevel(0)
        top_5_probs = prob_target_var.sort_values(ascending=False).head(5)

        prob_fig = px.bar(top_5_probs)
        prob_fig.update_layout(title=f"Top 5 Probabilités pour {var}")
        prob_fig.update_xaxes(title="Catégorie")
        prob_fig.update_yaxes(title="Probabilités")
        prob_target.append(prob_fig)

    # The outputs are declared as a list, so return one figure per output.
    return tuple(prob_target)

In [19]:
# Start the Dash server on port 8049 in debug mode, with the auto-reloader
# switched off (use_reloader=False).
app.run_server(debug=True, port=8049, use_reloader=False)

Dash is running on http://127.0.0.1:8049/

 * Serving Flask app '__main__'
 * Debug mode: on
