In [1]:
# Analyse des données de saturation

In [2]:
import os
import sys
new_path = os.getcwd()[:-34] + 'qualicharge-rtet'
sys.path.append(new_path)

In [3]:
import geopandas as gpd
import pandas as pd

In [4]:
data_quali = '../data/'
data_rtet = '../../rtet/'
origine = 'V1_0_0_'
stations_parcs_file = 'stations_parcs.geojson'
pdc_activite_file = ['pdc_en_activite_paca_avril_prod.json', 'pdc_en_activite_paca_avril_staging.json']
sessions_file = ['session_bouches_du_rhone_25_avril_staging.json', 'session_bouches_du_rhone_25_avril_prod.json']
status_file = ['status_bouches_du_rhone_25_avril_AM_staging.json', 'status_bouches_du_rhone_25_avril_AM_prod.json',
               'status_bouches_du_rhone_25_avril_PM18_staging.json', 'status_bouches_du_rhone_25_avril_PM24_staging.json',
               'status_bouches_du_rhone_25_avril_PM24_prod.json',]
refnat = {'tiles': 'cartodbpositron', 'location': [46.3, 2.3], 'zoom_start': 7}

## lecture des fichiers de la journée

In [5]:
liste_pdc = [pd.read_json(data_quali + file, orient='records') for file in pdc_activite_file]
pdc_en_activite = pd.concat(liste_pdc , ignore_index=True)[['id_pdc_itinerance', 'id_station_itinerance']].groupby('id_pdc_itinerance').first().reset_index()
len(pdc_en_activite)
pdc_en_activite

Unnamed: 0,id_pdc_itinerance,id_station_itinerance
0,FRALLEGO0000041,FRALLPGO000004
1,FRALLEGO0001411,FRALLP6IG0OECYY7T3
2,FRALLEGO0001431,FRALLPGO000143
3,FRALLEGO0001591,FRALLP6IG0OECYY7T3
4,FRALLEGO0001592,FRALLP6IG0OECYY7T3
...,...,...
1420,FRVIAE20142106011,FRVIAP142106
1421,FRVIAE20142106012,FRVIAP142106
1422,FRVIAE20142106013,FRVIAP142106
1423,FRVIAE20142106021,FRVIAP142106


In [6]:
stations_parcs = gpd.read_file(data_rtet + origine + stations_parcs_file)
#stations_parcs

In [7]:
pdc_stations = pdc_en_activite[pdc_en_activite['id_station_itinerance'].isin(stations_parcs['id_station'])]
#pdc_stations

In [8]:
liste_statuses = [pd.read_json(data_quali + file, orient='records') for file in status_file]
all_statuses = pd.concat(liste_statuses , ignore_index=True)
print(len(all_statuses))
status = all_statuses[all_statuses['id_pdc_itinerance'].isin(pdc_stations['id_pdc_itinerance'])].copy()
print(len(status))
status['horodatage'] = pd.to_datetime(status['horodatage'], format='ISO8601').astype('datetime64[s, UTC+02:00]')
#status

6924
3573


In [9]:
liste_sessions = [pd.read_json(data_quali + file, orient='records') for file in sessions_file]
all_sessions = pd.concat(liste_sessions , ignore_index=True)
print(len(all_sessions))
sessions = all_sessions[all_sessions['id_pdc_itinerance'].isin(pdc_stations['id_pdc_itinerance'])].copy()
print(len(sessions))
sessions['start'] = pd.to_datetime(sessions['start'], format='ISO8601').astype('datetime64[s, UTC+02:00]')
sessions['end'] = pd.to_datetime(sessions['end'], format='ISO8601').astype('datetime64[s, UTC+02:00]')
#sessions

2033
1241


## Génération des états échantillonnés des points de recharge

In [10]:
def to_sampled_statuses(data: pd.DataFrame, init_data: pd.DataFrame, timestamp: pd.Timestamp, echantillons: int):
    samples = pd.date_range(start=timestamp, end=timestamp+pd.Timedelta(days=1), periods=echantillons+1)
    periode = pd.DataFrame( {'periode': samples[0:echantillons]})
    state = pd.concat([data, init_data]).sort_values(by=['id_pdc_itinerance', 'horodatage'])
    state = state[(state['etat_pdc'] != 'inconnu')]
    state['f_horodatage'] = list(state['horodatage'])[1:len(state)] + [samples[echantillons]]
    state['f_id_pdc_itinerance'] = list(state['id_pdc_itinerance'])[1:len(state)] + ['aucun']
    crossed = pd.merge(state, periode, how='cross')
    sampled = crossed[((crossed['id_pdc_itinerance'].eq(crossed['f_id_pdc_itinerance'])) &
                 (crossed['periode'] >= crossed['horodatage']) &
                 (crossed['periode'] < crossed['f_horodatage'])) |
                (~(crossed['id_pdc_itinerance'].eq(crossed['f_id_pdc_itinerance'])) &
                 (crossed['periode'] >= crossed['horodatage']))]
    columns = ['periode', 'etat_pdc', 'id_pdc_itinerance']
    return sampled[columns].sort_values(by=['id_pdc_itinerance', 'periode']).reset_index(drop=True)

In [11]:
def to_sampled_sessions(data: pd.DataFrame, init_data: pd.DataFrame, timestamp: pd.Timestamp, echantillons: int):
    null_date = pd.Timestamp('2000-01-01T00:00:00+02:00')
    samples = pd.date_range(start=timestamp, end=timestamp+pd.Timedelta(days=1), periods=echantillons+1)
    periode = pd.DataFrame( {'periode': samples[0:echantillons]})
    sessions = pd.concat([data, init_data]).sort_values(by=['id_pdc_itinerance', 'start'])
    pdc = sessions['id_pdc_itinerance'].unique()
    sessions['occupation_pdc'] = 'occupe'
    
    crossed = pd.merge(sessions, periode, how='cross')
    sampled = crossed[((crossed['periode'] >= crossed['start']) &
                       (crossed['periode'] < crossed['end']))][['periode', 'occupation_pdc', 'id_pdc_itinerance']]
    
    non_occupe = pd.merge(periode, pd.DataFrame({'id_pdc_itinerance': sessions['id_pdc_itinerance'].unique()}), how='cross')
    sampled = pd.merge(non_occupe, sampled, how='left', on=['id_pdc_itinerance', 'periode']).fillna("f_libre")
    return sampled.sort_values(by=['id_pdc_itinerance', 'periode']).reset_index(drop=True)

In [56]:
echantillons = 288 #24
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')

In [13]:
pdc = status['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'horodatage': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'etat_pdc': ['en_service'] * len(pdc), 
                      'id_pdc_itinerance': pdc}) 
sampled_statuses = to_sampled_statuses(status, init, timestamp, echantillons)
#sampled_statuses

In [14]:
pd.pivot_table(sampled_statuses, columns='etat_pdc', aggfunc="count")

etat_pdc,en_service,hors_service
id_pdc_itinerance,79473,303
periode,79473,303


In [15]:
pdc = sessions['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'start': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'end': [timestamp + pd.Timedelta(hours=-1)] * len(pdc),
                      'id_pdc_itinerance': pdc}) 
sampled_sessions = to_sampled_sessions(sessions, init, timestamp, echantillons)
#sampled_sessions

In [16]:
pd.pivot_table(sampled_sessions, index = 'id_pdc_itinerance', columns='occupation_pdc', aggfunc="count")
pd.pivot_table(sampled_sessions, columns='occupation_pdc', aggfunc="count")

occupation_pdc,f_libre,occupe
id_pdc_itinerance,58125,7360
periode,58125,7360


## regroupement des fichiers pdc

In [17]:
def to_sampled_state_pdc(sessions, statuses):
    merged = pd.merge(sessions, statuses, how='outer', on=['id_pdc_itinerance', 'periode']).fillna('aaa')
    merged['state'] = merged[['etat_pdc', 'occupation_pdc']].agg('max', axis=1).replace('en_service', 'libre')
    merged = merged[['id_pdc_itinerance', 'periode', 'state']].replace('f_libre', 'libre')
    return merged.sort_values(by=['id_pdc_itinerance', 'periode']).reset_index(drop=True)

In [18]:
sampled_state_pdc = to_sampled_state_pdc(sampled_sessions, sampled_statuses)
sampled_state_pdc

Unnamed: 0,id_pdc_itinerance,periode,state
0,FRALLEGO0001411,2025-04-25 00:00:00+02:00,libre
1,FRALLEGO0001411,2025-04-25 00:05:00+02:00,libre
2,FRALLEGO0001411,2025-04-25 00:10:00+02:00,libre
3,FRALLEGO0001411,2025-04-25 00:15:00+02:00,libre
4,FRALLEGO0001411,2025-04-25 00:20:00+02:00,libre
...,...,...,...
80744,FRVIAE20142101011,2025-04-25 23:35:00+02:00,libre
80745,FRVIAE20142101011,2025-04-25 23:40:00+02:00,libre
80746,FRVIAE20142101011,2025-04-25 23:45:00+02:00,libre
80747,FRVIAE20142101011,2025-04-25 23:50:00+02:00,libre


In [19]:
pd.pivot_table(sampled_state_pdc, columns='state', aggfunc="count")

state,hors_service,libre,occupe
id_pdc_itinerance,297,73092,7360
periode,297,73092,7360


## fichier d'état des stations ou parcs

In [20]:
def to_sampled_state_grp(state_pdc, pdc_group, group_name: str):
    merged =  pd.merge(state_pdc, pdc_group, how='left', on='id_pdc_itinerance')
    print(len(state_pdc), len(merged))
    merged['occupe'] = merged['state'] == 'occupe'
    merged['hors_service'] = merged['state'] == 'hors_service'
    merged['libre'] = merged['state'] == 'libre'
    
    grouped = merged[[group_name, 'periode', 'occupe', 'hors_service', 'libre']].groupby([group_name, 'periode']).sum()
    grouped['nb_pdc'] = grouped['occupe'] + grouped['hors_service'] + grouped['libre']

    grouped['hs'] = (grouped['libre'] + grouped['occupe'] == 0) & (grouped['hors_service'] > 0)
    grouped['inactif'] = ~grouped['hs'] & (grouped['occupe'] == 0)
    grouped['sature'] = ~grouped['hs'] & ~grouped['inactif'] & (grouped['libre']/grouped['nb_pdc'] < 0.1)
    grouped['surcharge'] = ~grouped['hs'] & ~grouped['inactif'] & ~grouped['sature'] & (grouped['libre']/grouped['nb_pdc'] < 0.2)
    grouped['actif'] = ~grouped['hs'] & ~grouped['inactif'] & ~grouped['sature'] & ~grouped['surcharge']
    grouped['state'] = grouped['hs'] + grouped['inactif'] * 2 + grouped['actif'] * 3 + grouped['surcharge'] * 4 + grouped['sature'] * 5
    
    return grouped

In [41]:
sample_state_station = to_sampled_state_grp(sampled_state_pdc, pdc_stations, 'id_station_itinerance')
sample_state_station

80749 80749


Unnamed: 0_level_0,Unnamed: 1_level_0,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
id_station_itinerance,periode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
FRALLP6IG0OECYY7T3,2025-04-25 00:00:00+02:00,0,0,3,3,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:05:00+02:00,0,0,3,3,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:10:00+02:00,0,0,3,3,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:15:00+02:00,0,0,3,3,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:20:00+02:00,0,0,3,3,False,True,False,False,False,2
...,...,...,...,...,...,...,...,...,...,...,...
FRVIAP142101,2025-04-25 23:35:00+02:00,0,0,1,1,False,True,False,False,False,2
FRVIAP142101,2025-04-25 23:40:00+02:00,0,0,1,1,False,True,False,False,False,2
FRVIAP142101,2025-04-25 23:45:00+02:00,0,0,1,1,False,True,False,False,False,2
FRVIAP142101,2025-04-25 23:50:00+02:00,0,0,1,1,False,True,False,False,False,2


In [42]:
sample_state_station[['hs', 'inactif', 'sature', 'surcharge', 'actif']].agg('sum')

hs             90
inactif      9868
sature        158
surcharge      51
actif        3081
dtype: int64

In [43]:
sample_state_station.groupby('id_station_itinerance').agg('sum')

Unnamed: 0_level_0,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
id_station_itinerance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRALLP6IG0OECYY7T3,91,0,773,864,0,224,0,0,64,640
FRATLPFR00878,32,0,3440,3472,0,272,0,0,16,592
FRATLPFR00879,12,0,2298,2310,0,282,0,0,6,582
FRATLPFR00950,174,0,2217,2391,0,219,0,0,69,645
FRATLPFR00982,0,0,576,576,0,288,0,0,0,576
FRBMPP200350P,98,0,1054,1152,0,213,0,0,75,651
FRBMPP200350P50019614019442,8,0,280,288,0,280,8,0,0,600
FRELCPAAGIR,129,11,3028,3168,1,194,0,0,93,668
FRELCPAAVITE,61,0,3395,3456,0,231,0,0,57,633
FRELCPAXPHC,134,0,1306,1440,0,196,0,0,92,668


## Etats horaires

In [61]:
def to_sampled_state_grp_h(state_grp, group_name: str, echantillons: int, duree_etat_min: float):
    nb_ech_hour = echantillons / 24
    print(echantillons, nb_ech_hour)
    sampled = state_grp.reset_index()
    sampled['periode_h'] = sampled['periode'].dt.hour
    sampled['periode'] = sampled['periode'].dt.date
    
    sampled_h = sampled.groupby([group_name, 'periode', 'periode_h']).agg('sum')
    sampled_h = sampled_h / nb_ech_hour
    for etat in ['hs', 'inactif', 'sature', 'surcharge', 'actif']:
        sampled_h[etat] = sampled_h[etat] * 60
    sampled_h['nb_pdc'] = sampled_h['nb_pdc'].astype('int')
    
    sampled_h['sature_h'] = sampled_h['sature'] > duree_etat_min
    sampled_h['surcharge_h'] = sampled_h['surcharge'] > duree_etat_min
    
    return sampled_h[['nb_pdc', 'hs', 'inactif', 'sature', 'surcharge', 'actif', 'sature_h', 'surcharge_h']]

In [62]:
sample_state_station_h = to_sampled_state_grp_h(sample_state_station, 'id_station_itinerance', echantillons, 15)
sample_state_station_h

288 12.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
id_station_itinerance,periode,periode_h,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRALLP6IG0OECYY7T3,2025-04-25,0,3,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,1,3,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,2,3,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,3,3,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,4,3,0.0,60.0,0.0,0.0,0.0,False,False
...,...,...,...,...,...,...,...,...,...,...
FRVIAP142101,2025-04-25,19,1,0.0,60.0,0.0,0.0,0.0,False,False
FRVIAP142101,2025-04-25,20,1,0.0,60.0,0.0,0.0,0.0,False,False
FRVIAP142101,2025-04-25,21,1,0.0,60.0,0.0,0.0,0.0,False,False
FRVIAP142101,2025-04-25,22,1,0.0,60.0,0.0,0.0,0.0,False,False


In [63]:
sample_state_station_h[sample_state_station_h['sature_h']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
id_station_itinerance,periode,periode_h,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRBMPP200350P50019614019442,2025-04-25,17,1,0.0,20.0,40.0,0.0,0.0,True,False
FRPD1PELDMRS,2025-04-25,18,1,0.0,40.0,20.0,0.0,0.0,True,False
FRPD1PELDMRS,2025-04-25,19,1,0.0,30.0,30.0,0.0,0.0,True,False
FRPD1PRNTDST,2025-04-25,18,1,0.0,35.0,25.0,0.0,0.0,True,False
FRPD1PSGLLDS,2025-04-25,9,2,0.0,35.0,25.0,0.0,0.0,True,False
FRPD1PSGLLDS,2025-04-25,10,2,0.0,0.0,20.0,0.0,40.0,True,False
FRTSLP1719,2025-04-25,14,4,0.0,0.0,25.0,0.0,35.0,True,False
FRTSLP1719,2025-04-25,15,4,0.0,0.0,20.0,0.0,40.0,True,False
FRTSLP3017,2025-04-25,9,12,0.0,0.0,20.0,10.0,30.0,True,False
FRTSLP3017,2025-04-25,10,12,0.0,0.0,20.0,15.0,25.0,True,False


In [64]:
sample_state_station_h[sample_state_station_h['sature_h'] | sample_state_station_h['surcharge_h'] ]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
id_station_itinerance,periode,periode_h,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRBMPP200350P50019614019442,2025-04-25,17,1,0.0,20.0,40.0,0.0,0.0,True,False
FRPD1PELDMRS,2025-04-25,18,1,0.0,40.0,20.0,0.0,0.0,True,False
FRPD1PELDMRS,2025-04-25,19,1,0.0,30.0,30.0,0.0,0.0,True,False
FRPD1PRNTDST,2025-04-25,18,1,0.0,35.0,25.0,0.0,0.0,True,False
FRPD1PSGLLDS,2025-04-25,9,2,0.0,35.0,25.0,0.0,0.0,True,False
FRPD1PSGLLDS,2025-04-25,10,2,0.0,0.0,20.0,0.0,40.0,True,False
FRTSLP1719,2025-04-25,14,4,0.0,0.0,25.0,0.0,35.0,True,False
FRTSLP1719,2025-04-25,15,4,0.0,0.0,20.0,0.0,40.0,True,False
FRTSLP3017,2025-04-25,9,12,0.0,0.0,20.0,10.0,30.0,True,False
FRTSLP3017,2025-04-25,10,12,0.0,0.0,20.0,15.0,25.0,True,False


## Annexe : tests des fonctions

In [28]:
echantillons = 24
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')
start = [1, 1.2, 3, 5.5, 9, 13.1, 20]
end = [2.1, 2.7, 5, 7.5, 12.1, 15.1, 22.6]


test = pd.DataFrame( {'start': [timestamp + pd.Timedelta(hours=val) for val in start],
                      'end': [timestamp + pd.Timedelta(hours=val) for val in end],
                      'id_pdc_itinerance': ['p1', 'p2', 'p2', 'p1', 'p2', 'p1', 'p2']})
pdc = test['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'start': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'end': [timestamp + pd.Timedelta(hours=-1)] * len(pdc),
                      'id_pdc_itinerance': pdc}) 
# p1 : [1, 2.1], [5.5, 7.5], [13.1, 15.1]
# p2 : [1.2, 2.7], [3, 5], [9, 12.1], [20, 22.6]
# hp1 : [x, 1] [2.1, 5.5], [7.5, 13.1]
res = to_sampled_sessions(test, init, timestamp, echantillons)
#res

In [29]:
echantillons = 24
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')
valeurs = [1, 1.2, 3, 3.5, 5, 6.1, 12]

test = pd.DataFrame( {'horodatage': [timestamp + pd.Timedelta(hours=val) for val in valeurs],
                      'etat_pdc':['en_service', 'hors_service', 'en_service', 'en_service', 
                                  'hors_service', 'en_service', 'hors_service'],
                      'id_pdc_itinerance': ['p1', 'p2', 'p2', 'p1', 'p2', 'p1', 'p2']})
pdc = test['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'horodatage': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'etat_pdc': ['en_service'] * len(pdc), 
                      'id_pdc_itinerance': pdc}) 
res = to_sampled_statuses(test, init, timestamp, echantillons)
#res

In [30]:
sessions = pd.DataFrame({'id_pdc_itinerance': ['p1', 'p1', 'p1', 'p2', 'p2', 'p2', 'p3', 'p3', 'p3'], 
                       'periode': [0,1,2,0,1,2,0,1,2],
                       'occupation_pdc': ['occupe', 'f_libre', 'occupe', 'f_libre', 'occupe', 'f_libre','f_libre', 'occupe', 'f_libre']})
status = pd.DataFrame({'id_pdc_itinerance': ['p1', 'p1', 'p1', 'p3', 'p3', 'p3', 'p4', 'p4', 'p4'], 
                       'periode': [0,1,2,0,1,2, 0,1,2],
                       'etat_pdc': ['hors_service', 'hors_service', 'en_service', 'en_service', 'hors_service', 'hors_service', 'en_service', 'hors_service', 'en_service']})
merged = pd.merge(sessions, status, how='outer', on=['id_pdc_itinerance', 'periode']).fillna('aaa')
merged

Unnamed: 0,id_pdc_itinerance,periode,occupation_pdc,etat_pdc
0,p1,0,occupe,hors_service
1,p1,1,f_libre,hors_service
2,p1,2,occupe,en_service
3,p2,0,f_libre,aaa
4,p2,1,occupe,aaa
5,p2,2,f_libre,aaa
6,p3,0,f_libre,en_service
7,p3,1,occupe,hors_service
8,p3,2,f_libre,hors_service
9,p4,0,aaa,en_service


In [31]:
merged = to_sampled_state_pdc(sessions, status)
merged

Unnamed: 0,id_pdc_itinerance,periode,state
0,p1,0,occupe
1,p1,1,hors_service
2,p1,2,occupe
3,p2,0,libre
4,p2,1,occupe
5,p2,2,libre
6,p3,0,libre
7,p3,1,occupe
8,p3,2,hors_service
9,p4,0,libre


In [32]:
print(merged['state'])
merged['state'].str.replace('en_service', 'libre')
merged['state'] = merged['state'].str.replace('en_service', 'libre')
merged

0           occupe
1     hors_service
2           occupe
3            libre
4           occupe
5            libre
6            libre
7           occupe
8     hors_service
9            libre
10    hors_service
11           libre
Name: state, dtype: object


Unnamed: 0,id_pdc_itinerance,periode,state
0,p1,0,occupe
1,p1,1,hors_service
2,p1,2,occupe
3,p2,0,libre
4,p2,1,occupe
5,p2,2,libre
6,p3,0,libre
7,p3,1,occupe
8,p3,2,hors_service
9,p4,0,libre


In [33]:
test = pd.DataFrame({'id_pdc_itinerance': ['p1', 'p1', 'p1', 'p2', 'p2', 'p2', 'p3', 'p3', 'p3'],
                     'periode' : [0, 1, 2, 0, 1, 2, 0, 1, 2],
                     'state' : ['occupe', 'hors_service', 'occupe', 'libre', 'occupe', 'libre', 'libre', 'occupe', 'hors_service']})
stations = pd.DataFrame({'id_pdc_itinerance': ['p1', 'p2', 'p3'],
                         'id_station_itinerance': ['s1', 's1', 's2']}) 
to_sampled_state_grp(test, stations, 'id_station_itinerance')

9 9


Unnamed: 0_level_0,Unnamed: 1_level_0,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
id_station_itinerance,periode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
s1,0,1,0,1,2,False,False,False,False,True,3
s1,1,1,1,0,2,False,False,True,False,False,5
s1,2,1,0,1,2,False,False,False,False,True,3
s2,0,0,0,1,1,False,True,False,False,False,2
s2,1,1,0,0,1,False,False,True,False,False,5
s2,2,0,1,0,1,True,False,False,False,False,1


In [34]:
test = pd.DataFrame({'name':         ['hs', 'inactif', 'sature', 'surcharge', 'actif'],
                     'occupe':       [0, 0, 5, 3, 2],
                     'hors_service': [6, 2, 1, 2, 2],
                     'libre':        [0, 4, 0, 1, 2],
                     'nb_pdc':       [6, 6, 6, 6, 6]})
test['hs'] = (test['libre'] + test['occupe'] == 0) & (test['hors_service'] > 0)
test['inactif'] = ~test['hs'] & (test['occupe'] == 0)
test['sature'] = ~test['hs'] & ~test['inactif'] & (test['libre']/test['nb_pdc'] < 0.1)
test['surcharge'] = ~test['hs'] & ~test['inactif'] & ~test['sature'] & (test['libre']/test['nb_pdc'] < 0.2)
test['actif'] = ~test['hs'] & ~test['inactif'] & ~test['sature'] & ~test['surcharge']
test['state'] = test['hs'] + test['inactif'] * 2 + test['actif'] * 3 + test['surcharge'] * 4 + test['sature'] * 5
test

Unnamed: 0,name,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
0,hs,0,6,0,6,True,False,False,False,False,1
1,inactif,0,2,4,6,False,True,False,False,False,2
2,sature,5,1,0,6,False,False,True,False,False,5
3,surcharge,3,2,1,6,False,False,False,True,False,4
4,actif,2,2,2,6,False,False,False,False,True,3
