# Analyse des données de saturation

In [1]:
import os
import sys
# Put the 'qualicharge-rtet' folder on the import path (presumably where the
# `saturation` module imported in the next cell lives).
# NOTE(review): the slice drops the last 34 characters of the current working
# directory, so the result depends on the exact folder the kernel is started
# from -- confirm before moving or renaming the notebook.
new_path = f"{os.getcwd()[:-34]}qualicharge-rtet"
sys.path.append(new_path)

In [2]:
import geopandas as gpd
import pandas as pd

from saturation import to_sampled_statuses, to_sampled_sessions, to_sampled_state_pdc, to_sampled_state_grp, to_sampled_state_grp_h

In [3]:
# --- Input locations (string prefixes concatenated with file names below) ---
data_quali = '../data/'
data_rtet = '../../rtet/'
origine = 'V1_0_0_'  # prefix prepended to `stations_parcs_file` when it is read
stations_parcs_file = 'stations_parcs.geojson'

# Reference instant of the analysis: 25 April 2025, 00:00 at UTC+02:00.
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')

# --- JSON extracts read from `data_quali` ---
pdc_activite_file = [
    'pdc_en_activite_paca_avril_prod.json',
    'pdc_en_activite_paca_avril_staging.json',
]
sessions_file = [
    'session_bouches_du_rhone_25_avril_staging.json',
    'session_bouches_du_rhone_25_avril_prod.json',
]
# NOTE(review): there is no 'PM18_prod' entry in this list -- confirm that
# this is intentional and not a missing extract.
status_file = [
    'status_bouches_du_rhone_25_avril_AM_staging.json',
    'status_bouches_du_rhone_25_avril_AM_prod.json',
    'status_bouches_du_rhone_25_avril_PM18_staging.json',
    'status_bouches_du_rhone_25_avril_PM24_staging.json',
    'status_bouches_du_rhone_25_avril_PM24_prod.json',
]

# Keyword arguments for the map display (tiles, centre, zoom).
refnat = dict(tiles='cartodbpositron', location=[46.3, 2.3], zoom_start=7)

## lecture des fichiers de la journée

In [4]:
# Read every "active charge point" extract listed in `pdc_activite_file`.
liste_pdc = [
    pd.read_json(data_quali + nom_fichier, orient='records')
    for nom_fichier in pdc_activite_file
]

# Stack the extracts, keep the two identifier columns and collapse to one row
# per charge point (first non-null station id found for each point).
pdc_en_activite = (
    pd.concat(liste_pdc, ignore_index=True)[['id_pdc_itinerance', 'id_station_itinerance']]
    .groupby('id_pdc_itinerance')
    .first()
    .reset_index()
)

# Cell output: number of distinct charge points.
len(pdc_en_activite)
#pdc_en_activite

1425

In [5]:
# Spot check: rows of the active charge points attached to station FRPD1PRNTDST.
pdc_en_activite.loc[pdc_en_activite['id_station_itinerance'].eq('FRPD1PRNTDST')]

Unnamed: 0,id_pdc_itinerance,id_station_itinerance
896,FRPD1ERNTDSTALFS22011,FRPD1PRNTDST
897,FRPD1ERNTDSTKPC200011,FRPD1PRNTDST
898,FRPD1ERNTDSTKPC200012,FRPD1PRNTDST
899,FRPD1ERNTDSTKPC200013,FRPD1PRNTDST
900,FRPD1ERNTDSTKPC200014,FRPD1PRNTDST


In [6]:
# Load the stations/parcs layer and align its station key with the name used
# in the charge-point tables ('id_station' -> 'id_station_itinerance').
stations_parcs = (
    gpd.read_file(data_rtet + origine + stations_parcs_file)
    .rename(columns={'id_station': 'id_station_itinerance'})
)
#stations_parcs

In [7]:
# Keep only the active charge points whose station appears in `stations_parcs`.
pdc_stations = pdc_en_activite.loc[
    lambda pdc_df: pdc_df['id_station_itinerance'].isin(stations_parcs['id_station_itinerance'])
]
#pdc_stations

In [8]:
# Spot check: station FRPD1PRNTDST is still present after the station filter.
pdc_stations.loc[pdc_stations['id_station_itinerance'].eq('FRPD1PRNTDST')]

Unnamed: 0,id_pdc_itinerance,id_station_itinerance
896,FRPD1ERNTDSTALFS22011,FRPD1PRNTDST
897,FRPD1ERNTDSTKPC200011,FRPD1PRNTDST
898,FRPD1ERNTDSTKPC200012,FRPD1PRNTDST
899,FRPD1ERNTDSTKPC200013,FRPD1PRNTDST
900,FRPD1ERNTDSTKPC200014,FRPD1PRNTDST


In [9]:
# Read and stack every status extract listed in `status_file`.
liste_statuses = [
    pd.read_json(data_quali + nom_fichier, orient='records')
    for nom_fichier in status_file
]
all_statuses = pd.concat(liste_statuses, ignore_index=True)
print(len(all_statuses))

# Restrict to the charge points kept in `pdc_stations`; the copy makes the
# column assignment below act on an independent frame.
status = all_statuses.loc[
    all_statuses['id_pdc_itinerance'].isin(pdc_stations['id_pdc_itinerance'])
].copy()
print(len(status))

# Parse the ISO 8601 strings and store them at second resolution, UTC+02:00.
status['horodatage'] = (
    pd.to_datetime(status['horodatage'], format='ISO8601')
    .astype('datetime64[s, UTC+02:00]')
)
#status

6924
3573


In [10]:
# Read and stack every session extract listed in `sessions_file`.
liste_sessions = [
    pd.read_json(data_quali + nom_fichier, orient='records')
    for nom_fichier in sessions_file
]
all_sessions = pd.concat(liste_sessions, ignore_index=True)
print(len(all_sessions))

# Restrict to the charge points kept in `pdc_stations`; the copy makes the
# column assignments below act on an independent frame.
sessions = all_sessions.loc[
    all_sessions['id_pdc_itinerance'].isin(pdc_stations['id_pdc_itinerance'])
].copy()
print(len(sessions))

# Parse both session bounds and store them at second resolution, UTC+02:00.
sessions['start'] = (
    pd.to_datetime(sessions['start'], format='ISO8601')
    .astype('datetime64[s, UTC+02:00]')
)
sessions['end'] = (
    pd.to_datetime(sessions['end'], format='ISO8601')
    .astype('datetime64[s, UTC+02:00]')
)
#sessions

2033
1241


## génération des états échantillonnés des statuts et sessions

In [11]:
# Number of samples passed to the sampling helpers below.
# 24 * 12 = 288; the original cell noted 24 as the alternative value.
# NOTE(review): the outputs further down show a 5-minute step, which matches
# 288 samples over one day -- confirm against the `saturation` helpers.
echantillons = 24 * 12

In [12]:
# Active-point rows for every charge point that appears in `status`.
stations_status = (
    pdc_en_activite
    .set_index('id_pdc_itinerance')
    .loc[status['id_pdc_itinerance'].unique(), :]
)
# All active charge points belonging to the stations found above, including
# points of those stations that have no row in `status`.
pdc_status = (
    pdc_en_activite
    .set_index('id_station_itinerance')
    .loc[stations_status['id_station_itinerance'].unique(), :]
    .reset_index()
)
# Alternative noted in the original cell (only points that have a status row):
#   pdc = status['id_pdc_itinerance'].unique()
pdc = pdc_status['id_pdc_itinerance'].unique()

# One initial row per charge point: 'en_service', dated one day before `timestamp`.
init = pd.DataFrame({
    'horodatage': [timestamp - pd.Timedelta(days=1)] * len(pdc),
    'etat_pdc': ['en_service'] * len(pdc),
    'id_pdc_itinerance': pdc,
})
sampled_statuses = to_sampled_statuses(status, init, timestamp, echantillons)
#sampled_statuses

In [13]:
# Number of sampled rows per status value ('etat_pdc'), all charge points together.
sampled_statuses.pivot_table(columns='etat_pdc', aggfunc='count')

etat_pdc,en_service,hors_service
id_pdc_itinerance,87537,303
periode,87537,303


In [14]:
# Active-point rows for every charge point that appears in `sessions`.
stations_sessions = (
    pdc_en_activite
    .set_index('id_pdc_itinerance')
    .loc[sessions['id_pdc_itinerance'].unique(), :]
)
# All active charge points belonging to the stations found above, including
# points of those stations that have no row in `sessions`.
pdc_sessions = (
    pdc_en_activite
    .set_index('id_station_itinerance')
    .loc[stations_sessions['id_station_itinerance'].unique(), :]
    .reset_index()
)
# Alternative noted in the original cell (only points that have a session row):
#   pdc = sessions['id_pdc_itinerance'].unique()
pdc = pdc_sessions['id_pdc_itinerance'].unique()

# One initial session per charge point, starting one day before `timestamp`
# and ending one hour before `timestamp`.
init = pd.DataFrame({
    'start': [timestamp - pd.Timedelta(days=1)] * len(pdc),
    'end': [timestamp - pd.Timedelta(hours=1)] * len(pdc),
    'id_pdc_itinerance': pdc,
})
sampled_sessions = to_sampled_sessions(sessions, init, timestamp, echantillons)
#sampled_sessions

In [15]:
# Number of sampled rows per occupancy value ('occupation_pdc'), all charge
# points together.
# The original cell also built a per-charge-point pivot
# (index='id_pdc_itinerance') on the line before, but that result was neither
# assigned nor displayed, so it was dropped. Add index='id_pdc_itinerance' to
# the call below to get that breakdown instead.
pd.pivot_table(sampled_sessions, columns='occupation_pdc', aggfunc="count")

occupation_pdc,f_libre,occupe
id_pdc_itinerance,80013,7360
periode,80013,7360


## état des pdc par regroupement des statuts et sessions

In [16]:
# Combine the sampled sessions and sampled statuses into one table; the
# displayed result has one `state` per charge point and period.
sampled_state_pdc = to_sampled_state_pdc(
    sampled_sessions,
    sampled_statuses,
)
sampled_state_pdc

Unnamed: 0,id_pdc_itinerance,periode,state
0,FRALLEGO0001411,2025-04-25 00:00:00+02:00,libre
1,FRALLEGO0001411,2025-04-25 00:05:00+02:00,libre
2,FRALLEGO0001411,2025-04-25 00:10:00+02:00,libre
3,FRALLEGO0001411,2025-04-25 00:15:00+02:00,libre
4,FRALLEGO0001411,2025-04-25 00:20:00+02:00,libre
...,...,...,...
89096,FRVIAE20142101011,2025-04-25 23:35:00+02:00,libre
89097,FRVIAE20142101011,2025-04-25 23:40:00+02:00,libre
89098,FRVIAE20142101011,2025-04-25 23:45:00+02:00,libre
89099,FRVIAE20142101011,2025-04-25 23:50:00+02:00,libre


In [17]:
# Number of sampled rows per combined state, all charge points together.
sampled_state_pdc.pivot_table(columns='state', aggfunc='count')

state,hors_service,libre,occupe
id_pdc_itinerance,297,81444,7360
periode,297,81444,7360


## état global échantillonné d'un groupement de pdc (stations ou parcs)

In [28]:
# Aggregate the per-charge-point states by station, using `pdc_stations` as
# the charge point -> station mapping.
# NOTE(review): the two numbers printed below the cell presumably come from
# inside the helper -- confirm in the `saturation` module.
sample_state_station = to_sampled_state_grp(
    sampled_state_pdc,
    pdc_stations,
    'id_station_itinerance',
)
sample_state_station

89101 89101


Unnamed: 0_level_0,Unnamed: 1_level_0,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
id_station_itinerance,periode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
FRALLP6IG0OECYY7T3,2025-04-25 00:00:00+02:00,0,0,4,4,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:05:00+02:00,0,0,4,4,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:10:00+02:00,0,0,4,4,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:15:00+02:00,0,0,4,4,False,True,False,False,False,2
FRALLP6IG0OECYY7T3,2025-04-25 00:20:00+02:00,0,0,4,4,False,True,False,False,False,2
...,...,...,...,...,...,...,...,...,...,...,...
FRVIAP142101,2025-04-25 23:35:00+02:00,0,0,1,1,False,True,False,False,False,2
FRVIAP142101,2025-04-25 23:40:00+02:00,0,0,1,1,False,True,False,False,False,2
FRVIAP142101,2025-04-25 23:45:00+02:00,0,0,1,1,False,True,False,False,False,2
FRVIAP142101,2025-04-25 23:50:00+02:00,0,0,1,1,False,True,False,False,False,2


In [19]:
# Count, over all stations and periods, how many samples carry each boolean flag.
sample_state_station[['hs', 'inactif', 'sature', 'surcharge', 'actif']].sum()

hs             70
inactif      9888
sature        122
surcharge      51
actif        3117
dtype: int64

In [20]:
# Per-station column sums over all periods of the day.
sample_state_station.groupby('id_station_itinerance').sum()

Unnamed: 0_level_0,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
id_station_itinerance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRALLP6IG0OECYY7T3,91,0,1061,1152,0,224,0,0,64,640
FRATLPFR00878,32,0,3728,3760,0,272,0,0,16,592
FRATLPFR00879,12,0,2298,2310,0,282,0,0,6,582
FRATLPFR00950,174,0,2217,2391,0,219,0,0,69,645
FRATLPFR00982,0,0,576,576,0,288,0,0,0,576
FRBMPP200350P,98,0,1054,1152,0,213,0,0,75,651
FRBMPP200350P50019614019442,8,0,568,576,0,280,0,0,8,584
FRELCPAAGIR,129,11,3028,3168,1,194,0,0,93,668
FRELCPAAVITE,61,0,3395,3456,0,231,0,0,57,633
FRELCPAXPHC,134,0,1306,1440,0,196,0,0,92,668


## état global horaire d'un groupement de pdc

In [21]:
# Roll the sampled station states up to hourly rows.
# NOTE(review): the last argument (15) looks like a threshold in minutes for
# the hourly `sature_h` / `surcharge_h` flags -- confirm in the `saturation`
# module.
sample_state_station_h = to_sampled_state_grp_h(
    sample_state_station,
    'id_station_itinerance',
    echantillons,
    15,
)
sample_state_station_h

288 12.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
id_station_itinerance,periode,periode_h,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRALLP6IG0OECYY7T3,2025-04-25,0,4,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,1,4,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,2,4,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,3,4,0.0,60.0,0.0,0.0,0.0,False,False
FRALLP6IG0OECYY7T3,2025-04-25,4,4,0.0,60.0,0.0,0.0,0.0,False,False
...,...,...,...,...,...,...,...,...,...,...
FRVIAP142101,2025-04-25,19,1,0.0,60.0,0.0,0.0,0.0,False,False
FRVIAP142101,2025-04-25,20,1,0.0,60.0,0.0,0.0,0.0,False,False
FRVIAP142101,2025-04-25,21,1,0.0,60.0,0.0,0.0,0.0,False,False
FRVIAP142101,2025-04-25,22,1,0.0,60.0,0.0,0.0,0.0,False,False


In [34]:
# Hourly rows of a single station, selected on the first index level.
sample_state_station_h.xs('FRTSLP1719')

Unnamed: 0_level_0,Unnamed: 1_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
periode,periode_h,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-04-25,0,4,0.0,20.0,0.0,0.0,40.0,False,False
2025-04-25,1,4,0.0,30.0,0.0,0.0,30.0,False,False
2025-04-25,2,4,0.0,60.0,0.0,0.0,0.0,False,False
2025-04-25,3,4,0.0,60.0,0.0,0.0,0.0,False,False
2025-04-25,4,4,0.0,60.0,0.0,0.0,0.0,False,False
2025-04-25,5,4,0.0,60.0,0.0,0.0,0.0,False,False
2025-04-25,6,4,0.0,60.0,0.0,0.0,0.0,False,False
2025-04-25,7,4,0.0,35.0,0.0,0.0,25.0,False,False
2025-04-25,8,4,0.0,15.0,0.0,0.0,45.0,False,False
2025-04-25,9,4,0.0,0.0,15.0,0.0,45.0,False,False


In [22]:
# Hourly rows whose `sature_h` flag is set.
sature = sample_state_station_h.loc[sample_state_station_h['sature_h']]
sature

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
id_station_itinerance,periode,periode_h,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRTSLP1719,2025-04-25,14,4,0.0,0.0,25.0,0.0,35.0,True,False
FRTSLP1719,2025-04-25,15,4,0.0,0.0,20.0,0.0,40.0,True,False
FRTSLP3017,2025-04-25,9,12,0.0,0.0,20.0,10.0,30.0,True,False
FRTSLP3017,2025-04-25,10,12,0.0,0.0,20.0,15.0,25.0,True,False
FRTSLP3017,2025-04-25,11,12,0.0,0.0,25.0,10.0,25.0,True,False
FRTSLP3017,2025-04-25,12,12,0.0,0.0,50.0,5.0,5.0,True,False
FRTSLP3017,2025-04-25,13,12,0.0,0.0,35.0,0.0,25.0,True,False
FRTSLP3017,2025-04-25,14,12,0.0,0.0,35.0,0.0,25.0,True,False
FRTSLP3017,2025-04-25,15,12,0.0,0.0,35.0,0.0,25.0,True,False
FRTSLP30237,2025-04-25,12,12,0.0,0.0,20.0,25.0,15.0,True,True


In [23]:
# Hourly rows flagged `sature_h` OR `surcharge_h`. Despite its name, this
# frame therefore also contains rows that are only saturated.
surcharge = sample_state_station_h.loc[
    lambda hourly: hourly['sature_h'] | hourly['surcharge_h']
]
surcharge

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nb_pdc,hs,inactif,sature,surcharge,actif,sature_h,surcharge_h
id_station_itinerance,periode,periode_h,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FRTSLP1719,2025-04-25,14,4,0.0,0.0,25.0,0.0,35.0,True,False
FRTSLP1719,2025-04-25,15,4,0.0,0.0,20.0,0.0,40.0,True,False
FRTSLP3017,2025-04-25,9,12,0.0,0.0,20.0,10.0,30.0,True,False
FRTSLP3017,2025-04-25,10,12,0.0,0.0,20.0,15.0,25.0,True,False
FRTSLP3017,2025-04-25,11,12,0.0,0.0,25.0,10.0,25.0,True,False
FRTSLP3017,2025-04-25,12,12,0.0,0.0,50.0,5.0,5.0,True,False
FRTSLP3017,2025-04-25,13,12,0.0,0.0,35.0,0.0,25.0,True,False
FRTSLP3017,2025-04-25,14,12,0.0,0.0,35.0,0.0,25.0,True,False
FRTSLP3017,2025-04-25,15,12,0.0,0.0,35.0,0.0,25.0,True,False
FRTSLP3017,2025-04-25,16,12,0.0,0.0,10.0,25.0,25.0,False,True


In [24]:
# Attach station attributes and geometry to each flagged hourly row. The left
# join keeps every row of `surcharge`, even if its station has no match.
surcharge_stations = surcharge.reset_index().merge(
    stations_parcs[['id_station_itinerance', 'parc_id', 'operateur', 'parc_nature', 'geometry']],
    how='left',
    on='id_station_itinerance',
)
#surcharge_stations

In [25]:
# Map settings for this view: same tiles and centre as the `refnat` defined in
# the configuration cell, but with zoom_start=5 (this rebinds the global).
refnat = dict(tiles='cartodbpositron', location=[46.3, 2.3], zoom_start=5)

# Wrap the merged table as a GeoDataFrame (CRS given as '2154') and show it
# on an interactive map.
gpd.GeoDataFrame(
    surcharge_stations,
    crs='2154',
).explore(**refnat)