In [1]:
# Analyse des données de saturation

In [2]:
import os
import sys
new_path = os.getcwd()[:-34] + 'qualicharge-rtet'
sys.path.append(new_path)

In [3]:
import json, copy
from shapely import LineString, Point
import numpy as np
import geopandas as gpd
import pandas as pd
import geo_nx as gnx 
import networkx as nx 

from geo_nx import geom_to_crs, cast_id
from qualicharge_rtet import proximite, insertion_projection, association_stations
from qualicharge_rtet import creation_pandas_stations, Afir, export_stations_parcs
from qualicharge_rtet import get_rtet_attr_station, get_parc_id_station, propagation_attributs_core

In [4]:
GEOM = "geometry"
NODE_ID = "node_id"
NATURE = "nature"
WEIGHT = "weight"
CORE = "core"

data_quali = '../data/'
data_rtet = '../../rtet/'
origine = 'V1_0_0_'
stations_parcs_file = 'stations_parcs.geojson'
pdc_activite_file = ['pdc_en_activite_paca_avril_prod.json', 'pdc_en_activite_paca_avril_staging.json']
sessions_file = ['session_bouches_du_rhone_25_avril_staging.json', 'session_bouches_du_rhone_25_avril_prod.json']
status_file = ['status_bouches_du_rhone_25_avril_AM_staging.json', 'status_bouches_du_rhone_25_avril_AM_prod.json',
               'status_bouches_du_rhone_25_avril_PM18_staging.json', 'status_bouches_du_rhone_25_avril_PM24_staging.json',
               'status_bouches_du_rhone_25_avril_PM24_prod.json',]
refnat = {'tiles': 'cartodbpositron', 'location': [46.3, 2.3], 'zoom_start': 7}

## lecture des fichiers de la journée

In [5]:
stations_parcs = gpd.read_file(data_rtet + origine + stations_parcs_file)
#stations_parc

In [6]:
liste_status = [pd.read_json(data_quali + file, orient='records') for file in status_file]
status = pd.concat(liste_status , ignore_index=True)
status['horodatage'] = pd.to_datetime(status['horodatage'], format='ISO8601').astype('datetime64[s, UTC+02:00]')
#status

In [7]:
liste_sessions = [pd.read_json(data_quali + file, orient='records') for file in sessions_file]
sessions = pd.concat(liste_sessions , ignore_index=True)
sessions['start'] = pd.to_datetime(sessions['start'], format='ISO8601').astype('datetime64[s, UTC+02:00]')
sessions['end'] = pd.to_datetime(sessions['end'], format='ISO8601').astype('datetime64[s, UTC+02:00]')
#sessions

In [8]:
liste_pdc = [pd.read_json(data_quali + file, orient='records') for file in pdc_activite_file]
pdc_en_activite = pd.concat(liste_pdc , ignore_index=True)
#pdc_en_activite

## Génération des états échantillonnés des points de recharge

In [9]:
def to_sampled_statuses(data: pd.DataFrame, init_data: pd.DataFrame, timestamp: pd.Timestamp, echantillons: int):
    samples = pd.date_range(start=timestamp, end=timestamp+pd.Timedelta(days=1), periods=echantillons+1)
    periode = pd.DataFrame( {'periode': samples[0:echantillons]})
    state = pd.concat([data, init_data]).sort_values(by=['id_pdc_itinerance', 'horodatage'])
    state = state[(state['etat_pdc'] != 'inconnu')]
    state['f_horodatage'] = list(state['horodatage'])[1:len(state)] + [samples[echantillons]]
    state['f_id_pdc_itinerance'] = list(state['id_pdc_itinerance'])[1:len(state)] + ['aucun']
    crossed = pd.merge(state, periode, how='cross')
    sampled = crossed[((crossed['id_pdc_itinerance'].eq(crossed['f_id_pdc_itinerance'])) &
                 (crossed['periode'] >= crossed['horodatage']) &
                 (crossed['periode'] < crossed['f_horodatage'])) |
                (~(crossed['id_pdc_itinerance'].eq(crossed['f_id_pdc_itinerance'])) &
                 (crossed['periode'] >= crossed['horodatage']))]
    columns = ['periode', 'etat_pdc', 'id_pdc_itinerance']
    return sampled[columns].sort_values(by=['id_pdc_itinerance', 'periode']).reset_index(drop=True)

In [10]:
def to_sampled_sessions(data: pd.DataFrame, init_data: pd.DataFrame, timestamp: pd.Timestamp, echantillons: int):
    null_date = pd.Timestamp('2000-01-01T00:00:00+02:00')
    samples = pd.date_range(start=timestamp, end=timestamp+pd.Timedelta(days=1), periods=echantillons+1)
    periode = pd.DataFrame( {'periode': samples[0:echantillons]})
    sessions = pd.concat([data, init_data]).sort_values(by=['id_pdc_itinerance', 'start'])
    pdc = sessions['id_pdc_itinerance'].unique()
    sessions['occupation_pdc'] = 'occupe'
    
    crossed = pd.merge(sessions, periode, how='cross')
    sampled = crossed[((crossed['periode'] >= crossed['start']) &
                       (crossed['periode'] < crossed['end']))][['periode', 'occupation_pdc', 'id_pdc_itinerance']]
    
    non_occupe = pd.merge(periode, pd.DataFrame({'id_pdc_itinerance': sessions['id_pdc_itinerance'].unique()}), how='cross')
    sampled = pd.merge(non_occupe, sampled, how='left', on=['id_pdc_itinerance', 'periode']).fillna("f_libre")
    return sampled.sort_values(by=['id_pdc_itinerance', 'periode']).reset_index(drop=True)

In [11]:
echantillons = 288 #24
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')

In [12]:
pdc = status['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'horodatage': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'etat_pdc': ['en_service'] * len(pdc), 
                      'id_pdc_itinerance': pdc}) 
sampled_statuses = to_sampled_statuses(status, init, timestamp, echantillons)
#sampled_statuses

In [13]:
pd.pivot_table(sampled_statuses, columns='etat_pdc', aggfunc="count")

etat_pdc,en_service,hors_service
id_pdc_itinerance,164242,1646
periode,164242,1646


In [14]:
pdc = sessions['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'start': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'end': [timestamp + pd.Timedelta(hours=-1)] * len(pdc),
                      'id_pdc_itinerance': pdc}) 
sampled_sessions = to_sampled_sessions(sessions, init, timestamp, echantillons)
#sampled_sessions

In [15]:
pd.pivot_table(sampled_sessions, index = 'id_pdc_itinerance', columns='occupation_pdc', aggfunc="count")
pd.pivot_table(sampled_sessions, columns='occupation_pdc', aggfunc="count")

occupation_pdc,f_libre,occupe
id_pdc_itinerance,98249,13371
periode,98249,13371


## regroupement des fichiers

In [16]:
def to_sampled_state(sessions, statuses):
    merged = pd.merge(sessions, statuses, how='left', on=['id_pdc_itinerance', 'periode']).fillna('aaa')
    merged['state'] = merged[['etat_pdc', 'occupation_pdc']].agg('max', axis=1)
    merged = merged[['id_pdc_itinerance', 'periode', 'state']].replace('f_libre', 'libre')
    return merged.sort_values(by=['id_pdc_itinerance', 'periode']).reset_index(drop=True)

In [17]:
sampled_state = to_sampled_state(sampled_sessions, sampled_statuses)
sampled_state

Unnamed: 0,id_pdc_itinerance,periode,state
0,FRALLEGO0001411,2025-04-25 00:00:00+02:00,libre
1,FRALLEGO0001411,2025-04-25 00:05:00+02:00,libre
2,FRALLEGO0001411,2025-04-25 00:10:00+02:00,libre
3,FRALLEGO0001411,2025-04-25 00:15:00+02:00,libre
4,FRALLEGO0001411,2025-04-25 00:20:00+02:00,libre
...,...,...,...
111615,FRVIAE20142106022,2025-04-25 23:35:00+02:00,libre
111616,FRVIAE20142106022,2025-04-25 23:40:00+02:00,libre
111617,FRVIAE20142106022,2025-04-25 23:45:00+02:00,libre
111618,FRVIAE20142106022,2025-04-25 23:50:00+02:00,libre


In [18]:
pd.pivot_table(sampled_state, columns='state', aggfunc="count")

state,hors_service,libre,occupe
id_pdc_itinerance,456,97793,13371
periode,456,97793,13371


## Annexe : tests des fonctions

In [19]:
echantillons = 24
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')
start = [1, 1.2, 3, 5.5, 9, 13.1, 20]
end = [2.1, 2.7, 5, 7.5, 12.1, 15.1, 22.6]


test = pd.DataFrame( {'start': [timestamp + pd.Timedelta(hours=val) for val in start],
                      'end': [timestamp + pd.Timedelta(hours=val) for val in end],
                      'id_pdc_itinerance': ['p1', 'p2', 'p2', 'p1', 'p2', 'p1', 'p2']})
pdc = test['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'start': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'end': [timestamp + pd.Timedelta(hours=-1)] * len(pdc),
                      'id_pdc_itinerance': pdc}) 
# p1 : [1, 2.1], [5.5, 7.5], [13.1, 15.1]
# p2 : [1.2, 2.7], [3, 5], [9, 12.1], [20, 22.6]
# hp1 : [x, 1] [2.1, 5.5], [7.5, 13.1]
res = to_sampled_sessions(test, init, timestamp, echantillons)
#res

In [20]:
echantillons = 24
timestamp = pd.Timestamp('2025-04-25T00:00:00+02:00')
valeurs = [1, 1.2, 3, 3.5, 5, 6.1, 12]

test = pd.DataFrame( {'horodatage': [timestamp + pd.Timedelta(hours=val) for val in valeurs],
                      'etat_pdc':['en_service', 'hors_service', 'en_service', 'en_service', 
                                  'hors_service', 'en_service', 'hors_service'],
                      'id_pdc_itinerance': ['p1', 'p2', 'p2', 'p1', 'p2', 'p1', 'p2']})
pdc = test['id_pdc_itinerance'].unique()
init = pd.DataFrame( {'horodatage': [timestamp + pd.Timedelta(days=-1)] * len(pdc), 
                      'etat_pdc': ['en_service'] * len(pdc), 
                      'id_pdc_itinerance': pdc}) 
res = to_sampled_statuses(test, init, timestamp, echantillons)
#res

In [21]:
sessions = pd.DataFrame({'id_pdc_itinerance': ['p1', 'p1', 'p1', 'p2', 'p2', 'p2', 'p3', 'p3', 'p3'], 
                       'periode': [0,1,2,0,1,2,0,1,2],
                       'occupation': ['occupe', 'flibre', 'occupe', 'flibre', 'occupe', 'flibre','flibre', 'occupe', 'flibre']})
status = pd.DataFrame({'id_pdc_itinerance': ['p1', 'p1', 'p1', 'p3', 'p3', 'p3'], 
                       'periode': [0,1,2,0,1,2],
                       'etat': ['hors_service', 'hors_service', 'en_service', 'en_service', 'hors_service', 'hors_service']})
merged = pd.merge(sessions, status, how='left', on=['id_pdc_itinerance', 'periode']).fillna('aaa')
#merged

In [22]:
merged['state'] = merged[['etat', 'occupation']].agg('max', axis=1)
merged

Unnamed: 0,id_pdc_itinerance,periode,occupation,etat,state
0,p1,0,occupe,hors_service,occupe
1,p1,1,flibre,hors_service,hors_service
2,p1,2,occupe,en_service,occupe
3,p2,0,flibre,aaa,flibre
4,p2,1,occupe,aaa,occupe
5,p2,2,flibre,aaa,flibre
6,p3,0,flibre,en_service,flibre
7,p3,1,occupe,hors_service,occupe
8,p3,2,flibre,hors_service,hors_service


In [28]:
merged['station'] = ['s1'] * 6 + ['s2'] * 3
merged['occupe'] = merged['state'] == 'occupe'
merged['hors_service'] = merged['state'] == 'hors_service'
merged['libre'] = merged['state'] == 'flibre'
merged

Unnamed: 0,id_pdc_itinerance,periode,occupation,etat,state,station,occupe,hors_service,libre
0,p1,0,occupe,hors_service,occupe,s1,True,False,False
1,p1,1,flibre,hors_service,hors_service,s1,False,True,False
2,p1,2,occupe,en_service,occupe,s1,True,False,False
3,p2,0,flibre,aaa,flibre,s1,False,False,True
4,p2,1,occupe,aaa,occupe,s1,True,False,False
5,p2,2,flibre,aaa,flibre,s1,False,False,True
6,p3,0,flibre,en_service,flibre,s2,False,False,True
7,p3,1,occupe,hors_service,occupe,s2,True,False,False
8,p3,2,flibre,hors_service,hors_service,s2,False,True,False


In [29]:
grouped = merged.groupby('station')
grouped.get_group('s1')

Unnamed: 0,id_pdc_itinerance,periode,occupation,etat,state,station,occupe,hors_service,libre
0,p1,0,occupe,hors_service,occupe,s1,True,False,False
1,p1,1,flibre,hors_service,hors_service,s1,False,True,False
2,p1,2,occupe,en_service,occupe,s1,True,False,False
3,p2,0,flibre,aaa,flibre,s1,False,False,True
4,p2,1,occupe,aaa,occupe,s1,True,False,False
5,p2,2,flibre,aaa,flibre,s1,False,False,True


In [33]:
grouped = merged[['station', 'occupe', 'hors_service', 'libre']].groupby('station').sum()
grouped['nb_pdc'] = grouped['occupe'] + grouped['hors_service'] + grouped['libre']
grouped

Unnamed: 0_level_0,occupe,hors_service,libre,nb_pdc
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
s1,3,1,2,6
s2,1,1,1,3


In [48]:
test = pd.DataFrame({'name':         ['hs', 'inactif', 'sature', 'surcharge', 'actif'],
                     'occupe':       [0, 0, 5, 3, 2],
                     'hors_service': [6, 2, 1, 2, 2],
                     'libre':        [0, 4, 0, 1, 2],
                     'nb_pdc':       [6, 6, 6, 6, 6]})
test['hs'] = (test['libre'] + test['occupe'] == 0) & (test['hors_service'] > 0)
test['inactif'] = ~test['hs'] & (test['occupe'] == 0)
test['sature'] = ~test['hs'] & ~test['inactif'] & (test['libre']/test['nb_pdc'] < 0.1)
test['surcharge'] = ~test['hs'] & ~test['inactif'] & ~test['sature'] & (test['libre']/test['nb_pdc'] < 0.2)
test['actif'] = ~test['hs'] & ~test['inactif'] & ~test['sature'] & ~test['surcharge']
test['state'] = test['hs'] + test['inactif'] * 2 + test['actif'] * 3 + test['surcharge'] * 4 + test['sature'] * 5
test

Unnamed: 0,name,occupe,hors_service,libre,nb_pdc,hs,inactif,sature,surcharge,actif,state
0,hs,0,6,0,6,True,False,False,False,False,1
1,inactif,0,2,4,6,False,True,False,False,False,2
2,sature,5,1,0,6,False,False,True,False,False,5
3,surcharge,3,2,1,6,False,False,False,True,False,4
4,actif,2,2,2,6,False,False,False,False,True,3
