# Import libraries

In [1]:
import pandas as pd
from sqlalchemy import text

from database_connection import create_postgres_engine
from modeling_database import build_incident_dataframes
from hazard_index import hazard_index_v16, HazardParams

# Connect to database

In [2]:
# Create the database engine; the helper is given the path to the
# project's variables.env file.
engine = create_postgres_engine("../../variables.env")

# Smoke-test the connection by printing the server's version string.
with engine.connect() as connection:
    server_version = connection.execute(text("SELECT version();")).scalar()
    print(server_version)

PostgreSQL 18.1 on aarch64-unknown-linux-gnu, compiled by aarch64-unknown-linux-gnu-gcc (GCC) 12.4.0, 64-bit


In [3]:
# Query every row and column of all_incidents.
sql = text("""
SELECT *
FROM all_incidents ai
""")

# Load the result into pandas and order it by record creation time
# (ascending is the sort_values default).
with engine.connect() as conn:
    df = pd.read_sql(sql, conn).sort_values(by="situation_record_creation_time")

# Building dataframe

In [None]:
# Build the incident dataframe from the raw query result
# (the transformation itself lives in modeling_database).
df2 = build_incident_dataframes(df)

# Export to the same location the "Hazard index" section reads from
# (../../data/incidents_summary.csv). Writing to the working directory
# instead would leave that section loading a stale or missing file.
df2.to_csv("../../data/incidents_summary.csv", index=False)

# Preview the first rows.
df2.head()

Unnamed: 0,id,run_id,extracted_at,situation_id,situation_version,situation_record_id,situation_record_version,situation_type,overall_severity,severity,...,to_longitude,to_kilometer_point,to_municipality,to_province,to_autonomous_community,compliance_option,vehicle_type,management_type,general_public_comment,situation_record_end_time
0,1,09690d63-6fc9-4d65-939d-a3352c1a70cf,2026-01-22 18:08:59.630945+00:00,6129,,9454,1,RoadOrCarriagewayOrLaneManagement,low,low,...,-3.108543,19.0,Murtas,Granada,Andalucía,mandatory,anyVehicle,laneClosures,,NaT
1,2,09690d63-6fc9-4d65-939d-a3352c1a70cf,2026-01-22 18:08:59.630945+00:00,73792,,120870,1,RoadOrCarriagewayOrLaneManagement,medium,medium,...,-5.761705,7.0,Gijón,Asturias,"Asturias, Principado de",mandatory,anyVehicle,laneClosures,,NaT
2,3,09690d63-6fc9-4d65-939d-a3352c1a70cf,2026-01-22 18:08:59.630945+00:00,73813,,120891,1,RoadOrCarriagewayOrLaneManagement,highest,highest,...,-6.339432,464.2,Salas,Asturias,"Asturias, Principado de",mandatory,anyVehicle,roadClosed,,NaT
3,4,09690d63-6fc9-4d65-939d-a3352c1a70cf,2026-01-22 18:08:59.630945+00:00,191105,,324404,1,RoadOrCarriagewayOrLaneManagement,high,high,...,-5.534313,19.8,Quintana de la Serena,Badajoz,Extremadura,mandatory,anyVehicle,roadClosed,,NaT
4,5,09690d63-6fc9-4d65-939d-a3352c1a70cf,2026-01-22 18:08:59.630945+00:00,209887,,355484,1,RoadOrCarriagewayOrLaneManagement,high,high,...,,,,,,mandatory,anyVehicle,roadClosed,,NaT


# Cleaning dataframe

In [None]:
# Columns removed before the exploratory counts below.
# Each name appears exactly once (the original list repeated
# "situation_record_id" and "situation_record_version_time").
cols_to_drop = [
    "situation_version",
    "situation_record_version",
    "overall_severity",
    "probability_of_occurrence",
    "information_status",
    "source_identification",
    "compliance_option",
    "general_public_comment",
    "situation_record_id",
    "situation_record_version_time",
    "id",
    "run_id",
    "extracted_at",
    "tpeg_location_type",
]

# errors="ignore" skips any listed column that is absent from df2
# instead of raising; df2 itself is left unmodified.
incidents_table = df2.drop(columns=cols_to_drop, errors="ignore")

In [None]:
# Preview the cleaned table produced by the drop above
# (df2 still holds the uncleaned columns).
incidents_table.head()

Unnamed: 0,situation_id,situation_type,severity,validity_status,situation_record_creation_time,validity_overall_start_time,validity_overall_end_time,cause_type,detailed_cause_type,cause_subtype,...,from_autonomous_community,to_latitude,to_longitude,to_kilometer_point,to_municipality,to_province,to_autonomous_community,vehicle_type,management_type,situation_record_end_time
0,6129,RoadOrCarriagewayOrLaneManagement,low,active,2021-12-10 13:15:55+00:00,2021-09-07 13:00:00+00:00,NaT,roadMaintenance,roadMaintenance,roadworks,...,Andalucía,36.909466,-3.108543,19.0,Murtas,Granada,Andalucía,anyVehicle,laneClosures,NaT
1,73792,RoadOrCarriagewayOrLaneManagement,medium,active,2022-03-21 12:05:19+00:00,2021-02-24 11:40:00+00:00,NaT,roadMaintenance,roadMaintenance,roadworks,...,"Asturias, Principado de",43.555912,-5.761705,7.0,Gijón,Asturias,"Asturias, Principado de",anyVehicle,laneClosures,NaT
2,73813,RoadOrCarriagewayOrLaneManagement,highest,active,2022-03-21 12:05:21+00:00,2021-08-03 15:09:00+00:00,NaT,roadMaintenance,roadMaintenance,roadworks,...,"Asturias, Principado de",43.392933,-6.339432,464.2,Salas,Asturias,"Asturias, Principado de",anyVehicle,roadClosed,NaT
3,191105,RoadOrCarriagewayOrLaneManagement,high,active,2022-05-05 15:03:32+00:00,2018-12-18 11:05:00+00:00,NaT,roadMaintenance,roadMaintenance,roadworks,...,Extremadura,38.983944,-5.534313,19.8,Quintana de la Serena,Badajoz,Extremadura,anyVehicle,roadClosed,NaT
4,209887,RoadOrCarriagewayOrLaneManagement,high,active,2022-05-10 10:42:37+00:00,2022-05-10 10:35:00+00:00,NaT,roadMaintenance,roadMaintenance,roadworks,...,,,,,,,,anyVehicle,roadClosed,NaT


In [20]:
# Frequency table for situation_type; dropna=False keeps missing
# values as their own row.
situation_type_col = incidents_table["situation_type"]
counts = situation_type_col.value_counts(dropna=False)
print(counts)

situation_type
GenericSituationRecord                    42031
RoadOrCarriagewayOrLaneManagement          2257
AbnormalTraffic                             957
GeneralInstructionOrMessageToRoadUsers      199
SpeedManagement                             162
PoorEnvironmentConditions                   162
MaintenanceWorks                            133
NonWeatherRelatedRoadConditions              99
GeneralObstruction                           83
WinterDrivingManagement                      49
VehicleObstruction                           17
Name: count, dtype: int64


In [22]:
# Frequency table for severity; dropna=False keeps missing values
# as their own row.
severity_col = incidents_table["severity"]
counts = severity_col.value_counts(dropna=False)
print(counts)

severity
None       44217
medium      1204
high         338
low          242
highest      148
Name: count, dtype: int64


In [25]:
# Frequency table for validity_status; dropna=False keeps missing
# values as their own row.
validity_status_col = incidents_table["validity_status"]
counts = validity_status_col.value_counts(dropna=False)
print(counts)

validity_status
inactive    45214
active        935
Name: count, dtype: int64


In [27]:
# Frequency table for cause_type; dropna=False keeps missing values
# as their own row.
cause_type_col = incidents_table["cause_type"]
counts = cause_type_col.value_counts(dropna=False)
print(counts)

cause_type
vehicleObstruction                   40075
obstruction                           2188
poorEnvironment                        979
roadMaintenance                        912
abnormalTraffic                        896
accident                               861
environmentalObstruction               122
roadOrCarriagewayOrLaneManagement       91
infrastructureDamageObstruction         19
disturbance                              4
publicEvent                              2
Name: count, dtype: int64


In [28]:
# Frequency table for detailed_cause_type; dropna=False keeps missing
# values as their own row.
detailed_cause_type_col = incidents_table["detailed_cause_type"]
counts = detailed_cause_type_col.value_counts(dropna=False)
print(counts)

detailed_cause_type
None               44228
roadMaintenance     1060
accident             861
Name: count, dtype: int64


In [None]:
# Frequency table for cause_subtype; dropna=False keeps missing values
# as their own row.
# The HazardParams assignment that used to sit in this cell was removed:
# it is unrelated to the counts and is defined in the "Hazard index" section.
counts = incidents_table["cause_subtype"].value_counts(dropna=False)

print(counts)

cause_subtype
None                44228
roadworks             912
accident              861
snowploughsInUse      148
Name: count, dtype: int64


In [30]:
# Frequency table for carriageway; dropna=False keeps missing values
# as their own row.
carriageway_col = incidents_table["carriageway"]
counts = carriageway_col.value_counts(dropna=False)
print(counts)

carriageway
None                      42059
unspecifiedCarriageway     3755
exitSlipRoad                218
entrySlipRoad                59
serviceRoad                  58
Name: count, dtype: int64


In [32]:
# Frequency table for vehicle_type; dropna=False keeps missing values
# as their own row.
vehicle_type_col = incidents_table["vehicle_type"]
counts = vehicle_type_col.value_counts(dropna=False)
print(counts)

vehicle_type
None                 43422
anyVehicle            2186
heavyVehicle           471
bus                     46
_extended               20
car                      2
largeGoodsVehicle        2
Name: count, dtype: int64


In [33]:
# Frequency table for management_type; dropna=False keeps missing
# values as their own row.
management_type_col = incidents_table["management_type"]
counts = management_type_col.value_counts(dropna=False)
print(counts)

management_type
None                                        43667
laneClosures                                 1239
doNotUseSpecifiedLanesOrCarriageways          429
narrowLanes                                   332
roadClosed                                    145
carriagewayClosures                            84
other                                          80
singleAlternateLineTraffic                     71
vehicleStorageInOperation                      28
newRoadworksLayout                             20
lanesDeviated                                  16
intermittentShortTermClosures                  13
useOfSpecifiedLanesOrCarriagewaysAllowed       12
_extended                                      11
weightRestrictionInOperation                    2
Name: count, dtype: int64


# Hazard index

In [2]:
# Load the incident summary from the project data directory.
# NOTE(review): this rebinds df2 to the CSV contents, so column dtypes
# are whatever read_csv infers — confirm hazard_index_v16 expects that.
summary_csv_path = "../../data/incidents_summary.csv"
df2 = pd.read_csv(summary_csv_path)

In [3]:
# Hazard-index configuration. The meaning of each field is defined by
# hazard_index.HazardParams; the values are passed through unchanged.
params = HazardParams(
    R0=1500,
    r=300,
    L=3,
    gamma=1.2,
    alpha=1.0,
    d0=500,
)

In [6]:
# Evaluate the hazard index at a single query point.
# With return_details=True the call yields two values, unpacked below
# into the index itself and a details object.
query_point = {"q_lat": 40.348436, "q_lon": -3.861383}

H, details = hazard_index_v16(
    params=params,
    df2=df2,
    window_start="2026-01-25T00:00:00Z",
    return_details=True,
    **query_point,
)

In [7]:
# Print the index value and the accompanying details, one per line.
for label, value in (("Hazard Index:", H), ("Details:", details)):
    print(label, value)

Hazard Index: 5.012486548264671
Details: {'H': 5.012486548264671, 'window_start': Timestamp('2026-01-25 00:00:00+0000', tz='UTC'), 'window_end': Timestamp('2026-01-29 15:50:35.641951+0000', tz='UTC'), 'N_active_anytime': 25976, 'seed_count': 34, 'visited_count': 34, 'beta': [1.0, 0.5459180944327432, 0.4313440936655467, 0.3754383383374634], 'layer_weight_sums': [5.012486548264671, 0.0, 0.0, 0.0], 'layers_index': [[19782, 20709, 2155, 12100, 5612, 12707, 7770, 14366, 15808, 11849, 1592, 782, 10403, 17320, 23032, 20291, 366, 25572, 23830, 13418, 23758, 1493, 7284, 2541, 19311, 7731, 13479, 13269, 19165, 14666, 14274, 13134, 5478, 11443], [], [], []], 'layers_situation_id': [[16209633, 16211936, 16150964, 16182487, 16161506, 16184006, 16169213, 16189888, 16195502, 16181911, 16149329, 16146450, 16178166, 16200860, 16221091, 16210897, 16144966, 16231451, 16224152, 16186330, 16223829, 16148978, 16167666, 16152045, 16208257, 16169109, 16186521, 16185725, 16207822, 16190913, 16189538, 16185212,

In [None]:
# NOTE(review): unused coordinate pair left in an unexecuted cell —
# presumably an alternative (lat, lon) query point; confirm or remove.
(40.473089, -3.764716)