In [2]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor, early_stopping
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv('../climate_danger/data_climate_danger.csv')
df.head()

Unnamed: 0,year_quarter,avg_weather_risk_score,code_commune_INSEE,code_departement,fire_score,year,dt,total_risk_score
0,2014T1,0.26,1001,1,0.0,2014,2014-01-01,0.13
1,2014T1,0.26,1002,1,0.0,2014,2014-01-01,0.13
2,2014T1,0.26,1004,1,0.0,2014,2014-01-01,0.13
3,2014T1,0.26,1005,1,0.0,2014,2014-01-01,0.13
4,2014T1,0.26,1006,1,0.0,2014,2014-01-01,0.13


In [4]:
df['code_departement'].nunique()

94

In [5]:
df['code_commune_INSEE'].nunique()

35434

In [6]:
df['code_commune_INSEE'].astype(str).str.len().value_counts()

code_commune_INSEE
5    1418692
4     140404
Name: count, dtype: int64

In [7]:
df['code_departement'].astype(str).str.len().value_counts()

code_departement
2    1418692
1     140404
Name: count, dtype: int64

In [8]:
df['code_commune_INSEE'] = df['code_commune_INSEE'].astype(str).str.zfill(5)
df['code_departement'] = df['code_departement'].astype(str).str.zfill(2)

In [9]:
print(df['code_commune_INSEE'].astype(str).str.len().value_counts())
df['code_departement'].astype(str).str.len().value_counts()

code_commune_INSEE
5    1559096
Name: count, dtype: int64


code_departement
2    1559096
Name: count, dtype: int64

In [10]:
df['code_commune_INSEE'] = df['code_commune_INSEE'].astype(int)

In [11]:
df['year'].unique()

array([2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024])

In [12]:
print(df.shape)
df.head()

(1559096, 8)


Unnamed: 0,year_quarter,avg_weather_risk_score,code_commune_INSEE,code_departement,fire_score,year,dt,total_risk_score
0,2014T1,0.26,1001,1,0.0,2014,2014-01-01,0.13
1,2014T1,0.26,1002,1,0.0,2014,2014-01-01,0.13
2,2014T1,0.26,1004,1,0.0,2014,2014-01-01,0.13
3,2014T1,0.26,1005,1,0.0,2014,2014-01-01,0.13
4,2014T1,0.26,1006,1,0.0,2014,2014-01-01,0.13


## Model by department

In [13]:
df_train_bis = df.groupby(['code_commune_INSEE','year'])['fire_score'].max().reset_index()
df_train_bis['code_commune_INSEE'] = df_train_bis['code_commune_INSEE'].astype('category')
df_train_bis.head()

Unnamed: 0,code_commune_INSEE,year,fire_score
0,1001,2014,0.0
1,1001,2015,0.0
2,1001,2016,0.0
3,1001,2017,0.0
4,1001,2018,0.0


In [14]:
# On ajoute 5 lags (2019-2023) pour prédire 2024 puis rolling

for lag in range(1, 4):
    df_train_bis[f'fire_score_lag{lag}'] = df_train_bis.groupby('code_commune_INSEE')['fire_score'].shift(lag)
df_train_bis.head()

  df_train_bis[f'fire_score_lag{lag}'] = df_train_bis.groupby('code_commune_INSEE')['fire_score'].shift(lag)
  df_train_bis[f'fire_score_lag{lag}'] = df_train_bis.groupby('code_commune_INSEE')['fire_score'].shift(lag)
  df_train_bis[f'fire_score_lag{lag}'] = df_train_bis.groupby('code_commune_INSEE')['fire_score'].shift(lag)


Unnamed: 0,code_commune_INSEE,year,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3
0,1001,2014,0.0,,,
1,1001,2015,0.0,0.0,,
2,1001,2016,0.0,0.0,0.0,
3,1001,2017,0.0,0.0,0.0,0.0
4,1001,2018,0.0,0.0,0.0,0.0


In [15]:
df.isna().sum() / df.shape[0]

year_quarter              0.0
avg_weather_risk_score    0.0
code_commune_INSEE        0.0
code_departement          0.0
fire_score                0.0
year                      0.0
dt                        0.0
total_risk_score          0.0
dtype: float64

In [16]:
df.shape

(1559096, 8)

In [17]:
# # Features pour le modèle
# features = ['year'] + [f'fire_score_lag{lag}' for lag in range(1, 6)]
# df_train = df[df['year'] <= 2023].dropna(subset=[f'fire_score_lag{lag}' for lag in range(1, 6)])
# X = df_train[features]
# y = df_train['fire_score']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [18]:
# groupby commune et year + .max

In [19]:
# df_train_bis = df.groupby(['code_commune_INSEE','year'])['fire_score'].max().reset_index()
# df_train_bis['code_commune_INSEE'] = df_train_bis['code_commune_INSEE'].astype('category')
# df_train_bis.head()

In [20]:
df_train_bis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 389774 entries, 0 to 389773
Data columns (total 6 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   code_commune_INSEE  389774 non-null  category
 1   year                389774 non-null  int64   
 2   fire_score          389774 non-null  float64 
 3   fire_score_lag1     354340 non-null  float64 
 4   fire_score_lag2     318906 non-null  float64 
 5   fire_score_lag3     283472 non-null  float64 
dtypes: category(1), float64(4), int64(1)
memory usage: 17.6 MB


In [21]:
df.head()

Unnamed: 0,year_quarter,avg_weather_risk_score,code_commune_INSEE,code_departement,fire_score,year,dt,total_risk_score
0,2014T1,0.26,1001,1,0.0,2014,2014-01-01,0.13
1,2014T1,0.26,1002,1,0.0,2014,2014-01-01,0.13
2,2014T1,0.26,1004,1,0.0,2014,2014-01-01,0.13
3,2014T1,0.26,1005,1,0.0,2014,2014-01-01,0.13
4,2014T1,0.26,1006,1,0.0,2014,2014-01-01,0.13


In [22]:
# for lag in range(1, 6):
#     df[f'fire_score_lag{lag}'] = df.groupby('code_departement')['fire_score'].shift(lag)

# df_train_bis = df.groupby(['code_commune_INSEE','year'] + [f'fire_score_lag{lag}' for lag in range(1, 6)])['fire_score'].max().reset_index() 
# df_train_bis.head()

In [23]:
# features = ['year'] + [f'fire_score_lag{lag}' for lag in range(1, 6)]
# df_train_bis = df_train_bis[df_train_bis['year'] <= 2024].dropna(subset=[f'fire_score_lag{lag}' for lag in range(1, 6)])
# df_train_bis['year'] = df_train_bis['year'].sort_values(ascending=True)
# df_train_bis.head()


In [24]:
df_train = df_train_bis[df_train_bis['year'] < 2022].dropna(subset=[f'fire_score_lag{lag}' for lag in range(1, 4)])
df_val = df_train_bis[(df_train_bis['year'] >= 2022) & (df_train_bis['year'] <= 2023)]
df_test = df_train_bis[df_train_bis['year'] == 2024]

In [25]:
df_train.shape

(177170, 6)

In [26]:
df_val.shape

(70868, 6)

In [27]:
df_test.shape

(35434, 6)

In [272]:
df_val

Unnamed: 0,code_commune_INSEE,year,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3
8,1001,2022,0.0,0.0,0.0,0.0
9,1001,2023,0.0,0.0,0.0,0.0
19,1002,2022,0.0,0.0,0.0,0.0
20,1002,2023,0.0,0.0,0.0,0.0
30,1004,2022,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
389750,95680,2023,0.0,1.0,0.0,0.0
389760,95682,2022,1.0,0.0,0.0,0.0
389761,95682,2023,0.0,1.0,0.0,0.0
389771,95690,2022,1.0,0.0,0.0,0.0


In [None]:
# X = df_train[features]
# y = df_train['fire_score']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [266]:
features = ['year'] + [f'fire_score_lag{lag}' for lag in range(1, 4)]

X_train = df_train[features]
y_train = df_train['fire_score']

X_val = df_val[features]
y_val = df_val['fire_score']

X_test = df_test[features]
y_test = df_test['fire_score']

In [269]:
X_val

Unnamed: 0,year,fire_score_lag1,fire_score_lag2,fire_score_lag3
8,2022,0.0,0.0,0.0
9,2023,0.0,0.0,0.0
19,2022,0.0,0.0,0.0
20,2023,0.0,0.0,0.0
30,2022,0.0,0.0,0.0
...,...,...,...,...
389750,2023,1.0,0.0,0.0
389760,2022,0.0,0.0,0.0
389761,2023,1.0,0.0,0.0
389771,2022,0.0,0.0,0.0


In [268]:
model = LGBMRegressor(
    n_estimators=350,
    objective='regression',
    random_state=42,
    verbose=-1
)
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='mae',
    callbacks=[early_stopping(20)]
)


Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[11]	valid_0's l1: 0.388933	valid_0's l2: 0.546305


0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.1
,n_estimators,350
,subsample_for_bin,200000
,objective,'regression'
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [270]:
# Affichage clair
best_iter = model.best_iteration_
best_mae = model.best_score_['valid_0']['l1']
best_mse = model.best_score_['valid_0']['l2']
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print("\n================ Résumé entraînement LightGBM ================")
print(f"👉 Early stopping : arrêt automatique après 20 itérations sans amélioration.")
print(f"👉 Meilleure itération atteinte : {best_iter}")
print(f"    - Erreur absolue moyenne (MAE, l1) sur validation : {best_mae:.2f} ")
print(f"    - Erreur quadratique moyenne (MSE, l2) sur validation : {best_mse:.2f}")
print("---------------------------------------------------------------")
print(f"MAE  global : {mae:.2f} ")
print(f"RMSE global : {rmse:.2f} ")
print(f"R²   global : {r2:.3f}")
print('R2 score:', r2)
print("===============================================================\n")


👉 Early stopping : arrêt automatique après 20 itérations sans amélioration.
👉 Meilleure itération atteinte : 11
    - Erreur absolue moyenne (MAE, l1) sur validation : 0.39 
    - Erreur quadratique moyenne (MSE, l2) sur validation : 0.55
---------------------------------------------------------------
MAE  global : 0.18 
RMSE global : 0.35 
R²   global : 0.108
R2 score: 0.10792976111678021



In [273]:
y_pred

array([0.0777742 , 0.0777742 , 0.0777742 , ..., 0.14571856, 0.14571856,
       0.14571856], shape=(35434,))

In [138]:
df_train_bis.info

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 447309 entries, 0 to 447308
Data columns (total 8 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   code_commune_INSEE  447309 non-null  int64  
 1   year                447309 non-null  int64  
 2   fire_score_lag1     447309 non-null  float64
 3   fire_score_lag2     447309 non-null  float64
 4   fire_score_lag3     447309 non-null  float64
 5   fire_score_lag4     447309 non-null  float64
 6   fire_score_lag5     447309 non-null  float64
 7   fire_score          447309 non-null  float64
dtypes: float64(6), int64(2)
memory usage: 27.3 MB


In [None]:
# Pour chaque commune, on part des prix 2020-2024 pour prédire 2025, puis rolling
df_2025 = df_train_bis[df_train_bis['year'] == 2025][['code_departement', 'fire_score']]
for lag in range(1, 6):
    df_2025[f'fire_score_lag{lag}'] = df.groupby('code_departement')['fire_score'].shift(lag).loc[df['year'] == 2024].values

df_pred = df_2025.dropna(subset=[f'fire_score_lag{lag}' for lag in range(1, 6)]).copy()
df_pred['fire_score_2024'] = df_pred['fire_score'].astype(float)
print(df_pred)
for an in range(2025, 2030):
    X_pred = pd.DataFrame({
        'annee': [an]*len(df_pred),
        'fire_score_lag1': df_pred['fire_score_lag1'],
        'fire_score_lag2': df_pred['fire_score_lag2'],
        'fire_score_lag3': df_pred['fire_score_lag3'],
        'fire_score_lag4': df_pred['fire_score_lag4'],
        'fire_score_lag5': df_pred['fire_score_lag5'],
    })
    df_pred[f'fire_score_{an}_pred'] = model.predict(X_pred)
    # On décale les lags pour l’année suivante (rolling forecast)
    for lag in range(5, 1, -1):
        df_pred[f'fire_score_lag{lag}'] = df_pred[f'fire_score_lag{lag-1}']
    df_pred['fire_score_lag1'] = df_pred[f'fire_score_{an}_pred']

In [72]:
df_pred.head()

Unnamed: 0,code_departement,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred
16400,1,0.0,0.999992,0.999991,1.000012,1.000007,8.2e-05,0.0,8.2e-05,1.000007,1.000012,0.999991,0.999992
16401,1,0.0,0.999992,0.999991,1.000012,1.000007,8.2e-05,0.0,8.2e-05,1.000007,1.000012,0.999991,0.999992
16402,1,0.0,0.999992,0.999991,1.000012,1.000007,8.2e-05,0.0,8.2e-05,1.000007,1.000012,0.999991,0.999992
16403,1,0.0,0.999992,0.999991,1.000012,1.000007,8.2e-05,0.0,8.2e-05,1.000007,1.000012,0.999991,0.999992
16404,1,0.0,0.999992,0.999991,1.000012,1.000007,8.2e-05,0.0,8.2e-05,1.000007,1.000012,0.999991,0.999992


In [None]:
# df_pred['code_commune_INSEE'] = df_pred['code_commune_INSEE'].astype(str).str.zfill(5)
# df_pred.head()

KeyError: 'code_commune_INSEE'

In [60]:
df_pred['department'] = df_pred['code_commune_INSEE'].str[:2]
print(df_pred.shape)
df_pred.head()

KeyError: 'code_commune_INSEE'

In [62]:
import geopandas as gpd

gdf = gpd.read_file(
    "https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements.geojson"
)

print(gdf.columns)

Index(['code', 'nom', 'geometry'], dtype='object')


In [63]:
gdf_merged = (
    df_pred
    .merge(
        gdf, 
        left_on='code_departement', 
        right_on='code', 
        how='inner'
        )
    .drop(columns='code')
)
print(gdf_merged.shape)
gdf_merged.head()

(141736, 15)


Unnamed: 0,code_departement,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred,nom,geometry
0,1,0.0,0.999344,0.999344,0.999344,0.999344,0.000129,0.0,0.000129,0.999344,0.999344,0.999344,0.999344,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
1,1,0.0,0.999344,0.999344,0.999344,0.999344,0.000129,0.0,0.000129,0.999344,0.999344,0.999344,0.999344,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
2,1,0.0,0.999344,0.999344,0.999344,0.999344,0.000129,0.0,0.000129,0.999344,0.999344,0.999344,0.999344,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
3,1,0.0,0.999344,0.999344,0.999344,0.999344,0.000129,0.0,0.000129,0.999344,0.999344,0.999344,0.999344,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
4,1,0.0,0.999344,0.999344,0.999344,0.999344,0.000129,0.0,0.000129,0.999344,0.999344,0.999344,0.999344,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."


In [None]:
import pandas as pd
import plotly.express as px

# Étape 1 : préparer un DataFrame long avec les prédictions
cols_pred = [f'fire_score_{year}_pred' for year in range(2025, 2030)]

df_long = gdf_merged.melt(
    id_vars=['code_departement', 'nom', 'geometry'],
    value_vars=cols_pred,
    var_name='year',
    value_name='fire_score_pred'
)

# Étape 2 : nettoyer la colonne année (ex : "avg_weather_risk_score_2025_pred" → 2025)
df_long['year'] = df_long['year'].str.extract(r'(\d{4})').astype(int)

# Étape 3 : agréger au niveau du département (moyenne des communes)
gdf_plot = df_long.groupby(['code_departement', 'nom', 'year']).agg({
    'fire_score_pred': 'mean'
}).reset_index()

# Étape 4 : fusionner avec le GeoJSON de départements (gdf = GeoDataFrame des départements)
gdf_plot = gdf_plot.merge(gdf[['code', 'geometry']], left_on='code_departement', right_on='code')

# Étape 5 : afficher la carte interactive
fig = px.choropleth(
    gdf_plot,
    geojson=gdf,
    locations='code_departement',
    featureidkey='properties.code',
    color='fire_score_pred',
    animation_frame='year',
    color_continuous_scale='Reds',
    range_color=(0, 3),
    labels={'fire_score_pred': 'Predicted Risk Score'},
    title='Predicted Fire Risk Score by Department (2025–2029)'
)

fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig.show()


In [26]:
df_long.head()

Unnamed: 0,code_departement,nom,geometry,year,fire_score_pred
0,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2025,0.000129
1,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2025,0.000129
2,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2025,0.000129
3,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2025,0.000129
4,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2025,0.000129


## Model by code INSEE

In [12]:
# On ajoute 5 lags (2019-2023) pour prédire 2024 puis rolling
for lag in range(1, 6):
    df[f'fire_score_lag{lag}'] = df.groupby('code_commune_INSEE')['fire_score'].shift(lag)
df.head()

Unnamed: 0,year_quarter,avg_weather_risk_score,code_commune_INSEE,code_departement,fire_score,year,dt,total_risk_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5
0,2014T1,0.26,1001,1,0.0,2014,2014-01-01,0.13,,,,,
1,2014T1,0.26,1002,1,0.0,2014,2014-01-01,0.13,,,,,
2,2014T1,0.26,1004,1,0.0,2014,2014-01-01,0.13,,,,,
3,2014T1,0.26,1005,1,0.0,2014,2014-01-01,0.13,,,,,
4,2014T1,0.26,1006,1,0.0,2014,2014-01-01,0.13,,,,,


In [13]:
df.isna().sum() / df.shape[0]

year_quarter              0.000000
avg_weather_risk_score    0.000000
code_commune_INSEE        0.000000
code_departement          0.000000
fire_score                0.000000
year                      0.000000
dt                        0.000000
total_risk_score          0.000000
fire_score_lag1           0.022727
fire_score_lag2           0.045455
fire_score_lag3           0.068182
fire_score_lag4           0.090909
fire_score_lag5           0.113636
dtype: float64

In [14]:
# Features pour le modèle
features = ['year'] + [f'fire_score_lag{lag}' for lag in range(1, 6)]
df_train = df[df['year'] <= 2023].dropna(subset=[f'fire_score_lag{lag}' for lag in range(1, 6)])
X = df_train[features]
y = df_train['fire_score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=42)

In [15]:
model = LGBMRegressor(
    n_estimators=350,
    objective='regression',
    random_state=42,
    verbose=-1
)
model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='mae',
    callbacks=[early_stopping(20)]
)

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[174]	valid_0's l1: 0.0785419	valid_0's l2: 0.0638127


0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.1
,n_estimators,350
,subsample_for_bin,200000
,objective,'regression'
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [16]:
# Affichage clair
best_iter = model.best_iteration_
best_mae = model.best_score_['valid_0']['l1']
best_mse = model.best_score_['valid_0']['l2']
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print("\n================ Résumé entraînement LightGBM ================")
print(f"👉 Early stopping : arrêt automatique après 20 itérations sans amélioration.")
print(f"👉 Meilleure itération atteinte : {best_iter}")
print(f"    - Erreur absolue moyenne (MAE, l1) sur validation : {best_mae:.2f} ")
print(f"    - Erreur quadratique moyenne (MSE, l2) sur validation : {best_mse:.2f}")
print("---------------------------------------------------------------")
print(f"MAE  global : {mae:.2f} ")
print(f"RMSE global : {rmse:.2f} ")
print(f"R²   global : {r2:.3f}")
print('R2 score:', r2)
print("===============================================================\n")


👉 Early stopping : arrêt automatique après 20 itérations sans amélioration.
👉 Meilleure itération atteinte : 174
    - Erreur absolue moyenne (MAE, l1) sur validation : 0.08 
    - Erreur quadratique moyenne (MSE, l2) sur validation : 0.06
---------------------------------------------------------------
MAE  global : 0.08 
RMSE global : 0.25 
R²   global : 0.453
R2 score: 0.4527867366570145



In [17]:
# Pour chaque commune, on part des prix 2020-2024 pour prédire 2025, puis rolling
df_2024 = df[df['year'] == 2024][['code_commune_INSEE', 'fire_score']]
for lag in range(1, 6):
    df_2024[f'fire_score_lag{lag}'] = df.groupby('code_commune_INSEE')['fire_score'].shift(lag).loc[df['year'] == 2024].values

df_pred = df_2024.dropna(subset=[f'fire_score_lag{lag}' for lag in range(1, 6)]).copy()
df_pred['fire_score_2024'] = df_pred['fire_score'].astype(float)

for an in range(2025, 2030):
    X_pred = pd.DataFrame({
        'annee': [an]*len(df_pred),
        'fire_score_lag1': df_pred['fire_score_lag1'],
        'fire_score_lag2': df_pred['fire_score_lag2'],
        'fire_score_lag3': df_pred['fire_score_lag3'],
        'fire_score_lag4': df_pred['fire_score_lag4'],
        'fire_score_lag5': df_pred['fire_score_lag5'],
    })
    df_pred[f'fire_score_{an}_pred'] = model.predict(X_pred)
    # On décale les lags pour l’année suivante (rolling forecast)
    for lag in range(5, 1, -1):
        df_pred[f'fire_score_lag{lag}'] = df_pred[f'fire_score_lag{lag-1}']
    df_pred['fire_score_lag1'] = df_pred[f'fire_score_{an}_pred']

In [18]:
df_pred.head()

Unnamed: 0,code_commune_INSEE,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred
16400,1001,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16401,1002,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16402,1004,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16403,1005,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16404,1006,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139


In [19]:
df_pred['code_commune_INSEE'] = df_pred['code_commune_INSEE'].astype(str).str.zfill(5)
df_pred.head()

Unnamed: 0,code_commune_INSEE,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred
16400,1001,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16401,1002,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16402,1004,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16403,1005,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139
16404,1006,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139


In [20]:
df_pred['department'] = df_pred['code_commune_INSEE'].str[:2]
print(df_pred.shape)
df_pred.head()

(141736, 14)


Unnamed: 0,code_commune_INSEE,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred,department
16400,1001,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1
16401,1002,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1
16402,1004,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1
16403,1005,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1
16404,1006,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1


In [21]:
import geopandas as gpd

gdf = gpd.read_file(
    "https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements.geojson"
)

print(gdf.columns)

Index(['code', 'nom', 'geometry'], dtype='object')


In [24]:
df_pred['code_department'] = df_pred['code_commune_INSEE'].str[:2]
print(df_pred.shape)
df_pred.head()

(141736, 15)


Unnamed: 0,code_commune_INSEE,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred,department,code_department
16400,1001,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1
16401,1002,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1
16402,1004,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1
16403,1005,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1
16404,1006,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1


In [26]:
gdf_merged = (
    df_pred
    .merge(
        gdf, 
        left_on='code_department', 
        right_on='code', 
        how='inner'
        )
    .drop(columns='code')
)
print(gdf_merged.shape)
gdf_merged.head()

(141736, 17)


Unnamed: 0,code_commune_INSEE,fire_score,fire_score_lag1,fire_score_lag2,fire_score_lag3,fire_score_lag4,fire_score_lag5,fire_score_2024,fire_score_2025_pred,fire_score_2026_pred,fire_score_2027_pred,fire_score_2028_pred,fire_score_2029_pred,department,code_department,nom,geometry
0,1001,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
1,1002,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
2,1004,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
3,1005,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
4,1006,0.0,2.01139,0.043232,0.058365,0.013986,0.016326,0.0,0.016326,0.013986,0.058365,0.043232,2.01139,1,1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."


In [None]:
import pandas as pd
import plotly.express as px

# Étape 1 : préparer un DataFrame long avec les prédictions
cols_pred = [f'fire_score_{year}_pred' for year in range(2025, 2030)]

df_long = gdf_merged.melt(
    id_vars=['code_department', 'nom', 'geometry'],
    value_vars=cols_pred,
    var_name='year',
    value_name='fire_score_pred'
)

# Étape 2 : nettoyer la colonne année (ex : "avg_weather_risk_score_2025_pred" → 2025)
df_long['year'] = df_long['year'].str.extract(r'(\d{4})').astype(int)

# Étape 3 : agréger au niveau du département (moyenne des communes)
gdf_plot = df_long.groupby(['code_department', 'nom', 'year']).agg({
    'fire_score_pred': 'mean'
}).reset_index()

# Étape 4 : fusionner avec le GeoJSON de départements (gdf = GeoDataFrame des départements)
gdf_plot = gdf_plot.merge(gdf[['code', 'geometry']], left_on='code_department', right_on='code')

# Étape 5 : afficher la carte interactive
fig = px.choropleth(
    gdf_plot,
    geojson=gdf,
    locations='code_department',
    featureidkey='properties.code',
    color='fire_score_pred',
    animation_frame='year',
    color_continuous_scale='Reds',
    range_color=(0, 3),
    labels={'fire_score_pred': 'Predicted Risk Score'},
    title='Predicted Fire Risk Score by Department (2025–2029)'
)

fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig.show()


In [29]:
gdf_plot.head()

Unnamed: 0,code_department,nom,year,fire_score_pred,code,geometry
0,1,Ain,2025,0.016326,1,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
1,1,Ain,2026,0.013986,1,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
2,1,Ain,2027,0.058365,1,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
3,1,Ain,2028,0.043232,1,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
4,1,Ain,2029,2.01139,1,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
