In [None]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split

def _zero_pad_codes(frame, columns, width=5):
    """Left-pad the code columns of ``frame`` with zeros up to ``width`` characters.

    Columns named in ``columns`` that are absent from ``frame`` are skipped.
    Missing values stay missing: ``astype(str)`` alone turns NaN into the text
    'nan', which ``zfill`` pads to '00nan' and which would then behave like a
    real code in later merges and group-bys.

    The frame is modified in place and returned for convenience.
    """
    for col in columns:
        if col in frame.columns:
            padded = frame[col].astype(str).str.zfill(width)
            # Re-mask the positions that were missing before the string conversion.
            frame[col] = padded.where(frame[col].notna())
    return frame


# Price table and climate-risk table; commune codes are read as text.
df = pd.read_csv('df_merged_clean.csv', dtype={'code_commune_insee': str})
df_risk = pd.read_csv('data_climate_danger_merge_clean.csv', dtype={'code_commune_insee': str})

# NOTE(review): code_postal is read without an explicit dtype; if pandas parses
# it as float, the padded text carries a '.0' suffix — confirm the column type.
_zero_pad_codes(df, ['code_commune_insee', 'code_postal'])
_zero_pad_codes(df_risk, ['code_commune_insee'])

# Attach the two risk scores to each (commune, year) price row; price rows
# without a risk match are kept with NaN scores (left join).
# NOTE(review): assumes df_risk has at most one row per (commune, year);
# otherwise the join duplicates price rows — TODO confirm.
risk_columns = ['code_commune_insee', 'annee', 'moyenne_risk_score', 'std_risk_score']
df = df.merge(df_risk[risk_columns], on=['code_commune_insee', 'annee'], how='left')
# Keep the 2014–2024 window (bounds included) and order rows by commune, then
# year, so that a within-commune shift walks back through the preceding rows.
in_window = df['annee'].between(2014, 2024)
df = df[in_window].sort_values(['code_commune_insee', 'annee'])

# prixm2moyen_lag1..lag5: price of the 1st..5th preceding row of the same commune.
# NOTE(review): shift() counts rows, not years — presumably each commune has one
# row per consecutive year; confirm, otherwise the lags are misaligned.
lag_columns = []
for lag in range(1, 6):
    lag_name = f'prixm2moyen_lag{lag}'
    df[lag_name] = df.groupby('code_commune_insee')['prixm2moyen'].shift(lag)
    lag_columns.append(lag_name)

# Drop rows lacking any of the five lags or either coordinate.
df = df.dropna(subset=lag_columns + ['latitude', 'longitude'])
# Stage 1 — baseline price model: year, coordinates and the five price lags
# (no risk information among the inputs).
features = ['annee', 'latitude', 'longitude']
features = features + [f'prixm2moyen_lag{lag}' for lag in range(1, 6)]
X1, y1 = df[features], df['prixm2moyen']

# Seeded random hold-out of 13 % of the rows.
# NOTE(review): the split is random across rows, so hold-out rows share years
# and communes with training rows — confirm this is intended for a lag model.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1,
    y1,
    test_size=0.13,
    random_state=42,
)

m1 = LGBMRegressor(
    n_estimators=350,
    objective='regression',
    random_state=42,
    verbose=-1,
)
# The hold-out set is supplied as evaluation set with MAE as metric; no
# callbacks are passed.
m1.fit(
    X1_train,
    y1_train,
    eval_set=[(X1_test, y1_test)],
    eval_metric='mae',
    callbacks=[],
)

# Baseline prediction for every row of df (training and hold-out rows alike).
df['prix_pred'] = m1.predict(X1)
# prix_pred_lag1..lag5: stage-1 prediction of the 1st..5th preceding row of the
# same commune (built with one loop, like the price lags earlier in this cell).
pred_lag_cols = []
for lag in range(1, 6):
    col_name = f'prix_pred_lag{lag}'
    df[col_name] = df.groupby('code_commune_insee')['prix_pred'].shift(lag)
    pred_lag_cols.append(col_name)

# Stage-2 frame: rows that have both risk scores and all five prediction lags.
# .copy() makes df2 an independent frame: df stays alive, so assigning new
# columns to a plain dropna() result would raise SettingWithCopyWarning.
# NOTE(review): these five lags come on top of the five price lags required
# earlier, i.e. a kept row needs 11 rows of history in its commune. With the
# 2014–2024 window that is presumably only the last year per commune — confirm
# df2 holds more than one year per commune, otherwise any year-over-year figure
# derived from it is entirely NaN.
df2 = df.dropna(subset=['moyenne_risk_score', 'std_risk_score'] + pred_lag_cols).copy()
# Stage 2 — stacked model: same year/coordinate inputs, the stage-1 prediction
# and its five lags, plus the two risk scores.
features2 = (
    ['annee', 'latitude', 'longitude', 'prix_pred']
    + [f'prix_pred_lag{i}' for i in range(1, 6)]
    + ['moyenne_risk_score', 'std_risk_score']
)
X2, y2 = df2[features2], df2['prixm2moyen']

# Same seeded 13 % random hold-out as for the first model.
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2,
    y2,
    test_size=0.13,
    random_state=42,
)

m2 = LGBMRegressor(
    n_estimators=300,
    objective='regression',
    random_state=42,
    verbose=-1,
)
# Hold-out set passed as evaluation set (MAE); no callbacks are passed.
m2.fit(
    X2_train,
    y2_train,
    eval_set=[(X2_test, y2_test)],
    eval_metric='mae',
    callbacks=[],
)
# Stacked (risk-aware) prediction for every row of the stage-2 frame.
df2['prix_pred_stack'] = m2.predict(X2)

# variation_%: relative change, in percent, from this row's stacked prediction
# to the next row's within the same commune. The last row of each commune has
# no successor and therefore gets NaN.
stack_next = df2.groupby('code_commune_insee')['prix_pred_stack'].shift(-1)
df2['variation_%'] = 100 * (stack_next - df2['prix_pred_stack']) / df2['prix_pred_stack']

# decote_risk_%: gap between the baseline and the stacked prediction, in
# percent of the baseline prediction.
df2['decote_risk_%'] = 100 * (df2['prix_pred'] - df2['prix_pred_stack']) / df2['prix_pred']

# Columns written to the CSV consumed by the visualisation cell.
cols_export = [
    'code_commune_insee',
    'annee',
    'latitude',
    'longitude',
    'prixm2moyen',
    'prix_pred',
    'prix_pred_stack',
    'variation_%',
    'decote_risk_%',
    'moyenne_risk_score',
    'std_risk_score',
]
df2[cols_export].to_csv('prix_variation_decote_stacked.csv', index=False, encoding='utf-8')
print('Export : prix_variation_decote_stacked.csv')

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

import json
import urllib.request

# Reload the exported results; commune codes are read as text so that leading
# zeros survive the CSV round trip.
df = pd.read_csv('prix_variation_decote_stacked.csv', dtype={'code_commune_insee': str})

# Commune boundaries (GeoJSON) used by the interactive maps below.
geojson_url = "https://france-geojson.gregoiredavid.fr/repo/communes.geojson"
# The timeout keeps the cell from hanging forever if the server stops
# responding; it bounds each blocking socket operation, not the whole download.
with urllib.request.urlopen(geojson_url, timeout=60) as response:
    geojson = json.load(response)
def _commune_choropleth(data, boundaries, color, color_scale, title, hover_data):
    """Build the commune-level choropleth shared by both maps.

    Parameters
    ----------
    data : DataFrame holding 'code_commune_insee' plus the columns named in
        ``color`` and ``hover_data``.
    boundaries : GeoJSON object; rows are matched to features through the
        feature's ``properties.code``.
    color : name of the column driving the fill colour.
    color_scale : name of the continuous colour scale.
    title : figure title.
    hover_data : mapping of column name to ``True`` or a format string.

    Returns
    -------
    The Plotly figure, sized 1200x900 with a horizontally centred title.
    """
    fig = px.choropleth_mapbox(
        data,
        geojson=boundaries,
        locations='code_commune_insee',
        featureidkey='properties.code',
        color=color,
        color_continuous_scale=color_scale,
        center={"lat": 46.6, "lon": 2.6},
        zoom=5,
        opacity=0.80,
        mapbox_style="carto-positron",
        title=title,
        hover_data=hover_data,
    )
    fig.update_layout(width=1200, height=900, margin={"r": 0, "t": 50, "l": 0, "b": 0}, title_x=0.5)
    return fig


# Interactive map — discount attributed to risk.
# NOTE(review): every row of df is passed to the map; if df holds several
# years per commune they are all drawn — confirm whether one year should be
# selected first.
fig1 = _commune_choropleth(
    df,
    geojson,
    color='decote_risk_%',
    color_scale='RdBu',
    title='Décote immobilière due au risque naturel (%)',
    hover_data={
        "annee": True,
        "prixm2moyen": ":.0f",
        "prix_pred": ":.0f",
        "prix_pred_stack": ":.0f",
        "decote_risk_%": ":.2f",
        "moyenne_risk_score": ":.2f",
        "std_risk_score": ":.2f",
    },
)
fig1.show()

# Interactive map — price variation in percent.
fig2 = _commune_choropleth(
    df,
    geojson,
    color='variation_%',
    color_scale='RdYlGn',
    title='Variation annuelle du prix au m² (%)',
    hover_data={
        "annee": True,
        "prix_pred_stack": ":.0f",
        "variation_%": ":.2f",
        "moyenne_risk_score": ":.2f",
    },
)
fig2.show()

# Scatter plot: discount against mean risk score, coloured by the risk-score
# standard deviation and sized by the stacked price prediction.
scatter_options = dict(
    x="moyenne_risk_score",
    y="decote_risk_%",
    color="std_risk_score",
    size="prix_pred_stack",
    hover_data=["code_commune_insee", "annee"],
    title="Décote vs Score de Risque (toutes communes, toutes années)",
)
fig3 = px.scatter(df, **scatter_options)
fig3.show()

# Histogram of the discount (missing values excluded), with a KDE overlay.
fig_hist, ax_hist = plt.subplots(figsize=(10, 5))
decote_values = df['decote_risk_%'].dropna()
sns.histplot(decote_values, bins=100, color='royalblue', kde=True, ax=ax_hist)
ax_hist.set_title("Distribution de la décote immobilière (%) liée au risque naturel")
ax_hist.set_xlabel("Décote (%)")
ax_hist.set_ylabel("Nombre de communes-années")
ax_hist.grid()
fig_hist.tight_layout()
plt.show()

# Correlation heatmap over prices, predictions, derived percentages and risk scores.
corr_columns = [
    'prixm2moyen',
    'prix_pred',
    'prix_pred_stack',
    'variation_%',
    'decote_risk_%',
    'moyenne_risk_score',
    'std_risk_score',
]
fig_corr, ax_corr = plt.subplots(figsize=(9, 6))
corr = df[corr_columns].corr()
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax_corr)
ax_corr.set_title("Corrélation entre variables principales et scores de risque")
fig_corr.tight_layout()
plt.show()

# Headline statistics: distribution summary of the discount.
print("\n--- Résumé statistiques décote ---")
decote_percentiles = [.01, .1, .25, .5, .75, .9, .99]
decote_summary = df['decote_risk_%'].describe(percentiles=decote_percentiles)
print(decote_summary.to_string())

# Ten rows with the largest discount.
print("\n--- Top 10 décotes maxi ---")
top_decote_cols = ['code_commune_insee', 'annee', 'decote_risk_%', 'moyenne_risk_score', 'prix_pred', 'prix_pred_stack']
top_decote = df.sort_values('decote_risk_%', ascending=False).head(10)
print(top_decote[top_decote_cols])

# Ten rows with the lowest (most negative) variation.
print("\n--- Top 10 variations négatives ---")
worst_variation_cols = ['code_commune_insee', 'annee', 'variation_%', 'moyenne_risk_score', 'prix_pred_stack']
worst_variation = df.sort_values('variation_%').head(10)
print(worst_variation[worst_variation_cols])

# Pairwise correlation between the discount and the two risk scores.
print("\n--- Corrélation décote <-> risk_score ---")
risk_corr_cols = ['decote_risk_%', 'moyenne_risk_score', 'std_risk_score']
print(df[risk_corr_cols].corr())

# Reminder of the CSV file name; the file itself is written by the modelling cell.
print("\nExport CSV prêt pour DataViz/PowerBI/Excel : prix_variation_decote_stacked.csv")