In [None]:
import warnings
warnings.filterwarnings('ignore')

import glob
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

In [None]:
# Load every CSV matching dvf20*.csv, harmonise the column names across files,
# coerce the numeric columns and export the result to df_prix_clean.csv.

def _normalise_column(name):
    """Return `name` stripped, lower-cased, with spaces/hyphens as '_' and é/É unaccented."""
    return (
        name.strip()
        .replace(" ", "_").replace("-", "_")
        .replace("é", "e").replace("É", "E")
        .lower()
    )


# Alternative spellings found in some files -> canonical column name.
COLUMN_ALIASES = {'insee_com': 'code_commune_insee', 'codepostal': 'code_postal'}

fichiers = sorted(glob.glob('dvf20*.csv'))
if not fichiers:
    raise FileNotFoundError("No input file matches 'dvf20*.csv' in the working directory")

dfs = []
for f in fichiers:
    # Everything is read as text so that leading zeros in the codes survive.
    dftmp = pd.read_csv(f, dtype=str)
    dftmp.columns = [_normalise_column(col) for col in dftmp.columns]
    dftmp = dftmp.rename(columns=COLUMN_ALIASES)
    # Normalising/renaming can produce the same name twice; keep the first
    # occurrence *before* concatenating, because concat needs unique columns.
    dftmp = dftmp.loc[:, ~dftmp.columns.duplicated()]
    dfs.append(dftmp)

df = pd.concat(dfs, ignore_index=True)

for col in ['code_commune_insee', 'code_postal']:
    if col in df.columns:
        # .str.zfill leaves missing values as NaN; going through astype(str)
        # would turn them into the bogus code "00nan".
        df[col] = df[col].str.zfill(5)

# A row without a year (or without a commune code) cannot be placed in a
# commune's history, so it is dropped rather than given a placeholder value.
df['annee'] = pd.to_numeric(df['annee'], errors='coerce')
colonnes_cles = [c for c in ['code_commune_insee', 'annee'] if c in df.columns]
df = (
    df.dropna(subset=colonnes_cles)
    .assign(annee=lambda d: d['annee'].astype(int))
)

# Counts and proportions default to 0 when absent or unparsable.
colonnes_zero = ['nb_mutations', 'nbmaisons', 'nbapparts', 'propmaison', 'propappart']
# A missing mean price or surface is unknown, not zero: it stays NaN.
colonnes_mesure = ['prixmoyen', 'prixm2moyen', 'surfacemoy']
for c in colonnes_zero + colonnes_mesure:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors='coerce')
        if c in colonnes_zero:
            df[c] = df[c].fillna(0)

colonnes_utiles = [
    'code_commune_insee', 'annee', 'nb_mutations', 'nbmaisons', 'nbapparts',
    'propmaison', 'propappart', 'prixmoyen', 'prixm2moyen', 'surfacemoy'
]
df = df[[col for col in colonnes_utiles if col in df.columns]].drop_duplicates()

df.to_csv('df_prix_clean.csv', index=False, encoding='utf-8')
print("Export : df_prix_clean.csv")

In [None]:
# Attach latitude/longitude from ref_espace_communes.csv to every row of `df`
# (left join on the commune code) and export the result to df_merged_clean.csv.
df_coord = pd.read_csv('ref_espace_communes.csv', dtype=str)
df_coord.columns = [col.strip().replace(" ", "_").replace("-", "_").replace("é", "e").replace("É", "E").lower() for col in df_coord.columns]

# The commune-code column is located by name. If the canonical name is already
# present it is used as is; otherwise the first column containing "insee" is.
colonnes_insee = [col for col in df_coord.columns if "insee" in col]
if not colonnes_insee:
    raise KeyError("ref_espace_communes.csv has no column whose name contains 'insee'")
col_insee_coord = 'code_commune_insee' if 'code_commune_insee' in colonnes_insee else colonnes_insee[0]
df_coord = df_coord.rename(columns={col_insee_coord: 'code_commune_insee'})

colonnes_absentes = {'latitude', 'longitude'} - set(df_coord.columns)
if colonnes_absentes:
    raise KeyError(f"ref_espace_communes.csv is missing column(s): {sorted(colonnes_absentes)}")

df_coord = (
    # Rows without a code can never match; dropping them first also avoids
    # padding a stringified NaN into the bogus key "00nan".
    df_coord.dropna(subset=['code_commune_insee'])
    .assign(code_commune_insee=lambda d: d['code_commune_insee'].str.zfill(5))
    .drop_duplicates(subset=['code_commune_insee'])
)

df_merged = pd.merge(
    df,
    df_coord[['code_commune_insee', 'latitude', 'longitude']],
    on='code_commune_insee',
    how='left',
    validate='many_to_one',  # the right side holds one row per commune code
)
df_merged['latitude'] = pd.to_numeric(df_merged['latitude'], errors='coerce')
df_merged['longitude'] = pd.to_numeric(df_merged['longitude'], errors='coerce')

df_merged.to_csv('df_merged_clean.csv', index=False, encoding='utf-8')
print("Export : df_merged_clean.csv")

In [None]:
# Build the lagged features used by the model. Every feature of a row is
# computed from earlier rows of the same commune only.
df = pd.read_csv("df_merged_clean.csv", dtype={'code_commune_insee': str})
df = df.sort_values(['code_commune_insee', 'annee'])

# Lags are row-based: shift(k) is the k-th previous row of the commune,
# whatever the gap between the two years.
prix_par_commune = df.groupby('code_commune_insee')['prixm2moyen']
mutations_par_commune = df.groupby('code_commune_insee')['nb_mutations']

df['prixm2moyen_lag1'] = prix_par_commune.shift(1)
df['prixm2moyen_lag2'] = prix_par_commune.shift(2)
# Mean of the three previous prices. It is built from per-commune shifts so
# that no value can slide from one commune into the next (a shift applied
# after a grouped rolling mean runs across group boundaries).
df['prixm2moyen_roll3'] = (
    df['prixm2moyen_lag1'] + df['prixm2moyen_lag2'] + prix_par_commune.shift(3)
) / 3
df['var_1an'] = 100 * (df['prixm2moyen_lag1'] - df['prixm2moyen_lag2']) / df['prixm2moyen_lag2']
# Despite its name, this is the gap (in %) between the last price and the 3-row mean.
df['var_2an'] = 100 * (df['prixm2moyen_lag1'] - df['prixm2moyen_roll3']) / df['prixm2moyen_roll3']
df['nb_mutations_lag1'] = mutations_par_commune.shift(1)
# Mean of the three previous transaction counts, same construction as roll3.
df['trend_mut'] = (
    df['nb_mutations_lag1'] + mutations_par_commune.shift(2) + mutations_par_commune.shift(3)
) / 3

# A zero denominator yields +/-inf; turn those into NaN so the rows are
# removed below instead of reaching the model as infinite features.
colonnes_features = [
    'prixm2moyen_lag1', 'prixm2moyen_lag2', 'prixm2moyen_roll3',
    'var_1an', 'var_2an', 'nb_mutations_lag1', 'trend_mut'
]
df[colonnes_features] = df[colonnes_features].replace([np.inf, -np.inf], np.nan)

# Drops the first three rows of every commune (incomplete history) and, since
# no subset is given, any row with a missing value in any other column too
# (latitude/longitude included).
df = df.dropna().reset_index(drop=True)


In [None]:
# Chronological split: the model is fitted on the oldest years, tuned on the
# next two and judged on the most recent two.
train_years = [*range(2014, 2021)]   # 2014 through 2020
val_years = [2021, 2022]
test_years = [2023, 2024]

train_df, val_df, test_df = (
    df.loc[df['annee'].isin(annees)]
    for annees in (train_years, val_years, test_years)
)

In [None]:
import lightgbm as lgb

# Column to predict and the lagged inputs it is predicted from.
target = 'prixm2moyen'
features = [
    'prixm2moyen_lag1',
    'prixm2moyen_lag2',
    'prixm2moyen_roll3',
    'var_1an',
    'var_2an',
    'nb_mutations_lag1',
    'trend_mut',
]

# Fit on the training years only.
model = lgb.LGBMRegressor(n_estimators=150, random_state=42)
model.fit(train_df[features], train_df[target])

# Store the predictions next to the observations; assign() returns a new
# frame, so the slices taken from `df` are left untouched.
val_df = val_df.assign(y_pred=model.predict(val_df[features]))
test_df = test_df.assign(y_pred=model.predict(test_df[features]))

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np


def _afficher_scores(titre, frame):
    """Print `titre`, then MAE, RMSE and R2 of frame['y_pred'] against frame[target]."""
    observe, predit = frame[target], frame['y_pred']
    print(titre)
    print("MAE :", mean_absolute_error(observe, predit))
    print("RMSE:", np.sqrt(mean_squared_error(observe, predit)))
    print("R2  :", r2_score(observe, predit))


# Same three metrics on the validation years, then on the held-out test years.
_afficher_scores("=== VALIDATION ===", val_df)
_afficher_scores("\n=== TEST (futur réel) ===", test_df)

In [None]:
# Recursive forecast for 2025-2029: each predicted price becomes the most
# recent lag of the following year. Features are rebuilt with the same
# formulas as the training features (three distinct lags for both means).

def _lignes_annee(frame, annee):
    """Return the rows of `frame` for `annee`, indexed by commune code, one row per commune."""
    return (
        frame[frame['annee'] == annee]
        .drop_duplicates(subset='code_commune_insee', keep='last')
        .set_index('code_commune_insee')
    )


df_2023 = _lignes_annee(df, 2023)
df_2024 = _lignes_annee(df, 2024)

# Only communes observed in both 2023 and 2024 can be forecast.
communes = df_2024.index.intersection(df_2023.index).dropna().to_numpy()
if len(communes) == 0:
    raise ValueError("No commune has both a 2023 and a 2024 row; nothing to forecast")

# State for the first forecast year, all arrays aligned on `communes`.
# The third lag is the lag-1 value stored on the 2023 row, i.e. the row that
# precedes 2023 in the commune's history.
prix_lag1 = df_2024.loc[communes, 'prixm2moyen'].to_numpy(dtype=float)
prix_lag2 = df_2023.loc[communes, 'prixm2moyen'].to_numpy(dtype=float)
prix_lag3 = df_2023.loc[communes, 'prixm2moyen_lag1'].to_numpy(dtype=float)
nbmut_lag1 = df_2024.loc[communes, 'nb_mutations'].to_numpy(dtype=float)
nbmut_lag2 = df_2023.loc[communes, 'nb_mutations'].to_numpy(dtype=float)
nbmut_lag3 = df_2023.loc[communes, 'nb_mutations_lag1'].to_numpy(dtype=float)

results = []
for year in range(2025, 2030):
    # Zero denominators are expected to produce inf/NaN; they are cleaned below.
    with np.errstate(divide='ignore', invalid='ignore'):
        roll3 = (prix_lag1 + prix_lag2 + prix_lag3) / 3
        var_1an = 100 * (prix_lag1 - prix_lag2) / prix_lag2
        var_2an = 100 * (prix_lag1 - roll3) / roll3
    trend_mut = (nbmut_lag1 + nbmut_lag2 + nbmut_lag3) / 3

    # Named columns, selected in the order the model was fitted on.
    X_pred = pd.DataFrame({
        'prixm2moyen_lag1': prix_lag1,
        'prixm2moyen_lag2': prix_lag2,
        'prixm2moyen_roll3': roll3,
        'var_1an': var_1an,
        'var_2an': var_2an,
        'nb_mutations_lag1': nbmut_lag1,
        'trend_mut': trend_mut,
    }).replace([np.inf, -np.inf], np.nan)[features]
    y_pred = model.predict(X_pred)

    results.extend(
        {"code_commune_insee": commune, "annee": year, "prixm2moyen": prix}
        for commune, prix in zip(communes, y_pred)
    )

    # Roll the state forward by one year.
    prix_lag3, prix_lag2, prix_lag1 = prix_lag2, prix_lag1, y_pred
    # Transaction counts are not forecast: the last observed count is carried
    # forward unchanged as lag 1 while the older lags shift.
    nbmut_lag3, nbmut_lag2 = nbmut_lag2, nbmut_lag1

df_forecast = pd.DataFrame(results)
df_forecast.to_csv("forecast_2025_2029.csv", index=False)
print(df_forecast.head())

In [None]:
# Stack the observed prices on top of the forecast ones and export a single
# long table (one row per commune and year).
colonnes_serie = ['code_commune_insee', 'annee', 'prixm2moyen']
historique = df.loc[:, colonnes_serie].copy()
all_years = pd.concat((historique, df_forecast), ignore_index=True)
all_years.to_csv("prixm2moyen_2014_2029.csv", index=False)

In [None]:
import folium
from folium.plugins import MarkerCluster
import branca.colormap as cm

# NOTE(review): df_map_clean is not created in the cells visible here; this
# cell expects one row per commune with code_commune_insee, latitude,
# longitude, prix_2024..prix_2029 and var_2029 -- confirm where it is built.

# Yellow-to-red scale spanning the range of the 2029 prices.
serie_2029 = df_map_clean['prix_2029']
min_price, max_price = serie_2029.min(), serie_2029.max()
colormap = cm.linear.YlOrRd_09.scale(min_price, max_price)

m = folium.Map(location=[46.8, 2.5], zoom_start=5, tiles='cartodbpositron')
marker_cluster = MarkerCluster().add_to(m)

# One clustered circle per row, coloured by its 2029 price, with a popup
# listing the price for each year.
for _, row in df_map_clean.iterrows():
    html = f"""
    <b>Code INSEE :</b> {row['code_commune_insee']}<br>
    <b>Prix 2024 :</b> {row['prix_2024']:.0f} €<br>
    <b>Prix 2025 :</b> {row['prix_2025']:.0f} €<br>
    <b>Prix 2026 :</b> {row['prix_2026']:.0f} €<br>
    <b>Prix 2027 :</b> {row['prix_2027']:.0f} €<br>
    <b>Prix 2028 :</b> {row['prix_2028']:.0f} €<br>
    <b>Prix 2029 :</b> {row['prix_2029']:.0f} €<br>
    <b>Var 2029 (%):</b> {row['var_2029']:.1f} %
    """
    couleur = colormap(row['prix_2029'])  # same colour for outline and fill
    marqueur = folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=4,
        popup=folium.Popup(html, max_width=350),
        fill=True,
        fill_opacity=0.8,
        color=couleur,
        fill_color=couleur,
    )
    marqueur.add_to(marker_cluster)

colormap.caption = 'Prix au m² prédit en 2029 (€)'
colormap.add_to(m)

m.save('carte_france_folium.html')
m

In [None]:
# NOTE(review): neither `px` nor `df_map_clean` is defined in the cells
# visible here -- `px` is presumably plotly.express; confirm that it is
# imported (and df_map_clean built) before this cell runs.

# Hover box: commune code, then prices without decimals and yearly
# variations with one decimal.
hover_spec = {"code_commune_insee": True}
hover_spec.update({f"prix_{annee}": ':.0f' for annee in range(2024, 2030)})
hover_spec.update({f"var_{annee}": ':.1f' for annee in range(2025, 2030)})

fig = px.scatter_mapbox(
    df_map_clean,
    lat="latitude",
    lon="longitude",
    color="prix_2029",
    size="taille",
    color_continuous_scale="Turbo",
    size_max=7,
    zoom=3.5,
    opacity=0.8,
    hover_data=hover_spec,
    title="Prix au m² (2024 réel + 2025-2029 prédit) — Carte France Grand Angle",
)

# Dark basemap with the same centre as the folium map, and a shortened colour bar.
fig.update_layout(
    mapbox_style="carto-darkmatter",
    mapbox_center={"lat": 46.8, "lon": 2.5},
    coloraxis_colorbar={
        "title": "Prix 2029 (€)",
        "tickvals": [2000, 4000, 6000, 8000],
        "len": 0.5,
    },
)
fig.show()