# Dashboard Investissement Immobilier — Notebook (IDF)

Ce notebook unifie tes 4 notebooks en **une interface unique** avec :
- **Filtres** (surface, budget, prix/m², zone, département, années, rendement, etc.)
- **Rendus interactifs avec survol (hover)** grâce à Plotly (valeurs visibles au survol)
- **Top communes** (Top N ajustable)
- **Analyses prix & rendements**
- **Vue d√©partements**

➡️ Exécute les cellules dans l’ordre (ou `Run All`).
Si `plotly` ou `ipywidgets` ne sont pas installés :
```bash
pip install plotly ipywidgets
jupyter nbextension enable --py widgetsnbextension
```


In [5]:
import warnings, os, sys, io
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as W
from IPython.display import display, HTML

warnings.filterwarnings('ignore')


In [6]:
# Import the project-local cleaning module; every later cell depends on it.
try:
    import data_cleaner_advanced as dca
except ImportError as e:
    # Only a genuine import failure is relabelled as "module not found";
    # the original error stays chained so its real cause remains visible.
    raise ImportError("data_cleaner_advanced.py introuvable: " + str(e)) from e

# Folders handed to the cleaner. They are relative paths, so they resolve
# against the notebook's working directory.
# NOTE(review): the captured traceback shows the project folder name twice in
# the resolved path — confirm these paths match the actual repository layout.
RAW_DIR = '../Projet-Data-science-Investissement-immobilier/data/raw'
CLEAN_DIR = '../Projet-Data-science-Investissement-immobilier/data/clean'

df_unifie, df_loyers, df_gares = dca.quick_load_advanced(
    raw_dir=RAW_DIR,
    clean_dir=CLEAN_DIR,
    force_refresh=False
)
# Last expression of the cell: displayed as the cell's output.
len(df_unifie), df_unifie.columns[:10]


🧹 NETTOYAGE AVANCÉ DES DONNÉES



FileNotFoundError: ❌ Aucun fichier DVF trouvé dans c:\Users\KenziLali\OneDrive\iCloudDrive\Etudes\ING\ING4\S7\Data science\Projet-Data-science-Investissement-immobilier\Projet-Data-science-Investissement-immobilier\data\raw

In [None]:
# Investor profile; its values seed the default filter settings of the dashboard.
PERSONA = dict(
    nom='Manager IT',
    budget_max=200_000,
    surface_min=15,
    surface_max=65,
    objectif_rendement_net=4.5,
)

def clip_q(s: pd.Series, qlow=0.01, qhigh=0.99):
    """Return the numeric values of *s* clipped to a quantile band.

    Values are coerced to numbers (unparseable entries are dropped along with
    missing ones), then clamped to the ``[qlow, qhigh]`` quantiles of what
    remains. An input with no usable value yields an empty Series.
    """
    numeric = pd.to_numeric(s, errors='coerce').dropna()
    if numeric.empty:
        return numeric
    bounds = numeric.quantile([qlow, qhigh])
    return numeric.clip(lower=bounds.iloc[0], upper=bounds.iloc[1])

def ensure_geo(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with ``code_departement`` and ``zone_geo`` filled in.

    * ``code_departement`` is derived from the first two characters of
      ``code_postal`` when the column is absent.
    * ``zone_geo`` is derived from ``code_departement`` through a fixed
      department -> zone table when the column is absent; codes outside the
      table map to a missing value.

    Columns that already exist are left untouched, and the input frame is
    never modified.
    """
    zone_by_dept = {
        '75': 'Paris',
        '92': 'Petite Couronne', '93': 'Petite Couronne', '94': 'Petite Couronne',
        '77': 'Grande Couronne', '78': 'Grande Couronne',
        '91': 'Grande Couronne', '95': 'Grande Couronne',
    }

    d = df.copy()
    if 'code_departement' not in d.columns and 'code_postal' in d.columns:
        postal = d['code_postal']
        # Keep missing postal codes missing: stringifying them first would turn
        # NaN/None into "nan"/"None" and produce bogus codes such as "na".
        d['code_departement'] = postal.astype(str).str[:2].where(postal.notna())
    if 'zone_geo' not in d.columns and 'code_departement' in d.columns:
        d['zone_geo'] = d['code_departement'].map(zone_by_dept)
    return d

def compute_yields(d: pd.DataFrame, loyer_m2: float, charges_pct: float) -> pd.DataFrame:
    """Return a copy of *d* with gross and net rental yield columns added.

    ``loyer_m2`` is multiplied by the surface and by 12 to get a yearly rent;
    ``charges_pct`` is the fraction of that rent removed to get the net rent.
    Both yields are expressed as a percentage of ``valeur_fonciere``.
    Infinite results (e.g. a price of zero) are turned into missing values.
    """
    out = d.copy()
    # Coerce the numeric columns that are present; bad entries become NaN.
    present = [c for c in ('valeur_fonciere', 'surface_reelle_bati', 'prix_m2') if c in out.columns]
    for col in present:
        out[col] = pd.to_numeric(out[col], errors='coerce')

    yearly_rent = loyer_m2 * out['surface_reelle_bati'] * 12
    yearly_rent_net = yearly_rent * (1 - charges_pct)
    price = out['valeur_fonciere']

    out['rendement_brut'] = (yearly_rent / price) * 100
    out['rendement_net'] = (yearly_rent_net / price) * 100
    return out.replace([np.inf, -np.inf], np.nan)

def apply_filters(df_base: pd.DataFrame, cfg: dict) -> pd.DataFrame:
    """Apply the dashboard filters in *cfg* to *df_base* and add yield columns.

    Steps, in order: geographic columns are ensured, then rows are restricted
    by surface, total price (``cfg['p_total']`` is multiplied by 1000 before
    comparison), price per m2 and year; the optional zone / department
    filters follow; an optional IQR-based outlier cut on ``prix_m2`` is
    applied when more than 50 rows remain; yields are computed and rows below
    ``cfg['rdt_min']`` are dropped; finally, when more than 50 rows remain,
    ``prix_m2`` is trimmed to its 1%-99% quantile band.
    """
    def keep_between(frame, column, low, high):
        # Inclusive on both ends; rows with a missing value are dropped.
        values = frame[column]
        return frame[(values >= low) & (values <= high)]

    data = ensure_geo(df_base)
    data = keep_between(data, 'surface_reelle_bati', cfg['surf'][0], cfg['surf'][1])
    data = keep_between(data, 'valeur_fonciere', cfg['p_total'][0] * 1000, cfg['p_total'][1] * 1000)
    data = keep_between(data, 'prix_m2', cfg['p_m2'][0], cfg['p_m2'][1])
    data = data[data['annee'].between(cfg['annees'][0], cfg['annees'][1])]

    # The "(Toutes)" / "(Tous)" sentinels mean "no restriction".
    if cfg['zone'] != '(Toutes)':
        data = data[data['zone_geo'] == cfg['zone']]
    if cfg['dept'] != '(Tous)':
        data = data[data['code_departement'] == cfg['dept']]

    if cfg['outliers'] and len(data) > 50:
        q1, q3 = data['prix_m2'].quantile([0.25, 0.75])
        spread = q3 - q1
        data = keep_between(data, 'prix_m2', q1 - 2 * spread, q3 + 2 * spread)

    data = compute_yields(data, cfg['loyer'], cfg['charges'] / 100)
    data = data[data['rendement_net'] >= cfg['rdt_min']]

    if len(data) > 50:
        low, high = data['prix_m2'].quantile([0.01, 0.99])
        data = data[data['prix_m2'].between(low, high)]
    return data


In [None]:
# --- Filter widgets -----------------------------------------------------
# Labels are user-facing text; they were mis-encoded (UTF-8 read with the
# wrong charset) and are restored here.
# Persona-driven defaults are read from PERSONA so they cannot drift from it.
w_surface = W.IntRangeSlider(
    description='Surface (m²)', min=10, max=200, step=1,
    value=[PERSONA['surface_min'], PERSONA['surface_max']],
    layout=W.Layout(width='95%'),
)
w_total = W.IntRangeSlider(
    description='Budget (k€)', min=30, max=500, step=1,
    # The slider is expressed in thousands, PERSONA['budget_max'] is not.
    value=[50, PERSONA['budget_max'] // 1000],
    layout=W.Layout(width='95%'),
)
w_pm2 = W.IntRangeSlider(
    description='Prix/m² (€)', min=1500, max=20000, step=100,
    value=[3000, 12000],
    layout=W.Layout(width='95%'),
)
w_zone = W.Dropdown(
    description='Zone',
    options=['(Toutes)', 'Paris', 'Petite Couronne', 'Grande Couronne'],
    value='(Toutes)',
)
w_dept = W.Dropdown(
    description='Dépt',
    options=['(Tous)', '75', '77', '78', '91', '92', '93', '94', '95'],
    value='(Tous)',
)
w_loyer = W.FloatSlider(description='Loyer €/m²', min=10, max=50, step=0.5, value=22)
w_charges = W.FloatSlider(description='Charges %', min=0, max=40, step=1, value=25)
w_rdt_min = W.FloatSlider(
    description='Rdt min %', min=0, max=10, step=0.1,
    value=PERSONA['objectif_rendement_net'],
)
w_years = W.IntRangeSlider(description='Années', min=2019, max=2025, value=[2019, 2025])
w_outliers = W.Checkbox(description='Supprimer outliers', value=True)
w_topn = W.IntSlider(description='Top N', min=5, max=100, step=1, value=15)

# --- Action buttons -----------------------------------------------------
btn_apply = W.Button(description='Appliquer', button_style='success')
btn_reset = W.Button(description='Reset', button_style='warning')
btn_export = W.Button(description='Exporter CSV')

# --- Left-hand panel holding every control ------------------------------
sidebar = W.VBox([
    W.HTML('<h3>🎛️ Filtres</h3>'),
    w_surface, w_total, w_pm2,
    w_zone, w_dept,
    w_loyer, w_charges, w_rdt_min,
    w_years, w_outliers,
    w_topn,
    W.HBox([btn_apply, btn_reset, btn_export]),
], layout=W.Layout(width='28%'))


In [None]:
# One output area per dashboard section, created in display order.
out_kpi, out_top, out_price, out_rdt, out_dept = (W.Output() for _ in range(5))

# Page layout: controls on the left, the stacked result panels on the right.
display(
    W.HBox([
        sidebar,
        W.VBox(
            [out_kpi, out_top, out_price, out_rdt, out_dept],
            layout=W.Layout(width='72%'),
        ),
    ])
)


In [None]:
def current_cfg():
    """Snapshot the current widget values as a plain configuration dict.

    Range sliders are stored as tuples; every other widget contributes its
    raw ``value``.
    """
    # (config key, widget, whether the value is a range to freeze as a tuple)
    sources = (
        ('surf', w_surface, True),
        ('p_total', w_total, True),
        ('p_m2', w_pm2, True),
        ('zone', w_zone, False),
        ('dept', w_dept, False),
        ('loyer', w_loyer, False),
        ('charges', w_charges, False),
        ('rdt_min', w_rdt_min, False),
        ('annees', w_years, True),
        ('outliers', w_outliers, False),
        ('topn', w_topn, False),
    )
    return {
        key: tuple(widget.value) if is_range else widget.value
        for key, widget, is_range in sources
    }

def render_all():
    """Recompute the filtered dataset and redraw every dashboard panel.

    Reads the current widget values, filters ``df_unifie`` accordingly and
    refreshes, in order, the KPI, top-communes, price, yield and department
    output areas. Each area is cleared before being redrawn.
    """
    def spaced(number_text):
        # Use a space as thousands separator. Applied to formatted numbers
        # only, so surrounding markup or text is never altered.
        return number_text.replace(',', ' ')

    cfg = current_cfg()
    d = apply_filters(df_unifie, cfg)

    # ---- KPI cards + compact price histogram ---------------------------
    with out_kpi:
        out_kpi.clear_output(wait=True)
        n = len(d)
        prix_med = d['prix_m2'].median() if 'prix_m2' in d.columns else np.nan
        rdt_med = d['rendement_net'].median() if 'rendement_net' in d.columns else np.nan
        surf_med = d['surface_reelle_bati'].median() if 'surface_reelle_bati' in d.columns else np.nan
        n_txt = spaced(f"{n:,}")
        prix_txt = spaced(f"{prix_med:,.0f}")
        display(HTML(f"""
        <div style='display:flex;gap:12px'>
          <div style='flex:1;background:#1e293b;color:white;padding:12px;border-radius:10px'><div>Transactions</div><div style='font-size:26px;font-weight:700'>{n_txt}</div></div>
          <div style='flex:1;background:#0ea5e9;color:white;padding:12px;border-radius:10px'><div>Prix/m² médian</div><div style='font-size:26px;font-weight:700'>{prix_txt} €</div></div>
          <div style='flex:1;background:#f59e0b;color:white;padding:12px;border-radius:10px'><div>Rendement net médian</div><div style='font-size:26px;font-weight:700'>{rdt_med:.2f} %</div></div>
          <div style='flex:1;background:#ef4444;color:white;padding:12px;border-radius:10px'><div>Surface médiane</div><div style='font-size:26px;font-weight:700'>{surf_med:.0f} m²</div></div>
        </div>
        """))

        if 'prix_m2' in d.columns:
            p = clip_q(d['prix_m2'])
            # Nothing to plot without data (same guard as the price panel).
            if len(p) > 0:
                fig = px.histogram(p, nbins=40, labels={'value': 'Prix/m² (€)'})
                fig.update_traces(hovertemplate='Bin: %{x}<br>Count: %{y}<extra></extra>')
                med = float(p.median())
                fig.add_vline(x=med, line_dash='dash', line_color='red',
                              annotation_text="Médiane " + spaced(f"{med:,.0f}") + " €")
                fig.update_layout(height=300, template='plotly_dark')
                fig.show()

    # ---- Top communes ranked by median net yield -----------------------
    with out_top:
        out_top.clear_output(wait=True)
        if len(d) == 0:
            display(HTML('<em>Aucune donnée.</em>'))
        else:
            top = (d.groupby(['nom_commune', 'code_postal'], as_index=False)
                     .agg(nb=('prix_m2', 'count'), prix_m2_med=('prix_m2', 'median'),
                          surf_med=('surface_reelle_bati', 'median'), prix_med=('valeur_fonciere', 'median'),
                          rdt_net=('rendement_net', 'median'))
                     .sort_values('rdt_net', ascending=False)
                     .head(cfg['topn']))
            display(HTML('<h3>🏆 Top communes (rendement net médian)</h3>'))
            display(top.style.format({'prix_m2_med': '{:.0f}', 'surf_med': '{:.0f}',
                                      'prix_med': '{:.0f}', 'rdt_net': '{:.2f}'}))

    # ---- Price panel: scatter + histogram ------------------------------
    with out_price:
        out_price.clear_output(wait=True)
        if len(d) == 0:
            display(HTML('<em>Aucune donnée pour les graphiques de prix.</em>'))
        else:
            ds = d.dropna(subset=['surface_reelle_bati', 'prix_m2']).copy()
            # Cap the number of plotted points; fixed seed keeps redraws stable.
            if len(ds) > 4000:
                ds = ds.sample(4000, random_state=42)
            hover = {
                'nom_commune': True,
                'code_postal': True,
                'surface_reelle_bati': ':.0f',
                'prix_m2': ':.0f',
                'rendement_net': ':.2f'
            }
            fig1 = px.scatter(ds, x='surface_reelle_bati', y='prix_m2', color='rendement_net',
                              labels={'surface_reelle_bati': 'Surface (m²)', 'prix_m2': 'Prix/m² (€)', 'rendement_net': 'Rdt net (%)'},
                              hover_data=hover, color_continuous_scale='RdYlGn')
            fig1.update_traces(marker=dict(line=dict(width=0.3, color='white')),
                               hovertemplate='Commune: %{customdata[0]} (%{customdata[1]})<br>Surface: %{x:.0f} m²<br>Prix/m²: %{y:,.0f} €<br>Rdt net: %{marker.color:.2f}%<extra></extra>')
            fig1.update_layout(height=420, template='plotly_dark', title='Prix/m² vs Surface')
            fig1.show()

            p = clip_q(d['prix_m2']) if 'prix_m2' in d.columns else pd.Series(dtype=float)
            if len(p) > 0:
                fig2 = px.histogram(p, nbins=40, labels={'value': 'Prix/m² (€)'})
                med = float(p.median())
                fig2.add_vline(x=med, line_dash='dash', line_color='red',
                               annotation_text="Médiane " + spaced(f"{med:,.0f}") + " €")
                fig2.update_traces(hovertemplate='Bin: %{x}<br>Count: %{y}<extra></extra>')
                fig2.update_layout(height=300, template='plotly_dark', title='Distribution des prix/m² (1–99%)')
                fig2.show()

    # ---- Yield panel: distribution + per-zone medians ------------------
    with out_rdt:
        out_rdt.clear_output(wait=True)
        if 'rendement_net' in d.columns and d['rendement_net'].notna().any():
            # Values above 12 are pulled down to 12 for display.
            r = d['rendement_net'].clip(upper=12).dropna()
            fig3 = px.histogram(r, nbins=40, labels={'value': 'Rendement net (%)'})
            fig3.add_vline(x=float(r.median()), line_dash='dash', line_color='red',
                           annotation_text=f"Médiane {r.median():.2f}%")
            fig3.add_vline(x=PERSONA['objectif_rendement_net'], line_dash='dot', line_color='orange',
                           annotation_text=f"Objectif {PERSONA['objectif_rendement_net']}%")
            fig3.update_traces(hovertemplate='Bin: %{x:.2f}<br>Count: %{y}<extra></extra>')
            fig3.update_layout(height=320, template='plotly_dark', title='Distribution des rendements nets')
            fig3.show()

            if 'zone_geo' in d.columns:
                z = d.groupby('zone_geo')['rendement_net'].median().sort_values(ascending=True)
                # The grouped index is named 'zone_geo'; label it under that
                # name too so the axis title is applied.
                fig4 = px.bar(z, orientation='h',
                              labels={'value': 'Rendement net médian (%)', 'index': 'Zone', 'zone_geo': 'Zone'})
                fig4.update_traces(hovertemplate='%{y}: %{x:.2f}%<extra></extra>')
                fig4.update_layout(height=320, template='plotly_dark', title='Rendement par zone')
                fig4.show()

    # ---- Department panel ----------------------------------------------
    with out_dept:
        out_dept.clear_output(wait=True)
        if 'code_departement' in d.columns and d['code_departement'].notna().any():
            g = d.groupby('code_departement')
            prix_med = g['prix_m2'].median()
            rdt_med = g['rendement_net'].median() if 'rendement_net' in d.columns else None
            tab = pd.DataFrame({'prix_med': prix_med, 'nb': g['prix_m2'].count()})
            if rdt_med is not None:
                tab['rdt_med'] = rdt_med
            tab = tab.sort_values('rdt_med' if 'rdt_med' in tab.columns else 'nb', ascending=False).head(8)

            # Turn the department code into a regular column. Passing
            # `tab.index` next to a re-sorted frame pairs values and labels
            # by position and therefore mislabels the bars.
            tab = tab.rename_axis('code_departement').reset_index()
            tab['code_departement'] = tab['code_departement'].astype(str)

            fig5 = px.bar(tab.sort_values('prix_med'), x='prix_med', y='code_departement', orientation='h',
                          labels={'prix_med': 'Prix/m² médian (€)', 'code_departement': 'Département'})
            fig5.update_traces(hovertemplate='Dpt %{y}: %{x:,.0f} €<extra></extra>')
            # Department codes look numeric; force a categorical axis so the
            # bars keep the sorted order instead of a numeric scale.
            fig5.update_yaxes(type='category')
            fig5.update_layout(height=360, template='plotly_dark', title='Prix médian par département')
            fig5.show()

            if 'rdt_med' in tab.columns:
                fig6 = px.bar(tab.sort_values('rdt_med'), x='rdt_med', y='code_departement', orientation='h',
                              labels={'rdt_med': 'Rendement net médian (%)', 'code_departement': 'Département'})
                fig6.update_traces(hovertemplate='Dpt %{y}: %{x:.2f}%<extra></extra>')
                fig6.update_yaxes(type='category')
                fig6.update_layout(height=360, template='plotly_dark', title='Rendement par département')
                fig6.show()

# Initial draw with the default widget values.
render_all()

def on_apply(_):
    """Click handler registered on ``btn_apply``: redraw the dashboard.

    The single argument supplied by the click event is ignored.
    """
    render_all()

def on_reset(_):
    """Click handler registered on ``btn_reset``: restore defaults and redraw.

    Every filter widget is set back to its default value, then the dashboard
    is rendered again. The click-event argument is ignored.
    """
    defaults = (
        (w_surface, (PERSONA['surface_min'], PERSONA['surface_max'])),
        (w_total, (50, 200)),
        (w_pm2, (3000, 12000)),
        (w_zone, '(Toutes)'),
        (w_dept, '(Tous)'),
        (w_loyer, 22),
        (w_charges, 25),
        (w_rdt_min, PERSONA['objectif_rendement_net']),
        (w_years, (2019, 2025)),
        (w_outliers, True),
        (w_topn, 15),
    )
    for widget, default in defaults:
        widget.value = default
    render_all()

def on_export(_):
    """Click handler registered on ``btn_export``: write the filtered rows to CSV.

    The current filters are re-applied to ``df_unifie`` and the result is
    saved as ``investissement_persona.csv`` in the working directory (UTF-8
    with BOM, no index column). A confirmation with the resolved path and the
    row count is then displayed. The click-event argument is ignored.
    """
    from html import escape

    cfg = current_cfg()
    d = apply_filters(df_unifie, cfg)
    out_fp = Path('investissement_persona.csv')
    d.to_csv(out_fp, index=False, encoding='utf-8-sig')

    # Only the row count gets the space thousands separator: replacing commas
    # across the whole message would also rewrite commas in the path.
    n_rows = f"{len(d):,}".replace(',', ' ')
    # The path is inserted into HTML, so escape characters such as & or <.
    shown_path = escape(str(out_fp.resolve()))
    display(HTML(f"✅ Exporté: <b>{shown_path}</b> ({n_rows} lignes)"))

# Register each button's click handler.
for _button, _handler in (
    (btn_apply, on_apply),
    (btn_reset, on_reset),
    (btn_export, on_export),
):
    _button.on_click(_handler)
