In [2]:
import pandas as pd
import os
import json

In [12]:
# Codes matched against the `code_nature_culture` column; rows carrying any
# other code are discarded by `load_dvf`.
culture_to_keep = [
    "VI", "VE", "TP", "T",
    "S", "PP", "PH", "PE",
    "PC", "PA", "P", "L",
]

# Columns read from each DVF file, mapped to the dtype pandas parses them as.
# Dates and codes are read as text; amounts, surfaces and coordinates as floats.
cols = dict(
    date_mutation=str,
    nature_mutation=str,
    valeur_fonciere=float,
    code_commune=str,
    id_parcelle=str,
    code_nature_culture=str,
    nature_culture=str,
    code_nature_culture_speciale=str,
    nature_culture_speciale=str,
    surface_terrain=float,
    longitude=float,
    latitude=float,
)

def load_dvf(year):
    """Download one year of the geo-DVF extract and keep the rows of interest.

    The gzipped CSV for ``year`` is read from files.data.gouv.fr, restricted
    to the columns listed in ``cols`` (parsed with the dtypes given there) and
    filtered to the rows whose ``code_nature_culture`` is in
    ``culture_to_keep``.

    Two columns are derived:

    * ``code_departement``: the first two characters of ``code_commune``, or
      the first three when the code starts with ``"97"``.
    * ``month``: the first seven characters of ``date_mutation``
      (e.g. ``2019-07``).

    Parameters
    ----------
    year : int or str
        Year inserted into the download URL.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with ``code_departement`` and ``month`` appended.
        A missing ``code_commune`` or ``date_mutation`` yields a missing
        derived value instead of raising.
    """
    raw = pd.read_csv(
        f'https://files.data.gouv.fr/geo-dvf/latest/csv/{year}/full.csv.gz',
        compression="gzip",
        usecols=list(cols.keys()),
        dtype=cols,
    )
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a slice of `raw` (avoids SettingWithCopyWarning).
    kept = raw.loc[raw['code_nature_culture'].isin(culture_to_keep)].copy()

    # Vectorised prefix extraction: two characters by default, three for codes
    # starting with "97". Unlike a row-wise lambda, `.str` propagates missing
    # values instead of failing on them.
    commune = kept['code_commune']
    two_chars = commune.str.slice(0, 2)
    kept['code_departement'] = two_chars.where(
        two_chars != "97", commune.str.slice(0, 3)
    )
    kept["month"] = kept["date_mutation"].str.slice(0, 7)
    return kept

In [15]:
# Load a single year to inspect the filtered frame interactively.
df = load_dvf(year=2024)
df

In [7]:
# df[['nature_culture', 'nature_culture_speciale']].value_counts(dropna=False).reset_index().to_csv('natures_speciales.csv', index=False)

In [16]:
def create_stats(df, echelles=("departement",)):
    """Count rows per geographic code, month and culture nature.

    For every scale in ``echelles`` the frame is grouped by
    ``code_<scale>``, ``month`` and ``nature_culture`` and the number of rows
    in each group is stored as ``nb_mutations``. Rows with a missing value in
    any grouping column are left out (default ``groupby`` behaviour).

    NOTE(review): the figure is a row count per group; whether one row
    corresponds to exactly one mutation is not visible here -- confirm.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``month``, ``nature_culture`` and one ``code_<scale>``
        column per requested scale.
    echelles : iterable of str, optional
        Scales to aggregate on. Defaults to ``("departement",)``, i.e. the
        previous hard-coded behaviour; pass e.g.
        ``("departement", "commune")`` to add the commune level.

    Returns
    -------
    pandas.DataFrame
        Columns ``code_geo``, ``month``, ``nature_culture``,
        ``nb_mutations`` and ``echelle``; the blocks for each scale are
        stacked in the order given, with a fresh integer index.

    Raises
    ------
    KeyError
        If a required column is absent from ``df``.
    ValueError
        If ``echelles`` is empty (nothing to concatenate).
    """
    per_scale = []
    for echelle in echelles:
        geo_col = f"code_{echelle}"
        counts = (
            df.groupby([geo_col, "month", "nature_culture"])
            # size() counts the rows of each group; groups with a missing key
            # are already dropped, so this equals counting non-null
            # `nature_culture` values.
            .size()
            .reset_index(name="nb_mutations")
            # One shared column name lets all scales live in the same table.
            .rename(columns={geo_col: "code_geo"})
            .assign(echelle=echelle)
        )
        per_scale.append(counts)
    return pd.concat(per_scale, ignore_index=True)

In [17]:
# Rebuild stats.csv from scratch: one block of rows per year, a single header.
STATS_PATH = "stats.csv"
years = range(2019, 2025)

if os.path.isfile(STATS_PATH):
    os.remove(STATS_PATH)
for position, year in enumerate(years):
    stats = create_stats(load_dvf(year))
    # The first year creates the file; later years append to it.
    is_first = position == 0
    stats.to_csv(
        STATS_PATH,
        index=False,
        mode="w" if is_first else "a",
        # Only the first chunk carries the header. Appending with the default
        # header=True would insert a header line in the middle of the file
        # for every subsequent year.
        header=is_first,
    )
    print("Done with", year)

Done with 2019
Done with 2020
Done with 2021
Done with 2022
Done with 2023
Done with 2024


In [5]:
# Reload the exported statistics, reading `code_geo` as text rather than
# letting pandas infer a numeric dtype for it.
stats = pd.read_csv(
    "stats.csv",
    dtype={"code_geo": str},
)
# with open("stats.json", "w") as f:
#     json.dump(stats.to_dict(orient="records"), f)
stats

Unnamed: 0,code_geo,month,nature_culture,nb_mutations,echelle
0,01,2019-07,landes,140,departement
1,01,2019-07,prés,627,departement
2,01,2019-07,pâtures,11,departement
3,01,2019-07,sols,1636,departement
4,01,2019-07,terres,494,departement
...,...,...,...,...,...
34035,97,2024-06,prés,32,departement
34036,97,2024-06,pâtures,2,departement
34037,97,2024-06,sols,1321,departement
34038,97,2024-06,terres,1478,departement
