In [1]:
import os
import pandas as pd
import geopandas as gpd
import shapely.wkb
from shapely.geometry import Point
from sqlalchemy import create_engine, text

In [2]:
from dotenv import load_dotenv

# Load the key/value pairs of a .env file into the process environment.
load_dotenv()

# Connection settings of the current database, read from the environment.
# NOTE(review): USER (and sometimes HOST) is commonly pre-set by the OS/shell, and
# load_dotenv() is called here without override=True, so an already-set value
# presumably wins over the .env entry — confirm the intended DB user is picked up.
DBNAME = os.getenv('DBNAME')
USER = os.getenv('USER')
PASSWORD = os.getenv('PASSWORD')
HOST = os.getenv('HOST')
PORT = os.getenv('PORT')
# SQLAlchemy URL using the psycopg2 driver.
# NOTE(review): values are interpolated as-is; a missing variable renders as "None" and a
# password containing '@', ':' or '/' is not URL-escaped — verify against real credentials.
CONNEXION = f"postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{DBNAME}"

# Same settings for the former ("ancien") database.
DBNAME_ANCIEN = os.getenv('DBNAME_ANCIEN')
USER_ANCIEN = os.getenv('USER_ANCIEN')
PASSWORD_ANCIEN = os.getenv('PASSWORD_ANCIEN')
HOST_ANCIEN = os.getenv('HOST_ANCIEN')
PORT_ANCIEN = os.getenv('PORT_ANCIEN')
CONNEXION_ANCIEN = f"postgresql+psycopg2://{USER_ANCIEN}:{PASSWORD_ANCIEN}@{HOST_ANCIEN}:{PORT_ANCIEN}/{DBNAME_ANCIEN}"

# Root folder of the data files, and the sub-folder name used for the Filosofi workbooks.
PATH_DATA_G = os.getenv('PATH_DATA_G')
G_FILOSOFI = 'filosofi'

### CODE / ECHELLE

In [3]:
# Create the SQLAlchemy engine once; both queries of this cell reuse it
# (the original built a second, identical engine that was never disposed).
engine = create_engine(CONNEXION)

# Codes of the EPCI (SIREN number) and of the communes whose INSEE code starts with '34',
# each tagged with its geographic scale.
requete = text(
    """
    select 
    numero_siren::int as code,
    'epci' as echelle
    from hab.v_epcifp_500 
    union 
    select insee_com::int as code,
    'commune' as echelle
    from hab.commune_500 
    where insee_com like '34%' 
    order by code
    """
    )

# Run the query and materialise the rows as a DataFrame
with engine.connect() as connection:
    result = connection.execute(requete)
    code_com_epci = pd.DataFrame(result.fetchall(), columns=result.keys())

# Codes of the IRIS, tagged with their scale
requete = text(
    """
    select inseeiris::int as code,
    'iris' as echelle 
    from hab.iris_100 
    order by code
    """
    )

# Run the query and materialise the rows as a DataFrame
with engine.connect() as connection:
    result = connection.execute(requete)
    code_iris = pd.DataFrame(result.fetchall(), columns=result.keys())

# Reference table of every (code, echelle) pair; the last expression displays the count per scale
code = pd.concat([code_com_epci, code_iris], axis=0).drop_duplicates().reset_index(drop=True)
code['echelle'].value_counts()

echelle
iris       578
commune    341
epci        17
Name: count, dtype: int64

### ZONAGE GEOGRAPHIQUE

In [None]:
# Create the SQLAlchemy engine
engine = create_engine(CONNEXION)

# EPCI zoning: every column of the view, loaded as a plain DataFrame
requete = text(
    """
    select *
    from hab.v_epcifp_500 
    """
    )

# Run the query and materialise the rows as a DataFrame
with engine.connect() as connection:
    result = connection.execute(requete)
    zonage_epci = pd.DataFrame(result.fetchall(), columns=result.keys())


# Commune zoning, restricted to INSEE codes starting with '34'
requete = text(
    """
    select *
    from hab.commune_500 
    where insee_com like '34%' 
    """
    )

# Run the query and materialise the rows as a DataFrame
with engine.connect() as connection:
    result = connection.execute(requete)
    zonage_com = pd.DataFrame(result.fetchall(), columns=result.keys())


# IRIS zoning, loaded as a GeoDataFrame because it is used for the spatial join
requete = text(
    """
    select *
    from hab.iris_100
    """
    )

# Run the query, decode the geometries and build the GeoDataFrame
with engine.connect() as connection:
    result = connection.execute(requete)
    rows = result.fetchall()
    columns = result.keys()
    dict_rows = [dict(zip(columns, row)) for row in rows]
    for row in dict_rows:
        # 'shape' is decoded as hex-encoded WKB; a NULL geometry is kept as None
        # (missing geometry) instead of making shapely raise on it.
        if row['shape'] is not None:
            row['shape'] = shapely.wkb.loads(row['shape'], hex=True)
    zonage_iris = gpd.GeoDataFrame(dict_rows, geometry='shape', crs="EPSG:2154")
    zonage_iris = zonage_iris.to_crs(epsg=4326) # Lambert 93 (EPSG:2154) -> WGS 84 (EPSG:4326)
    zonage_iris['inseeiris'] = zonage_iris['inseeiris'].astype(int)


## DECRET

In [5]:
# Sub-folder (under PATH_DATA_G) that holds the decree workbook
G_DECRET = 'decret'
chemin = os.path.join(PATH_DATA_G, G_DECRET, "2023_decret.xlsx")
# sheet_name is the string "3", i.e. the sheet is selected by that label rather than by position
decret = pd.read_excel(chemin, sheet_name="3")

## AIRDNA

In [6]:
# Sub-folder (under PATH_DATA_G) that holds the AirDNA workbooks
G_AIRDNA = 'airdna'

In [7]:
print("Chargement des données Airbnb...")

# Normalised column names applied positionally to both yearly extracts
colonnes_cibles = ['code_com','com','code_epci','epci','longitude','latitude','rev_annuel',
                   'nb_jours_reserv','nb_jours_dispo']

# --- 2022 extract: keep whole-home listings and the nine columns of interest
chemin = os.path.join(PATH_DATA_G, G_AIRDNA, 'offre_airdna_34_2022.xlsx')
brut_2022 = pd.read_excel(chemin)
bnb2022 = brut_2022.loc[
    brut_2022['Liste logement'] == 'Entire home/apt',
    ['INSEE','Communes','Code EPCI','EPCI','Longitude','Latitude','Revenu annuel €',
     'Nb jours réservés','Nb total jours de disponibilité'],
].set_axis(colonnes_cibles, axis=1)

# --- 2023 extract: the listing-type column has a different name, and the accented headers
# are spelled in their mis-encoded form.
# NOTE(review): presumably the 2023 workbook really stores these headers mis-encoded
# (the cell ran with them) — check the file before "repairing" the strings.
chemin = os.path.join(PATH_DATA_G, G_AIRDNA, 'offre_airdna_34_2023.xlsx')
brut_2023 = pd.read_excel(chemin)
bnb2023 = brut_2023.loc[
    brut_2023['Nature logement'] == 'Entire home/apt',
    ['INSEE','Communes','Code EPCI','EPCI','Longitude','Latitude','Revenu annuel â‚¬',
     'Nb jours rÃ©servÃ©s','Nb total jours de disponibilitÃ©'],
].set_axis(colonnes_cibles, axis=1)
print("Chargement des données Airbnb terminé.")

Chargement des données Airbnb...
Chargement des données Airbnb terminé.


In [11]:
def traitement_bnb(bnb, code, zonage=None):
    """
    Aggregate Airbnb listings at a given geographic level.

    Each listing is located in an IRIS polygon through a spatial join, listings whose
    IRIS-derived commune (first 5 characters of the IRIS code) differs from their declared
    commune are discarded, and the remaining listings are aggregated by ``code``.

    Parameters
    ----------
    bnb : pandas.DataFrame
        Listings with at least the columns 'longitude', 'latitude', 'code_com',
        'nb_jours_reserv', 'nb_jours_dispo', 'rev_annuel' and the grouping column.
        The frame is NOT modified (the original implementation added a 'geometry'
        column to the caller's frame).
    code : str
        Name of the column to group by ('inseeiris', 'code_com' or 'code_epci' in this notebook).
        Note: inside this function the name refers to that column name, not to the
        notebook-level ``code`` DataFrame.
    zonage : geopandas.GeoDataFrame, optional
        IRIS polygons with an 'inseeiris' column, in EPSG:4326. Defaults to the
        notebook-level ``zonage_iris``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns nombre_reserv_120, nombre_dispo_120,
        nombre_total, jours_reserves, revenu, jours_reserves_120, revenu_120 and
        'code' (the grouping key cast to int).
    """
    if zonage is None:
        # Backward-compatible default: the IRIS layer loaded earlier in the notebook
        zonage = zonage_iris

    ### IRIS join — points are built vectorised, on a copy so the caller's frame is untouched
    points = gpd.points_from_xy(bnb['longitude'], bnb['latitude'])
    bnb_gdf = gpd.GeoDataFrame(bnb.copy(), geometry=points, crs="EPSG:4326")
    bnb_gdf = gpd.sjoin(bnb_gdf, zonage, how="left", predicate="within")

    ### join check: the commune derived from the IRIS code must equal the declared commune
    commune_iris = bnb_gdf["inseeiris"].astype(str).str[:5]
    commune_declaree = bnb_gdf["code_com"].astype(str)
    coherents = bnb_gdf[commune_iris == commune_declaree]
    # The count covers listings with no IRIS match AND listings whose IRIS commune differs
    print(f"Nombre de logements Airbnb sans IRIS : {len(bnb_gdf) - len(coherents)}")

    ### aggregation — the ">= 120 days" conditions are pre-computed as columns, so the
    ### group-wise sums do not rely on index look-ups (which break if the spatial join
    ### produced duplicate index labels) and run vectorised.
    dispo_120 = coherents['nb_jours_dispo'] >= 120
    prepare = coherents.assign(
        _reserv_120=(coherents['nb_jours_reserv'] >= 120).astype(int),
        _dispo_120=dispo_120.astype(int),
        # booked days / revenue counted only for listings available at least 120 days
        _jours_120=coherents['nb_jours_reserv'].where(dispo_120, 0),
        _revenu_120=coherents['rev_annuel'].where(dispo_120, 0),
    )
    agrege = prepare.groupby(code).agg(
        nombre_reserv_120=('_reserv_120', 'sum'),
        nombre_dispo_120=('_dispo_120', 'sum'),
        nombre_total=('nb_jours_reserv', 'count'),
        jours_reserves=('nb_jours_reserv', 'sum'),
        revenu=('rev_annuel', 'sum'),
        jours_reserves_120=('_jours_120', 'sum'),
        revenu_120=('_revenu_120', 'sum'),
    ).reset_index()

    # Expose the grouping key under the common name 'code' (as int), as the last column
    agrege['code'] = agrege[code].astype(int)
    return agrege.drop(columns=[code])

In [12]:
# Aggregate both yearly extracts at each geographic scale.
# Calls run in the order 2022 then 2023 for IRIS, commune and EPCI, which is also the
# order of the diagnostics printed by traitement_bnb.
bnb_iris_2022, bnb_iris_2023 = [traitement_bnb(offre, 'inseeiris') for offre in (bnb2022, bnb2023)]

bnb_com_2022, bnb_com_2023 = [traitement_bnb(offre, 'code_com') for offre in (bnb2022, bnb2023)]

bnb_epci_2022, bnb_epci_2023 = [traitement_bnb(offre, 'code_epci') for offre in (bnb2022, bnb2023)]



Nombre de logements Airbnb sans IRIS : 194
Nombre de logements Airbnb sans IRIS : 165
Nombre de logements Airbnb sans IRIS : 194
Nombre de logements Airbnb sans IRIS : 165
Nombre de logements Airbnb sans IRIS : 194
Nombre de logements Airbnb sans IRIS : 165


In [13]:
# Stack the IRIS, commune and EPCI aggregates of each year into one table keyed by 'code',
# dropping exact duplicate rows and renumbering the index from 0.
bnb_2022 = pd.concat([bnb_iris_2022, bnb_com_2022, bnb_epci_2022]).drop_duplicates(ignore_index=True)
bnb_2023 = pd.concat([bnb_iris_2023, bnb_com_2023, bnb_epci_2023]).drop_duplicates(ignore_index=True)

In [14]:
# Put the two years side by side; the outer join keeps codes present in only one year
bnb_final = bnb_2022.merge(bnb_2023, on='code', how='outer', suffixes=('_2022', '_2023'))

In [15]:
# Attach the indicators to the reference table of codes (every code is kept, matched or not)
bnb_final_final = code.merge(bnb_final, on='code', how='left')

In [16]:
# Compute the 2022 -> 2023 relative changes ("evol_*") and absolute differences ("ecart_*").
# NOTE(review): the ratios divide by 2022 counts / booked days; where a denominator is 0 the
# result is presumably inf or NaN — confirm this is acceptable downstream.
bnb_final_final_final = bnb_final_final.assign(
    evol_bnb_120=lambda t: t['nombre_dispo_120_2023'].sub(t['nombre_dispo_120_2022']).div(t['nombre_dispo_120_2022']),
    ecart_bnb_120=lambda t: t['nombre_dispo_120_2023'].sub(t['nombre_dispo_120_2022']),
    ecart_bnb_tot=lambda t: t['nombre_total_2023'].sub(t['nombre_total_2022']),
    evol_bnb_tot=lambda t: t['nombre_total_2023'].sub(t['nombre_total_2022']).div(t['nombre_total_2022']),
    ecart_jours=lambda t: t['jours_reserves_2023'].sub(t['jours_reserves_2022']),
    ecart_revenu=lambda t: t['revenu_2023'].sub(t['revenu_2022']),
    # difference of the revenue per booked day between the two years
    ecart_revenu_jours=lambda t: t['revenu_2023'].div(t['jours_reserves_2023']).sub(
        t['revenu_2022'].div(t['jours_reserves_2022'])
    ),
)

In [17]:
# Display the final indicator table (bare last expression -> rich notebook rendering)
bnb_final_final_final

Unnamed: 0,code,echelle,nombre_reserv_120_2022,nombre_dispo_120_2022,nombre_total_2022,jours_reserves_2022,revenu_2022,jours_reserves_120_2022,revenu_120_2022,nombre_reserv_120_2023,...,revenu_2023,jours_reserves_120_2023,revenu_120_2023,evol_bnb_120,ecart_bnb_120,ecart_bnb_tot,evol_bnb_tot,ecart_jours,ecart_revenu,ecart_revenu_jours
0,34001,commune,2.0,4.0,16.0,690.0,74625.0,455.0,48603.0,3.0,...,120060.0,793.0,81313.0,0.750000,3.0,3.0,0.187500,374.0,45435.0,4.686172
1,34002,commune,2.0,10.0,15.0,1028.0,177214.0,917.0,136338.0,3.0,...,228402.0,935.0,217818.0,0.100000,1.0,0.0,0.000000,-29.0,51188.0,56.243471
2,34003,commune,730.0,2663.0,5681.0,349231.0,39528819.0,264730.0,29208459.0,729.0,...,47218341.0,277452.0,33910387.0,0.079609,212.0,1228.0,0.216159,31245.0,7689522.0,10.915163
3,34004,commune,2.0,7.0,24.0,1457.0,885346.0,873.0,548588.0,10.0,...,1617070.0,2034.0,1459826.0,1.571429,11.0,4.0,0.166667,961.0,731724.0,61.113475
4,34005,commune,1.0,2.0,10.0,428.0,88184.0,208.0,52970.0,1.0,...,47268.0,205.0,28904.0,0.000000,0.0,-5.0,-0.500000,-119.0,-40916.0,-53.066509
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,343420000,iris,0.0,3.0,9.0,273.0,41789.0,172.0,23997.0,0.0,...,110938.0,340.0,89428.0,0.666667,2.0,1.0,0.111111,146.0,69149.0,111.695236
932,343430000,iris,0.0,1.0,20.0,683.0,105182.0,84.0,14349.0,0.0,...,124492.0,387.0,45836.0,5.000000,5.0,0.0,0.000000,99.0,19310.0,5.196931
933,343440101,iris,152.0,347.0,706.0,52884.0,4553978.0,41927.0,3639394.0,129.0,...,5062994.0,43516.0,3982483.0,0.167147,58.0,219.0,0.310198,1738.0,509016.0,6.578894
934,343440102,iris,280.0,650.0,1220.0,98197.0,8813928.0,78652.0,7049797.0,242.0,...,9882497.0,77856.0,7732345.0,0.055385,36.0,271.0,0.222131,471.0,1068569.0,10.401479


## FILOSOFI

In [None]:
# Read the 2020 Filosofi workbooks at commune and EPCI level (first sheet in both cases:
# sheet_name=0 is given explicitly for the commune file only).
chemin = os.path.join(PATH_DATA_G, G_FILOSOFI, 'filosofi_com_2020.xlsx')
filosofi_com_2020 = pd.read_excel(chemin, sheet_name=0)
chemin = os.path.join(PATH_DATA_G, G_FILOSOFI, 'filosofi_epci_2020.xlsx')
filosofi_epci_2020 = pd.read_excel(chemin)


# Stack both levels into a single table with a fresh index
filosofi = pd.concat([filosofi_com_2020, filosofi_epci_2020], ignore_index=True)

# Attach the Filosofi columns to the commune/EPCI codes; codes without a match are kept.
# NOTE(review): code_com_epci['code'] is produced by an SQL ::int cast, while the dtype of
# CODGEO depends on the workbook — confirm both keys have the same type before merging.
table = pd.merge(code_com_epci, filosofi, how='left', left_on="code", right_on="CODGEO")
table = table.drop(columns=['CODGEO'])