In [1]:
# Libs
import pandas as pd

In [2]:
# Read the 2021 Barcelona open-data facts file: average gross taxable income
# per person (€/year). Preview the first five rows.
bcn_data_income = pd.read_csv(
    "../../data/opendata/facts/population/2021_atles_renda_bruta_persona.csv"
)
bcn_data_income.head()

Unnamed: 0,Any,Codi_Districte,Nom_Districte,Codi_Barri,Nom_Barri,Seccio_Censal,Import_Renda_Bruta_€
0,2021,1,Ciutat Vella,1,el Raval,1,13657
1,2021,1,Ciutat Vella,1,el Raval,2,10724
2,2021,1,Ciutat Vella,1,el Raval,3,11017
3,2021,1,Ciutat Vella,1,el Raval,4,13156
4,2021,1,Ciutat Vella,1,el Raval,5,11426


In [3]:
# Build the per-census-section income table.
# - compound_id: district code zero-padded to 2 digits followed by the census
#   section zero-padded to 3 digits (e.g. district 1, section 1 -> "01001").
# - income: values that cannot be parsed as numbers are coerced to NaN and
#   then replaced with 0.
df = pd.DataFrame(
    {
        # Format each column element-wise rather than with a row-wise
        # apply(axis=1): same format specs, same resulting strings, but no
        # per-row Series construction.
        "compound_id": bcn_data_income["Codi_Districte"].map("{:02}".format)
        + bcn_data_income["Seccio_Censal"].map("{:03}".format),
        "cens_id": bcn_data_income["Seccio_Censal"],
        "district_id": bcn_data_income["Codi_Districte"],
        "district_desc": bcn_data_income["Nom_Districte"],
        "neighborhood_id": bcn_data_income["Codi_Barri"],
        "neighborhood_desc": bcn_data_income["Nom_Barri"],
        # Single numeric coercion; repeating it on the finished frame would
        # be a no-op because the column is already numeric with no NaN.
        "income": pd.to_numeric(
            bcn_data_income["Import_Renda_Bruta_€"], errors="coerce"
        ).fillna(0),
    }
)

# Show the first and last three rows as a quick sanity check.
pd.concat([df.head(3), df.tail(3)])

Unnamed: 0,compound_id,cens_id,district_id,district_desc,neighborhood_id,neighborhood_desc,income
0,1001,1,1,Ciutat Vella,1,el Raval,13657
1,1002,2,1,Ciutat Vella,1,el Raval,10724
2,1003,3,1,Ciutat Vella,1,el Raval,11017
1065,10235,235,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,17305
1066,10236,236,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,17814
1067,10237,237,10,Sant Martí,71,Provençals del Poblenou,15888


In [4]:
# Mean income per census section (compound_id). The district and neighborhood
# identifiers/labels are included as grouping keys so they are carried through
# to the result as regular columns.
total_avg_cens = df.groupby(
    [
        "compound_id",
        "district_id",
        "district_desc",
        "neighborhood_id",
        "neighborhood_desc",
    ],
    as_index=False,
)["income"].mean()

# Show the first and last three aggregated rows.
pd.concat([total_avg_cens.head(3), total_avg_cens.tail(3)])

Unnamed: 0,compound_id,district_id,district_desc,neighborhood_id,neighborhood_desc,income
0,1001,1,Ciutat Vella,1,el Raval,13657.0
1,1002,1,Ciutat Vella,1,el Raval,10724.0
2,1003,1,Ciutat Vella,1,el Raval,11017.0
1065,10235,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,17305.0
1066,10236,10,Sant Martí,69,Diagonal Mar i el Front Marítim del Poblenou,17814.0
1067,10237,10,Sant Martí,71,Provençals del Poblenou,15888.0


In [5]:
# Write the aggregated income table to the input-data folder as CSV,
# omitting the DataFrame index from the file.
total_avg_cens.to_csv(
    path_or_buf="../../data/input/ds_income_by_registry.csv",
    index=False,
)