In [47]:
# Libs
import json
import pandas as pd

In [48]:
# Read the 2023 population facts from the open-data CSV. Each row carries
# district, neighbourhood and AEB codes plus sex (SEXE), age (EDAT_1) and a count (Valor).
bcn_data_edat_sexe = pd.read_csv(
    filepath_or_buffer='../../data/opendata/facts/population/2023_pad_mdba_sexe_edat-1.csv'
)

# Preview the first five rows.
bcn_data_edat_sexe.head()

Unnamed: 0,Data_Referencia,Codi_Districte,Nom_Districte,Codi_Barri,Nom_Barri,AEB,Valor,SEXE,EDAT_1
0,2023-01-01,1,Ciutat Vella,1,el Raval,1,11,1,0
1,2023-01-01,1,Ciutat Vella,1,el Raval,1,12,1,1
2,2023-01-01,1,Ciutat Vella,1,el Raval,1,15,1,2
3,2023-01-01,1,Ciutat Vella,1,el Raval,1,6,1,3
4,2023-01-01,1,Ciutat Vella,1,el Raval,1,10,1,4


In [49]:
# Build the working frame: rename the source columns to English snake_case,
# keep only the ones used downstream, and make the population numeric.
# Values that cannot be parsed as numbers become NaN and are then counted as 0.
df2 = (
    bcn_data_edat_sexe
    .rename(
        columns={
            "AEB": "aeb_id",
            "Codi_Districte": "district_id",
            "Nom_Districte": "district_desc",
            "Codi_Barri": "neighborhood_id",
            "Nom_Barri": "neighborhood_desc",
            "EDAT_1": "age",
            "SEXE": "sex",
            "Valor": "population",
        }
    )
    .loc[
        :,
        [
            "aeb_id",
            "district_id",
            "district_desc",
            "neighborhood_id",
            "neighborhood_desc",
            "age",
            "sex",
            "population",
        ],
    ]
    .assign(
        population=lambda frame: pd.to_numeric(
            frame["population"], errors="coerce"
        ).fillna(0)
    )
)

# Show the first and the last three rows.
pd.concat([df2.iloc[:3], df2.iloc[-3:]])

Unnamed: 0,aeb_id,district_id,district_desc,neighborhood_id,neighborhood_desc,age,sex,population
0,1,1,Ciutat Vella,1,el Raval,0,1,11.0
1,1,1,Ciutat Vella,1,el Raval,1,1,12.0
2,1,1,Ciutat Vella,1,el Raval,2,1,15.0
46146,233,10,Sant Martí,73,la Verneda i la Pau,96,2,0.0
46147,233,10,Sant Martí,73,la Verneda i la Pau,97,2,0.0
46148,233,10,Sant Martí,73,la Verneda i la Pau,99,2,0.0


In [50]:
# Total population per AEB: add up the age/sex rows of every AEB while keeping
# its district and neighbourhood identifiers and names as regular columns.
total_sum = df2.groupby(
    [
        "aeb_id",
        "district_id",
        "district_desc",
        "neighborhood_id",
        "neighborhood_desc",
    ],
    as_index=False,
)["population"].sum()

# Show the first and the last three rows.
pd.concat([total_sum.iloc[:3], total_sum.iloc[-3:]])

Unnamed: 0,aeb_id,district_id,district_desc,neighborhood_id,neighborhood_desc,population
0,1,1,Ciutat Vella,1,el Raval,2427.0
1,2,1,Ciutat Vella,1,el Raval,6257.0
2,3,1,Ciutat Vella,1,el Raval,5736.0
230,231,10,Sant Martí,73,la Verneda i la Pau,8143.0
231,232,10,Sant Martí,73,la Verneda i la Pau,6723.0
232,233,10,Sant Martí,73,la Verneda i la Pau,6192.0


In [51]:
# Persist the per-AEB population totals as CSV (the row index is not written).
total_sum.to_csv(
    path_or_buf="../../data/input/ds_population_by_district_neighborhood.csv",
    index=False,
)

In [52]:
# Generate population by district including GPS coordinates

# Read the GeoJSON as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open("../../data/input/md_district_coordinates.geojson", "r", encoding="utf-8") as f:
    bcn_md_district_coordinates = json.load(f)

# Per-AEB population totals, as written to this path by the earlier export cell.
df_population = pd.read_csv(
    "../../data/input/ds_population_by_district_neighborhood.csv"
)

# Extract one record per GeoJSON feature.
# Longitude and latitude are read by position (first and second element), so a
# coordinate list with extra trailing elements does not break the split.
# assumes every feature is a Point whose coordinates start with [longitude, latitude] — TODO confirm
rows = []
for feature in bcn_md_district_coordinates['features']:
    district = int(feature['properties']['DISTRICTE'])
    coordinates = feature['geometry']['coordinates']
    rows.append(
        {
            'district_id': district,
            'coordinates': coordinates,
            'longitude': coordinates[0],
            'latitude': coordinates[1],
        }
    )

# Creating the DataFrame
df = pd.DataFrame(rows)

# Drop duplicates: districts 03 and 05 are reported as repeated in the source
# data. Only the first occurrence of each district_id is kept, which makes the
# merge below many-to-one.
df = df.drop_duplicates(subset='district_id')

# The inner merge below discards population rows whose district has no
# coordinates. Report those ids instead of letting the totals shrink silently.
missing_ids = sorted(
    df_population.loc[
        ~df_population['district_id'].isin(df['district_id']), 'district_id'
    ].unique().tolist()
)
if missing_ids:
    print(f"WARNING: no coordinates for district_id(s) {missing_ids}; their population is excluded")

# Attach the coordinates to every population row, then add the rows up per district.
merged_df = pd.merge(df_population, df, on='district_id', validate='many_to_one')
merged_df = merged_df.groupby(
    ['district_id', 'district_desc', 'latitude', 'longitude'], as_index=False
)['population'].sum()

merged_df

Unnamed: 0,district_id,district_desc,latitude,longitude,population
0,1,Ciutat Vella,41.383877,2.176473,105481.0
1,2,Eixample,41.393079,2.164798,267841.0
2,3,Sants-Montjuïc,41.354217,2.176602,185377.0
3,4,Les Corts,41.387336,2.112869,81747.0
4,5,Sarrià-Sant Gervasi,41.417431,2.113922,149096.0
5,6,Gràcia,41.410158,2.1524,122300.0
6,7,Horta-Guinardó,41.433723,2.15047,174889.0
7,8,Nou Barris,41.44651,2.175508,173443.0
8,9,Sant Andreu,41.434197,2.193547,151319.0
9,10,Sant Martí,41.411869,2.2021,241888.0


In [53]:
# Persist the district-level table (population plus coordinates) as CSV;
# the row index is not written.
merged_df.to_csv(
    path_or_buf="../../data/input/ds_population_by_district_coordinates.csv",
    index=False,
)

In [54]:
# Generate population by neighborhood including GPS coordinates

# Read the GeoJSON as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open("../../data/input/md_neighborhood_coordinates.geojson", "r", encoding="utf-8") as f:
    bcn_md_neighborhood_coordinates = json.load(f)

# Per-AEB population totals, as written to this path by the earlier export cell.
df_population = pd.read_csv(
    "../../data/input/ds_population_by_district_neighborhood.csv"
)

# Extract one record per GeoJSON feature.
# Longitude and latitude are read by position (first and second element), so a
# coordinate list with extra trailing elements does not break the split.
# assumes every feature is a Point whose coordinates start with [longitude, latitude] — TODO confirm
rows = []
for feature in bcn_md_neighborhood_coordinates['features']:
    neighborhood = int(feature['properties']['BARRI'])
    coordinates = feature['geometry']['coordinates']
    rows.append(
        {
            'neighborhood_id': neighborhood,
            'coordinates': coordinates,
            'longitude': coordinates[0],
            'latitude': coordinates[1],
        }
    )

# Creating the DataFrame
df = pd.DataFrame(rows)

# Drop duplicates: only the first occurrence of each neighborhood_id is kept,
# which makes the merge below many-to-one.
df = df.drop_duplicates(subset='neighborhood_id')

# The inner merge below discards population rows whose neighborhood has no
# coordinates. Report those ids instead of letting the totals shrink silently.
missing_ids = sorted(
    df_population.loc[
        ~df_population['neighborhood_id'].isin(df['neighborhood_id']), 'neighborhood_id'
    ].unique().tolist()
)
if missing_ids:
    print(f"WARNING: no coordinates for neighborhood_id(s) {missing_ids}; their population is excluded")

# Attach the coordinates to every population row, then add the rows up per neighborhood.
merged_df = pd.merge(df_population, df, on='neighborhood_id', validate='many_to_one')
merged_df = merged_df.groupby(
    ['neighborhood_id', 'neighborhood_desc', 'latitude', 'longitude'], as_index=False
)['population'].sum()

merged_df

Unnamed: 0,neighborhood_id,neighborhood_desc,latitude,longitude,population
0,1,el Raval,41.378617,2.170341,45296.0
1,2,el Barri Gòtic,41.380193,2.178292,24205.0
2,3,la Barceloneta,41.381715,2.191000,14086.0
3,4,"Sant Pere, Santa Caterina i la Ribera",41.386490,2.183194,21894.0
4,5,el Fort Pienc,41.397978,2.181523,34333.0
...,...,...,...,...,...
68,69,Diagonal Mar i el Front Marítim del Poblenou,41.405982,2.212373,13338.0
69,70,el Besòs i el Maresme,41.413503,2.216027,27783.0
70,71,Provençals del Poblenou,41.410239,2.202244,21204.0
71,72,Sant Martí de Provençals,41.415748,2.197213,26077.0


In [55]:
# Persist the neighborhood-level table (population plus coordinates) as CSV;
# the row index is not written.
merged_df.to_csv(
    path_or_buf="../../data/input/ds_population_by_neighborhood_coordinates.csv",
    index=False,
)