In [1]:
import json
import pandas as pd

In [2]:
# Load the division polygons. GeoJSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default
# (which is not UTF-8 on every system).
with open("berney_divisions.geojson", "r", encoding="utf-8") as f:
    data = json.load(f)

# Table with (at least) the "division", "job_category" and "Weight" columns
# (website\public\data\bipartite_divisions_type_metiers.csv)
df = pd.read_csv("bipartite_divisions_type_metiers.csv")

# Preprocessed 1832 records (data/1832_v4_preprocessed.csv)
df_pop = pd.read_csv("../../../data/1832_v4_preprocessed.csv")

# Number of rows per division, as a {division: row count} dict
population = df_pop.groupby("division").size().to_dict()

# Number of non-null "nom_rue" values per (division, origin class) pair.
# Selecting the column before counting avoids counting every other column.
origin = df_pop.groupby(["division", "chef_origine_class"])["nom_rue"].count()

# Attach the population and the per-category job weights to every feature, e.g.
# "properties": {"id": 1, "name": "marterey", "population": ..., "jobs": {"agricole": 82, "artisanat": 93, ...}}

# Build the {job_category: Weight} mapping of every division in a single pass
# instead of re-filtering the whole table and iterating rows for each feature.
jobs_by_division = {
    division: dict(zip(rows["job_category"], rows["Weight"]))
    for division, rows in df.groupby("division")
}

for feature in data["features"]:
    properties = feature["properties"]
    division = properties["id"]
    # A division without any row in df_pop has a population of 0
    # (previously this raised a KeyError).
    properties["population"] = population.get(division, 0)
    # A division without any row in df gets an empty mapping; copy so that
    # two features never share the same dict object.
    properties["jobs"] = dict(jobs_by_division.get(division, {}))

# Attach the per-origin counts to every feature, e.g.
# "properties": {"id": 1, "name": "marterey", "origins": {"aigle": 1, "moudon": 6, ...}}

# Regroup the (division, origin class) -> count Series into one plain dict
# per division, so the lookups below cannot raise on a missing label.
origins_by_division = {}
for (division, origin_class), count in origin.items():
    origins_by_division.setdefault(division, {})[origin_class] = int(count)

for feature in data["features"]:
    division = feature["properties"]["id"]
    # Copy so the extra key added below never leaks into the lookup table;
    # a division with no origin rows at all gets an empty mapping.
    origins = dict(origins_by_division.get(division, {}))
    # Total of every origin except "lausanne". A division may have no
    # "lausanne" entry, in which case the whole total is not_lausanne
    # (previously this raised a KeyError).
    origins["not_lausanne"] = int(sum(origins.values()) - origins.get("lausanne", 0))
    feature["properties"]["origins"] = origins

In [3]:
# Distinct job categories present in the job-weight table
df["job_category"].unique()

array(['administration', 'agricole', 'artisanat', 'commerce',
       'construction', 'rente', 'service'], dtype=object)

In [4]:
# Share of each division's rows that fall in each vocation class
# (non-null "nom_rue" count divided by the division's row count), then the
# maximum per vocation class across divisions.
# Note: .max() is applied column by column, so the "division" column shows the
# largest division id containing that class, not the division with the top share.
(
    (
        df_pop.groupby(["division", "chef_vocation_class"])["nom_rue"].count()
        / df_pop.groupby("division").size()
    )
    .reset_index()
    .groupby("chef_vocation_class")
    .max()
)

Unnamed: 0_level_0,division,0
chef_vocation_class,Unnamed: 1_level_1,Unnamed: 2_level_1
administration,15,0.08134
agricole,18,0.657143
artisanat,18,0.308081
commerce,15,0.135
construction,18,0.135458
rente,17,0.412979
service,18,0.158103


In [5]:
# Share of each division's rows that fall in each origin class
# (non-null "nom_rue" count divided by the division's row count), then the
# maximum per origin class across divisions.
# Note: .max() is applied column by column, so the "division" column shows the
# largest division id containing that class, not the division with the top share.
(
    (
        df_pop.groupby(["division", "chef_origine_class"])["nom_rue"].count()
        / df_pop.groupby("division").size()
    )
    .reset_index()
    .groupby("chef_origine_class")
    .max()
)

Unnamed: 0_level_0,division,0
chef_origine_class,Unnamed: 1_level_1,Unnamed: 2_level_1
aigle,17,0.032609
angleterre,15,0.041298
aubonne,15,0.030303
avenches,13,0.014354
cossonay,17,0.065217
echallens,18,0.045455
france,17,0.093264
fribourg,18,0.085714
geneve,15,0.017699
grandson,14,0.01087


In [6]:
# Label rows whose origin class is missing with the category "not_lausanne"
df_pop["chef_origine_class"] = df_pop["chef_origine_class"].mask(
    df_pop["chef_origine_class"].isna(), "not_lausanne"
)

In [7]:
# Collapse every origin class other than "lausanne" into "not_lausanne"
df_pop["chef_origine_class"] = df_pop["chef_origine_class"].where(
    df_pop["chef_origine_class"] == "lausanne", "not_lausanne"
)

# Largest per-division count (non-null "nom_rue") observed for each class
(
    df_pop.groupby(["division", "chef_origine_class"])["nom_rue"]
    .count()
    .reset_index()
    .groupby("chef_origine_class")["nom_rue"]
    .max()
)

chef_origine_class
lausanne        149
not_lausanne    370
Name: nom_rue, dtype: int64

In [8]:
# Share of each division's rows that fall in each origin class
# (non-null "nom_rue" count divided by the division's row count), then the
# maximum per origin class across divisions.
# Note: .max() is applied column by column, so the "division" column shows the
# largest division id containing that class, not the division with the top share.
(
    (
        df_pop.groupby(["division", "chef_origine_class"])["nom_rue"].count()
        / df_pop.groupby("division").size()
    )
    .reset_index()
    .groupby("chef_origine_class")
    .max()
)

Unnamed: 0_level_0,division,0
chef_origine_class,Unnamed: 1_level_1,Unnamed: 2_level_1
lausanne,18,0.54023
not_lausanne,18,0.8


In [9]:
# Save the enriched GeoJSON back to "berney_divisions.geojson".
# Serialise to a string first: if the data cannot be encoded, the exception is
# raised before the file is opened in "w" mode, so the existing file is not
# truncated and left half-written.
geojson_text = json.dumps(data)
with open("berney_divisions.geojson", "w", encoding="utf-8") as f:
    f.write(geojson_text)