In [22]:
import json
import pandas as pd

In [66]:
# Enrich every feature of the divisions GeoJSON with per-division statistics:
# population, job-category weights, origin-class counts and area.

# Read in the geojson file. GeoJSON is defined as UTF-8, so decode it explicitly
# instead of relying on the platform's default text encoding.
with open("berney_divisions.geojson", "r", encoding="utf-8") as f:
    data = json.load(f)

# Read website\public\data\bipartite_divisions_type_metiers.csv
# (columns used below: "division", "job_category", "Weight")
df = pd.read_csv("bipartite_divisions_type_metiers.csv")

# Read data/1832_v4_preprocessed.csv
df_pop = pd.read_csv("../../../data/1832_v4_preprocessed.csv")

# Area of each division (columns used below: "division", "area")
df_area = pd.read_csv("./area.csv")

# Get the population of each division (number of rows per division)
population = df_pop.groupby("division").size().to_dict()

# Get the origin category counts for each division: number of rows with a
# non-null "nom_rue" per (division, origin class) pair. Rows whose
# "chef_origine_class" is missing are not part of any group.
origin = df_pop.groupby(["division", "chef_origine_class"]).count()["nom_rue"]

# In the properties section, add the values.
# For example, for division 1:
# "properties": {"id": 1, "name": "marterey",
#                "population": 504,
#                "jobs": {"agricole": 82, "artisanat": 93, ...},
#                "origins": {"aigle": 1, "moudon": 6, ..., "not_lausanne": ...},
#                "area": ...}
for feature in data["features"]:
    properties = feature["properties"]
    division = properties["id"]

    # Add the population
    properties["population"] = population[division]

    # Add the job-category weights. .tolist() yields plain Python numbers,
    # which json.dump can serialize.
    division_jobs = df.query("division == @division")
    properties["jobs"] = dict(
        zip(division_jobs["job_category"], division_jobs["Weight"].tolist())
    )

    # Add the origin-class counts of this division
    division_origins = origin.loc[division]
    origins = dict(division_origins.items())
    # Sum of all origins outside Lausanne. A division without any "lausanne"
    # rows contributes 0 here instead of raising KeyError.
    origins["not_lausanne"] = int(
        division_origins.sum() - division_origins.get("lausanne", 0)
    )
    properties["origins"] = origins

    # Add the area
    properties["area"] = df_area.query("division == @division")["area"].values[0]

In [67]:
# Display the current contents of `data` (the GeoJSON dict) for inspection
data

{'type': 'FeatureCollection',
 'name': 'new',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'id': 1,
    'name': 'marterey',
    'jobs': {'administration': 5,
     'agricole': 82,
     'artisanat': 93,
     'commerce': 17,
     'construction': 39,
     'rente': 110,
     'service': 77},
    'population': 504,
    'origins': {'aigle': 3,
     'angleterre': 12,
     'aubonne': 10,
     'avenches': 1,
     'cossonay': 17,
     'echallens': 12,
     'france': 21,
     'fribourg': 2,
     'geneve': 3,
     'italie': 3,
     'la_vallee': 6,
     'lausanne': 134,
     'lavaux': 53,
     'morges': 16,
     'moudon': 12,
     'neuchatel': 1,
     'nyon': 10,
     'orbe': 4,
     'oron': 12,
     'payerne': 4,
     'pays_d_enhaut': 11,
     'rolle': 8,
     'suisse_allemande': 36,
     'vaud': 5,
     'vevey': 15,
     'yverdon': 11,
     'not_lausanne': 288},
    'area': 77727.830078125},
   'geometry': {

In [24]:
# Distinct values found in the "job_category" column of `df`
df["job_category"].unique()

array(['administration', 'agricole', 'artisanat', 'commerce',
       'construction', 'rente', 'service'], dtype=object)

In [25]:
# For each vocation class, the highest share it reaches in any division.
# Share = rows with a non-null "nom_rue" for that (division, class) pair
#         divided by the total number of rows in the division.
# Note: the "division" column of the result is the largest division id in which
# the class occurs, not the division where the maximum share is reached.
(
    (
        df_pop.groupby(["division", "chef_vocation_class"])["nom_rue"].count()
        / df_pop.groupby("division").size()
    )
    .reset_index()
    .groupby("chef_vocation_class")
    .max()
)

Unnamed: 0_level_0,division,0
chef_vocation_class,Unnamed: 1_level_1,Unnamed: 2_level_1
administration,15,0.08134
agricole,18,0.657143
artisanat,18,0.308081
commerce,15,0.135
construction,18,0.135458
rente,17,0.412979
service,18,0.158103


In [26]:
# For each origin class, the highest share it reaches in any division.
# Share = rows with a non-null "nom_rue" for that (division, class) pair
#         divided by the total number of rows in the division.
# Note: the "division" column of the result is the largest division id in which
# the class occurs, not the division where the maximum share is reached.
(
    (
        df_pop.groupby(["division", "chef_origine_class"])["nom_rue"].count()
        / df_pop.groupby("division").size()
    )
    .reset_index()
    .groupby("chef_origine_class")
    .max()
)

Unnamed: 0_level_0,division,0
chef_origine_class,Unnamed: 1_level_1,Unnamed: 2_level_1
aigle,17,0.032609
angleterre,15,0.041298
aubonne,15,0.030303
avenches,13,0.014354
cossonay,17,0.065217
echallens,18,0.045455
france,17,0.093264
fribourg,18,0.085714
geneve,15,0.017699
grandson,14,0.01087


In [27]:
# Label rows whose origin class is missing with the category "not_lausanne".
# This overwrites the "chef_origine_class" column of df_pop itself, so cells run
# after this one see the filled values (re-read the CSV to get the raw column back).
df_pop["chef_origine_class"] = df_pop["chef_origine_class"].fillna("not_lausanne")

In [32]:
# Display the current contents of `origin` for inspection
origin

division  chef_origine_class
1         aigle                  3
          angleterre            12
          aubonne               10
          avenches               1
          cossonay              17
                                ..
18        la_vallee             14
          lausanne               7
          oron                   1
          pays_d_enhaut          1
          vaud                   3
Name: nom_rue, Length: 375, dtype: int64

In [48]:
# Map each division to its number of rows in df_pop
population = (
    df_pop
    .groupby("division")
    .size()
    .to_dict()
)

In [49]:
# Display the division -> row count mapping
population

{1: 504,
 2: 198,
 3: 200,
 4: 193,
 5: 148,
 6: 341,
 7: 92,
 8: 281,
 9: 253,
 10: 251,
 11: 220,
 12: 184,
 13: 209,
 14: 199,
 15: 339,
 16: 69,
 17: 87,
 18: 35}

In [61]:
# Find the largest share of non-Lausanne origins over all divisions, where the
# share is (sum of origin counts - "lausanne" count) / division population.
# The non-Lausanne count of every division is printed along the way.
max_not_lausanne = 0

# Pair each division id with its own population instead of assuming the dict
# holds exactly divisions 1..18 in that order.
for div, pop in population.items():
    division_origins = origin[div]
    # A division without any "lausanne" rows counts 0 instead of raising KeyError
    not_lausanne = division_origins.sum() - division_origins.get("lausanne", 0)
    max_not_lausanne = max(max_not_lausanne, not_lausanne / pop)
    print(not_lausanne)

# Former name of the result, kept so that cells still referring to it keep working
yas = max_not_lausanne
max_not_lausanne

288
106
101
91
65
165
49
146
138
129
108
99
111
115
152
33
34
23


0.6571428571428571

In [8]:
# For each origin class, the highest share it reaches in any division.
# Share = rows with a non-null "nom_rue" for that (division, class) pair
#         divided by the total number of rows in the division.
# Note: the "division" column of the result is the largest division id in which
# the class occurs, not the division where the maximum share is reached.
(
    (
        df_pop.groupby(["division", "chef_origine_class"])["nom_rue"].count()
        / df_pop.groupby("division").size()
    )
    .reset_index()
    .groupby("chef_origine_class")
    .max()
)

Unnamed: 0_level_0,division,0
chef_origine_class,Unnamed: 1_level_1,Unnamed: 2_level_1
lausanne,18,0.54023
not_lausanne,18,0.8


In [73]:
# Mapping from division id to its area, as a plain dict
df_area.set_index("division")["area"].to_dict()

{18: 568286.1264648438,
 17: 1989294.255859375,
 16: 2436997.1813964844,
 15: 6444723.820068359,
 14: 44075.977783203125,
 13: 38524.51123046875,
 12: 34376.814697265625,
 11: 14163.451416015623,
 10: 37988.86572265625,
 9: 26994.397705078125,
 8: 22216.212890625,
 7: 12413.55029296875,
 6: 16866.91943359375,
 5: 9749.579833984377,
 4: 73519.44750976562,
 3: 30742.01025390625,
 2: 13174.86279296875,
 1: 77727.830078125}

In [77]:
# Population density (population divided by area) of every division, printed
# per division, followed by the maximum over all divisions.

# Build the division -> area lookup once; it does not change inside the loop.
area_by_division = df_area.set_index("division").to_dict()["area"]

max_value = 0
# Iterate over the divisions actually present instead of a hard-coded 1..18
for i, head_count in population.items():
    density = head_count / area_by_division[i]
    max_value = max(max_value, density)
    print(i, density)
max_value

1 0.00648416403099668
2 0.015028619509090447
3 0.006505755425495862
4 0.0026251557450069755
5 0.015180141351744447
6 0.020217088327393808
7 0.00741125607330164
8 0.012648420384852314
9 0.009372315054556902
10 0.006607199115458339
11 0.015532937102550465
12 0.005352444710784551
13 0.005425117498562926
14 0.004514931035196155
15 5.2601167942120476e-05
16 2.8313532952246006e-05
17 4.373410305878353e-05
18 6.158869338888432e-05


0.020217088327393808

In [68]:
# Write the GeoJSON dict held in `data` back to disk.
# The target path is the same file the notebook loads from, so it is overwritten.
with open("berney_divisions.geojson", mode="w") as geojson_file:
    json.dump(data, fp=geojson_file)