In [1]:
import json

import pandas as pd

In [2]:
# Render floats in displayed frames with one decimal place and thousands
# separators. This only affects how values are shown, not the stored data.
pd.set_option("display.float_format", "{:,.1f}".format) 

This notebook loads sociodemographic data (age, marital status, household size, ...) from Excel/CSV files and puts it into a JSON form that the JavaScript will be able to understand and work with.

## Load the IDs and Create Base Dictionary

We first load the IDs of all the districts that we can represent. Then we build a dictionary with one key for each ID; each entry starts out as an empty dictionary.

In [3]:
# District ID table; its 'id' column provides the keys of `data` further down.
df_ids = pd.read_csv('StructuredData/bez-ids.csv')

In [4]:
# Preview the first two rows to check the column layout.
df_ids.head(2)

Unnamed: 0,id,name
0,101,Bezirk Affoltern
1,102,Bezirk Andelfingen


In [5]:
# Top-level container: district ID -> {category name -> {column -> value}}.
data = dict()

In [6]:
# Give every district ID its own, initially empty, dictionary.
# Re-running this cell resets entries that were already filled in.
data.update((district_id, {}) for district_id in df_ids['id'])

## Load the sociodemographic data and attach to the dictionary

### Households

In [7]:
# Household-size table; rows are matched to districts via 'Bezirk_ID' below.
df_hh = pd.read_csv('StructuredData/bez-Haushaltsgroesse.csv')

In [8]:
# Preview the first two rows to check the column layout.
df_hh.head(2)

Unnamed: 0,1,2,3,4,5,6+,Bezirk_Name,Bezirk_ID
0,-25.0,3.3,-4.0,16.1,4.4,-8.6,Bezirk Affoltern,101
1,-25.7,7.4,-5.9,9.1,13.9,-10.0,Bezirk Andelfingen,102


In [9]:
# Attach each district's household-size values under 'Haushaltsgroesse'.
for district_id, district_entry in data.items():
    matching_rows = df_hh.loc[df_hh['Bezirk_ID'] == district_id]
    # First matching row as a plain dict, numeric values rounded to one decimal.
    # Raises IndexError when the table has no row for this district.
    row_values = matching_rows.round(1).to_dict(orient='records')[0]
    # The identifying columns are not part of the exported values.
    for id_column in ('Bezirk_ID', 'Bezirk_Name'):
        row_values.pop(id_column)
    district_entry['Haushaltsgroesse'] = row_values

### Age

In [10]:
# Age table; rows are matched to districts via 'Bezirk_ID' below.
df_age = pd.read_csv('StructuredData/bez-Alter.csv')

In [11]:
# Preview the first two rows to check the column layout.
df_age.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,Bezirk_Name,Bezirk_ID
0,-3.6,5.7,6.7,11.2,11.7,13.4,12.5,23.4,5.9,25.2,...,-49.9,-65.5,-57.2,-52.1,-44.7,-76.4,-65.1,-69.0,Bezirk Affoltern,101
1,-2.5,-11.5,4.6,7.1,5.4,-1.3,16.0,6.7,16.6,16.4,...,-50.4,-38.6,-34.0,-37.7,-47.2,37.9,-12.7,-48.2,Bezirk Andelfingen,102


In [12]:
# Attach each district's age values under 'Alter'.
for district_id, district_entry in data.items():
    matching_rows = df_age.loc[df_age['Bezirk_ID'] == district_id]
    # First matching row as a plain dict, numeric values rounded to one decimal.
    # Raises IndexError when the table has no row for this district.
    row_values = matching_rows.round(1).to_dict(orient='records')[0]
    # The identifying columns are not part of the exported values.
    for id_column in ('Bezirk_ID', 'Bezirk_Name'):
        row_values.pop(id_column)
    district_entry['Alter'] = row_values

### Marital Status

In [13]:
# Marital-status table; rows are matched to districts via 'Bezirk_ID' below.
df_ziv = pd.read_csv('StructuredData/bez-Zivilstand.csv')

In [14]:
# Preview the first two rows to check the column layout.
df_ziv.head(2)

Unnamed: 0,Ledig,Verheiratet,Geschieden,Wervitwet,Bezirk_Name,Bezirk_ID
0,-5.3,1.6,3.5,27.5,Arrondissement administratif Jura bernois,241
1,-0.3,1.2,-6.1,2.8,Bezirk Aarau,1901


In [15]:
# Attach each district's marital-status values under 'Zivilstand'.
for district_id, district_entry in data.items():
    matching_rows = df_ziv.loc[df_ziv['Bezirk_ID'] == district_id]
    # First matching row as a plain dict, numeric values rounded to one decimal.
    # Raises IndexError when the table has no row for this district.
    row_values = matching_rows.round(1).to_dict(orient='records')[0]
    # The identifying columns are not part of the exported values.
    for id_column in ('Bezirk_ID', 'Bezirk_Name'):
        row_values.pop(id_column)
    district_entry['Zivilstand'] = row_values

### Nationality

In [16]:
# Nationality table.
# NOTE(review): this re-binds `df_ziv`, which held the marital-status table
# in the previous section; from here on it refers to the nationality data.
# A dedicated name would make out-of-order re-runs less error-prone.
df_ziv = pd.read_csv('StructuredData/bez-Nationalitaet.csv')

In [17]:
# Preview the first two rows of the nationality table (bound to `df_ziv`).
df_ziv.head(2)

Unnamed: 0,Schweiz,Afghanistan,Ägypten,Albanien,Algerien,Andorra,Angola,Antigua und Barbuda,Äquatorialguinea,Argentinien,...,Vereinigte Arabische Emirate,Vereinigte Staaten,Vereinigtes Königreich,Vietnam,Westsahara,Zentralafrikanische Republik,Zypern,Ohne Angabe,Bezirk_Name,Bezirk_ID
0,9.1,-32.7,-21.1,-49.3,-56.6,-100.0,-65.1,-100.0,-100.0,-36.1,...,-100.0,-26.3,22.5,-96.3,-100.0,-100.0,-100.0,21.0,Bezirk Affoltern,101
1,15.5,8.5,-67.0,-100.0,-93.4,-100.0,-100.0,-100.0,-100.0,-69.5,...,-100.0,-69.2,-38.7,-63.3,-100.0,-100.0,-100.0,83.8,Bezirk Andelfingen,102


In [18]:
# Attach each district's nationality values under 'Nationalitaet'.
# `df_ziv` is the nationality table here: the read_csv cell of this section
# re-assigned the name.
for district_id, district_entry in data.items():
    matching_rows = df_ziv.loc[df_ziv['Bezirk_ID'] == district_id]
    # First matching row as a plain dict, numeric values rounded to one decimal.
    # Raises IndexError when the table has no row for this district.
    row_values = matching_rows.round(1).to_dict(orient='records')[0]
    # The identifying columns are not part of the exported values.
    for id_column in ('Bezirk_ID', 'Bezirk_Name'):
        row_values.pop(id_column)
    district_entry['Nationalitaet'] = row_values

### Parties

In [19]:
# Party table.
# NOTE(review): this re-binds `df_ziv` once more; from here on it refers to
# the party data rather than the nationality table loaded in the previous
# section. A dedicated name would make out-of-order re-runs less error-prone.
df_ziv = pd.read_csv('StructuredData/bez-Parteien.csv')

In [20]:
# Preview the first two rows of the party table (bound to `df_ziv`).
df_ziv.head(2)

Unnamed: 0,BDP,CVP,EDU,EVP,FDP,GLP,Grüne,SP,SVP,Übrige,Bezirk_Name,Bezirk_ID
0,35.9,-75.9,189.8,113.3,-46.3,-50.2,-4.6,37.4,24.5,-22.7,Arrondissement administratif Jura bernois,241
1,8.3,-53.3,0.3,143.3,6.3,32.0,-7.9,8.5,8.0,-58.4,Bezirk Aarau,1901


In [21]:
# Attach each district's party values under 'Parteien'.
# `df_ziv` is the party table here: the read_csv cell of this section
# re-assigned the name.
for district_id, district_entry in data.items():
    matching_rows = df_ziv.loc[df_ziv['Bezirk_ID'] == district_id]
    # First matching row as a plain dict, numeric values rounded to one decimal.
    # Raises IndexError when the table has no row for this district.
    row_values = matching_rows.round(1).to_dict(orient='records')[0]
    # The identifying columns are not part of the exported values.
    for id_column in ('Bezirk_ID', 'Bezirk_Name'):
        row_values.pop(id_column)
    district_entry['Parteien'] = row_values

### Income

In [24]:
# Income table; rows are matched to districts via 'Bezirk_ID' below.
df_ein = pd.read_csv('StructuredData/bez-Einkommen.csv')

In [25]:
# Preview the first two rows to check the column layout.
df_ein.head(2)

Unnamed: 0,Bezirk_ID,Bezirk_Name,Bis 14’999,15’000-19’999,20’000-29’999,30’000-39’999,40’000-49’999,50’000-74999,75’000 und mehr
0,101,Affoltern,-70.5,-29.2,-22.5,-21.7,-16.7,-5.2,35.0
1,102,Andelfingen,-70.1,-21.2,-16.7,-13.7,-2.3,-2.4,20.3


In [26]:
# Attach each district's income values under 'Einkommen'.
for district_id, district_entry in data.items():
    matching_rows = df_ein.loc[df_ein['Bezirk_ID'] == district_id]
    # First matching row as a plain dict, numeric values rounded to one decimal.
    # Raises IndexError when the table has no row for this district.
    row_values = matching_rows.round(1).to_dict(orient='records')[0]
    # The identifying columns are not part of the exported values.
    for id_column in ('Bezirk_ID', 'Bezirk_Name'):
        row_values.pop(id_column)
    district_entry['Einkommen'] = row_values

## Export to JSON

In [28]:
def _to_builtin(value):
    """Turn NumPy scalars, which `json` cannot serialise, into Python numbers."""
    if hasattr(value, 'item'):
        return value.item()
    raise TypeError(
        'Cannot serialise {} for the JS export'.format(type(value).__name__)
    )


# Serialise with json instead of str(): the Python repr writes missing or
# infinite values as `nan` / `inf` and None as `None`, all of which are
# undefined identifiers in JavaScript. json.dumps emits NaN / Infinity / null,
# which a <script> can evaluate.
# Top-level keys become strings; JavaScript object keys are strings anyway, so
# `data[101]` keeps working. This also sidesteps NumPy integer keys, which
# json.dumps rejects.
js_payload = json.dumps(
    {str(district_id): categories for district_id, categories in data.items()},
    default=_to_builtin,
)

# json.dumps escapes non-ASCII characters (e.g. in 'Ägypten', 'Grüne') as
# \uXXXX by default, so the file decodes the same whatever charset the page
# declares. The encoding is still pinned so the result does not depend on the
# platform's locale.
with open('web/json/data_bez.js', 'w', encoding='utf-8') as f:
    f.write('var data = ' + js_payload)

In [27]:
# Display the assembled dictionary as a visual check (the output is long).
data

{101: {'Alter': {'0': -3.6,
   '1': 5.7,
   '10': 10.2,
   '100': -69.0,
   '11': 21.1,
   '12': 4.5,
   '13': 8.5,
   '14': 14.6,
   '15': 3.5,
   '16': 1.7,
   '17': 10.1,
   '18': -6.4,
   '19': 0.5,
   '2': 6.7,
   '20': 4.9,
   '21': -2.1,
   '22': -12.0,
   '23': -9.1,
   '24': -14.8,
   '25': -17.3,
   '26': -18.3,
   '27': -27.6,
   '28': -27.5,
   '29': -13.6,
   '3': 11.2,
   '30': -15.8,
   '31': -15.0,
   '32': -18.7,
   '33': -13.5,
   '34': -14.1,
   '35': -10.1,
   '36': 0.2,
   '37': -4.0,
   '38': -5.0,
   '39': 6.6,
   '4': 11.7,
   '40': 3.4,
   '41': 5.0,
   '42': 7.3,
   '43': 11.9,
   '44': 9.1,
   '45': 17.1,
   '46': 11.0,
   '47': 10.9,
   '48': 15.0,
   '49': 16.3,
   '5': 13.4,
   '50': 9.3,
   '51': 5.5,
   '52': 5.2,
   '53': 3.2,
   '54': 7.5,
   '55': 3.5,
   '56': 0.0,
   '57': 1.6,
   '58': -2.4,
   '59': 5.0,
   '6': 12.5,
   '60': 4.8,
   '61': 1.5,
   '62': 4.9,
   '63': 5.0,
   '64': 16.6,
   '65': 13.0,
   '66': 4.7,
   '67': 7.7,
   '68': 10.7,
  