# 2021 Repräsentationsindex
Wie gut sind Sie repräsentiert?
Input:
* Gemeinde
* Jahrgang
* Geschlecht

In [1]:
import pandas as pd
import numpy as np
import cleandata as cd
import json
import os
import math

## Settings

In [2]:
# Folder that receives one JSON file per Gemeinde, relative to the notebook.
export_folder = os.path.join(os.pardir, 'export', 'json')
export_folder

'..\\export\\json'

In [3]:
# Load the two raw inputs; both paths are relative to the working directory.
# NOTE(review): judging by the file name this holds the municipal executive
# members — confirm against the data source.
df = pd.read_csv('../data/Gemeinde_Exekutive - daten.csv')
# Population table; this file is semicolon-separated. Later cells read its
# JAHR, GEMEINDE, GEMEINDE_BFS_NR, ALTERSKLASSE_CODE, ALTERSKLASSE and
# ANZAHL_PERSONEN columns.
df_age_raw = pd.read_csv('../data/KANTON_ZUERICH_bevoelkerung_1jahresklassen.csv', sep=';')

## Clean Data

In [4]:
# Run the project-local cleaning step from the `cleandata` module and keep
# its result as the working frame.
# NOTE(review): later cells read Alter, Jahrgang, Name_cleaned, partei_c,
# jahrgang_nicht_zugeordnet and partei_nicht_zugeordnet from `df`; presumably
# clean() derives them — confirm in cleandata.
df = cd.clean(df)

Jahrgänge nicht zugeordnet: 0
Partei nicht zugeordnet: 9
Keine Jahrgänge: 18


In [5]:
# Harmonise Gemeinde names so both tables can be matched on the name.
# The council table carries a literal ' (ZH)' suffix that is dropped here.
df['Gemeinde'] = df['Gemeinde'].str.replace(' (ZH)', '', regex=False)

# The population table abbreviates some name suffixes; spell them out.
# Replacements are literal (no regex) and applied in the listed order.
for short_form, long_form in (
    ('a.A.', 'am Albis'),
    ('a.I.', 'am Irchel'),
    ('a.d.Th.', 'an der Thur'),
    ('a.S.', 'am See'),
    ('a.d.L.', 'an der Limmat'),
):
    df_age_raw['GEMEINDE'] = df_age_raw['GEMEINDE'].str.replace(short_form, long_form, regex=False)

# Prepare Alter

In [6]:
# Keep only the population figures of 2020.
df_age = df_age_raw[df_age_raw.JAHR == 2020]

# Collapse to one row per Gemeinde (BFS number) and age class, summing the
# person counts of all rows that share that key.
df_age = df_age.groupby(['GEMEINDE_BFS_NR', 'ALTERSKLASSE_CODE']).agg({'GEMEINDE': 'first', 'ALTERSKLASSE': 'first', 'ANZAHL_PERSONEN': 'sum'}).reset_index()

# Start from an explicit all-False boolean column. Creating the column
# implicitly through a masked .loc assignment would yield an object column
# full of NaN (and no column at all when nothing matches).
df_age['represented'] = False

# Loop through gemeinderäte: an age class counts as represented when a council
# member of the same Gemeinde is at most 5 years younger or older. Identical
# (Gemeinde, Alter) pairs mark the same rows, so each pair is visited once.
member_ages = df.loc[df.Alter.notna(), ['Gemeinde', 'Alter']].drop_duplicates()
for gemeinde_name, member_age in member_ages.itertuples(index=False):
    in_window = df_age.ALTERSKLASSE_CODE.between(member_age - 5, member_age + 5)
    df_age.loc[in_window & (df_age['GEMEINDE'] == gemeinde_name), 'represented'] = True

# Add age Group
# Disabled: the code below is wrapped in a string literal, so it is evaluated
# as a bare expression and never executed.
"""
def add_age_group(x):
    if x < 18:
        return '<18'
    elif x <= 39:
        return '18-39'
    elif x <= 64:
        return '40-64'
    elif x <= 79:
        return '65-79'
    else:
        return '>80'

df_age['agegroup'] = df_age['ALTERSKLASSE_CODE'].apply(add_age_group)
"""

"\ndef add_age_group(x):\n    if x < 18:\n        return '<18'\n    elif x <= 39:\n        return '18-39'\n    elif x <= 64:\n        return '40-64'\n    elif x <= 79:\n        return '65-79'\n    else:\n        return '>80'\n\ndf_age['agegroup'] = df_age['ALTERSKLASSE_CODE'].apply(add_age_group)\n"

## Export JSON

In [11]:
def export_gemeinde(name, reference_year=2021):
    """Write the representation record of one Gemeinde to a JSON file.

    Reads the module-level frames ``df_age`` (population per age class with a
    ``represented`` flag) and ``df`` (council members) and writes
    ``<export_folder>/<name.lower()>.json`` with these keys:

    * ``name`` / ``bfs``: Gemeinde name and BFS number.
    * ``altersstruktur``: one entry per age from 0 up to and including the
      highest age class present, with ``age``, ``count`` and ``represented``;
      ages without data get ``count`` 0 and ``represented`` False.
    * ``personen2``: one entry per council member (name, jahrgang, partei,
      geschlecht).
    * ``personen``: council members grouped by Jahrgang, each group carrying
      ``alter`` (``reference_year - Jahrgang``) and a list of display labels.
      Members without a Jahrgang are not part of this list because the
      grouping drops missing keys.

    Args:
        name: Gemeinde name as it appears in ``df_age.GEMEINDE`` and
            ``df.Gemeinde``.
        reference_year: Year used to turn a Jahrgang into an age.

    Raises:
        ValueError: If ``df_age`` has no rows for ``name``.
    """
    # Select Gemeinde
    df_g = df_age[df_age.GEMEINDE == name]
    if df_g.empty:
        raise ValueError("No population data found for Gemeinde %r" % (name,))

    # Read the BFS number from the real data, before any gaps are filled.
    bfs = int(df_g['GEMEINDE_BFS_NR'].iloc[0])

    # Reindex so that missing ages are filled with 0. The upper bound is
    # max + 1 because np.arange excludes its stop value; otherwise the oldest
    # age class would be dropped.
    df_g = df_g.set_index('ALTERSKLASSE_CODE')
    ages = np.arange(0, int(df_g.index.max()) + 1)
    counts = df_g['ANZAHL_PERSONEN'].reindex(ages, fill_value=0)
    represented = df_g['represented'].reindex(ages, fill_value=False)

    # Add Altersstruktur (plain Python scalars keep the JSON types stable)
    altersstruktur = [
        {'age': int(age), 'count': int(count), 'represented': bool(flag)}
        for age, count, flag in zip(ages, counts, represented)
    ]

    df_raete = df[df.Gemeinde == name]

    # Add Räte Version 1: flat list, one entry per council member
    personen2 = []
    for _, row in df_raete.iterrows():
        # `== False` is kept on purpose so that non-boolean flag values behave
        # exactly as before.
        jahrgang_known = (
            not pd.isna(row['Jahrgang'])
            and row['jahrgang_nicht_zugeordnet'] == False  # noqa: E712
        )
        partei_known = row['partei_nicht_zugeordnet'] == False  # noqa: E712
        personen2.append({
            "name": row['Name_cleaned'],
            "jahrgang": int(round(row['Jahrgang'])) if jahrgang_known else None,
            "partei": row['Partei'] if partei_known else None,
            "geschlecht": row['Geschlecht'],
        })

    # Add Räte Version 2: members grouped by Jahrgang
    df_sub = df_raete.groupby(['Jahrgang', 'Name_cleaned']).agg({'partei_c': 'first', 'Geschlecht': 'first'})

    personen = []
    for jahrgang, group in df_sub.groupby(level='Jahrgang'):
        # Build each label from the member's own row. `partei_c` is the party
        # column this aggregation keeps.
        labels = [
            "%s (%s, %s)" % (person, int(round(jahrgang)), partei)
            for person, partei in zip(
                group.index.get_level_values('Name_cleaned'), group['partei_c']
            )
        ]
        personen.append({
            "alter": int(round(reference_year - jahrgang)),
            "values": labels,
        })

    record = {
        "name": name,
        "bfs": bfs,
        "altersstruktur": altersstruktur,
        "personen": personen,
        "personen2": personen2,
    }

    # Store
    out_path = os.path.join(export_folder, name.lower() + '.json')
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(record, f)

export_gemeinde('Winkel')


In [18]:
# One row per distinct Gemeinde in the council data; the name is the group
# key, the remaining columns hold non-null counts.
df_gemeinden = df.groupby('Gemeinde').count().reset_index()

# Write one JSON file per Gemeinde.
for gemeinde_name in df_gemeinden['Gemeinde']:
    export_gemeinde(gemeinde_name)

## Tests

In [None]:
# Settings
gemeinde = 'Dürnten'
alter = 32

my_agegroup = add_age_group(alter)

### Repräsentationstest mit fixen Altersgruppen
Gruppen:
* < 18
* 18 - 39
* 40 - 64
* 65 - 79
* 80+

In [None]:
# Select Gemeinde in Gemeinderäte
df_raete = df[df.Gemeinde == gemeinde].copy()
df_raete['agegroup'] = df_raete['Alter'].apply(add_age_group)

# Group by agegroup and calc Percentage
df_test = df_age[df_age.GEMEINDE == gemeinde].groupby('agegroup').agg({'ANZAHL_PERSONEN': 'sum'})
df_test['%'] = round(100 / df_test['ANZAHL_PERSONEN'].sum() * df_test['ANZAHL_PERSONEN'])
df_test['personen_soll'] = len(df_raete) / 100 * df_test['%']

# Group raete
df_raete = df_raete.groupby('agegroup').agg({'Name': 'count'}).rename(columns={'Name': 'personen_ist'})

# Join
df_test = df_test.join(df_raete)
df_test = df_test.fillna(0)
df_test['abbr'] = df_test['personen_ist'] - df_test['personen_soll']

df_test

## Repräsentationstest mit fluiden Altersgruppen

In [None]:
# Half-width, in years, of the sliding age window used throughout this cell.
agegroup_plus_minus = 5

df_test = df_age[df_age.GEMEINDE == gemeinde].copy()

# Select your age
df_yourage = df_test[df_test.ALTERSKLASSE_CODE.between(alter - agegroup_plus_minus, alter + agegroup_plus_minus)]

# Calc percentage of your age (kept in a variable so the value is not lost)
pct_yourage = round(100 / df_test['ANZAHL_PERSONEN'].sum() * df_yourage['ANZAHL_PERSONEN'].sum())

# Group raete
df_raete = df[df.Gemeinde == gemeinde].copy()

# Recompute the flag from scratch so it reflects agegroup_plus_minus instead
# of whatever df_age already carried.
df_test['represented'] = False

# Loop through gemeinderäte; members without an age match no age class
for i, row in df_raete.iterrows():
    in_window = df_test.ALTERSKLASSE_CODE.between(row['Alter'] - agegroup_plus_minus, row['Alter'] + agegroup_plus_minus)
    df_test.loc[in_window, 'represented'] = True

# Show the share (in percent) of the population inside your own age window
pct_yourage

In [None]:
# Work on a copy: adding a column to a filtered slice of df_age would trigger
# pandas' SettingWithCopy warning and may not affect the intended frame.
df_test = df_age[df_age.GEMEINDE == 'Winterthur'].copy()

# Set Color: represented age classes in red, all others in grey
df_test['color'] = df_test['represented'].apply(lambda x: 'red' if x else 'grey')

# One bar per age class, bar height = number of persons
df_test.plot('ALTERSKLASSE_CODE', 'ANZAHL_PERSONEN', kind='bar', figsize=(10, 10), color=df_test['color'].tolist())