# Analyse data

In [1]:
import pandas as pd
import json
from pathlib import Path
import utils
import datetime
import numpy as np

In [2]:
df_votum_raw = pd.read_csv(Path('../export/votum.csv'))
df_votum_raw['sitzung_date'] = pd.to_datetime(df_votum_raw['sitzung_date'])

with open(Path('../export/mitglieder.json'), encoding='utf-8') as f:
    kantonsrat = json.load(f)

# Typecast
utils.kantonsrat_to_datetime(kantonsrat)

## How many times did m/w talk?

In [34]:
df_votum = df_votum_raw.copy()

# Remove all Statements from President, 1. & 2. Vizepresidents
df_votum = df_votum[~df_votum.funktion.isin(['Präsidium', '2. Vizepräsidium', '1. Vizepräsidium'])]

# Calculate Amtsjahr (Amtsjahr 2020 = 1.5.2020 - 30.4.2021)
df_votum['year'] = df_votum['sitzung_date'].dt.year
df_votum['month'] = df_votum['sitzung_date'].dt.month
df_votum['session'] = df_votum.apply(lambda row: row['year'] if row['month'] > 5 else row['year'] - 1, axis=1)

df_votum_pivot = pd.pivot_table(df_votum, index='session', columns='geschlecht', values='name', aggfunc='count').reset_index()

df_votum_pivot['w%'] = 100 / (df_votum_pivot['m'] + df_votum_pivot['w']) * df_votum_pivot['w']

In [30]:
df_votum_pivot

geschlecht,session,m,w,v_w%
0,2012,848,324,27.645051
1,2013,2933,1089,27.076082
2,2014,2382,794,25.0
3,2015,2265,911,28.683879
4,2016,2214,905,29.01571
5,2017,2139,903,29.684418
6,2018,2460,986,28.612885
7,2019,1967,1001,33.726415
8,2020,2332,1189,33.768816
9,2021,824,303,26.885537


In [41]:
# Calculate m/w percentage of Kantonsrat
dtx = []
for year in range(df_votum['sitzung_date'].min().year, df_votum['sitzung_date'].max().year):

    dt = datetime.datetime(year, 11, 11)
    df = utils.kantonsrat_as_dataframe(kantonsrat, dt)
    df['year'] = year
    dtx.append(df)

df = pd.concat(dtx)

# Remove Präsidents
df = df[df.funktion.isna()]

df_rat = pd.pivot_table(df, index='year', columns='geschlecht', values='name', aggfunc='count').reset_index()

df_rat['w%'] = 100 / (df_rat['m'] + df_rat['w']) * df_rat['w']

# Concat
df_res = df_votum_pivot.merge(df_rat, left_on = 'session', right_on = 'year', suffixes=('_v', '_r'))

# Votes per w/m
df_res['votes_m'] = round(df_res['m_v'] / df_res['m_r'], 1)
df_res['votes_w'] = round(df_res['w_v'] / df_res['w_r'], 1)


In [42]:
df_res

geschlecht,session,m_v,w_v,w%_v,year,m_r,w_r,w%_r,votes_m,votes_w
0,2013,2933,1089,27.076082,2013,119,59,33.146067,24.6,18.5
1,2014,2382,794,25.0,2014,124,55,30.726257,19.2,14.4
2,2015,2265,911,28.683879,2015,125,62,33.15508,18.1,14.7
3,2016,2214,905,29.01571,2016,121,57,32.022472,18.3,15.9
4,2017,2139,903,29.684418,2017,120,57,32.20339,17.8,15.8
5,2018,2460,986,28.612885,2018,120,57,32.20339,20.5,17.3
6,2019,1967,1001,33.726415,2019,106,71,40.112994,18.6,14.1
7,2020,2332,1189,33.768816,2020,106,71,40.112994,22.0,16.7


In [None]:
TODO: So wenige Votes insgesamt? Checken!

In [27]:
df_rat

geschlecht,year,m,w,r_w%
0,2013,119,59,33.146067
1,2014,124,55,30.726257
2,2015,125,62,33.15508
3,2016,121,57,32.022472
4,2017,120,57,32.20339
5,2018,120,57,32.20339
6,2019,106,71,40.112994
7,2020,106,71,40.112994


In [8]:
len(df)

1430

In [None]:
df[df.vorname == 'Benno']

In [None]:
df['funktion'].unique()

In [None]:
len(df)

In [None]:
df.to_clipboard()

In [None]:
len(df)

In [None]:
df_votum.funktion.unique()

In [None]:
df_votum.head()