# Some numbers

In [1]:
import pandas as pd
from pathlib import Path
import glob
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import resolve1

## Count Protokolle and Pages

In [20]:
# Count the exported protocol PDFs and add up their page counts.
docs = 0
pages = 0

for pdf_path in glob.glob(str(Path('./../export/Files/*.pdf'))):
    # Open through a context manager so every file handle is closed again
    # (the loop touches one file per protocol). The page tree is resolved
    # while the file is still open because the parser reads from it.
    with open(pdf_path, 'rb') as pdf_file:
        document = PDFDocument(PDFParser(pdf_file))
        docs += 1
        # The catalog's 'Pages' entry is passed through resolve1() before
        # its 'Count' value is read and added to the running total.
        pages += resolve1(document.catalog['Pages'])['Count']

print("Dokumente: %s" % docs)
print("Seiten: %s" % pages)

Dokumente: 1135
Seiten: 78433


In [33]:
# Load both parts of the votum export, then report the number of rows and
# the smallest / largest value of the 'sitzung_date' column.
votum_files = (
    Path('../export/votum/votum_0.csv'),
    Path('../export/votum/votum_1.csv'),
)
df = pd.concat([pd.read_csv(votum_file) for votum_file in votum_files])

sitzung_dates = df['sitzung_date']
print("Voten: %s" % len(df))
print("First: %s" % sitzung_dates.min())
print("Last: %s" % sitzung_dates.max())

Voten: 80306
First: 2001-01-08
Last: 2022-03-28


## Count Geschäfte

In [30]:
# Load the Geschäfte export, then report the number of rows and the
# smallest / largest value of the 'start' column.
geschaefte_csv = Path('../export/geschaefte.csv')
df = pd.read_csv(geschaefte_csv, encoding='UTF-8')

start_values = df['start']
print("Geschäfte: %s" % len(df))
print("First: %s" % start_values.min())
print("Last: %s" % start_values.max())

Geschäfte: 10614
First: 2001-01-01
Last: 2022-04-14


## Alle Sitzungen

In [2]:
# Build the cleaned votum frame used by the analysis cells below.
df_votum_raw = pd.concat([
    pd.read_csv(Path('../export/votum/votum_0.csv')),
    pd.read_csv(Path('../export/votum/votum_1.csv'))
])

# Remove non members (mostly former members who are now in the Regierungsrat).
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
df_votum_raw = df_votum_raw[df_votum_raw.ismember == True].copy()

# Typecast
df_votum_raw['sitzung_date'] = pd.to_datetime(df_votum_raw['sitzung_date'])

# Remove empty texts (again as an independent copy, see above)
df_votum_raw = df_votum_raw[df_votum_raw.text.notna()].copy()

# Replace CVP with Die Mitte
df_votum_raw.loc[df_votum_raw.partei.str.lower() == 'cvp', 'partei'] = "Die Mitte"

# Grouping column 'g': the speaker's gender, overridden with 'präsidium' for
# every row that has a funktion set.
# No 'nomember' label is assigned here: rows with ismember == False were
# already dropped by the member filter above, so that assignment could never
# match a row.
df_votum_raw['g'] = df_votum_raw['geschlecht']
df_votum_raw.loc[df_votum_raw.funktion.notna(), 'g'] = 'präsidium'

len(df_votum_raw)

76821

In [3]:
# Per value of 'g': non-null counts of every column, restricted to rows
# without a funktion, plus each group's share of the 'name' count in percent.
df = df_votum_raw[df_votum_raw.funktion.isna()].groupby('g').count()

name_counts = df['name']
df['%'] = 100 / name_counts.sum() * name_counts

df

Unnamed: 0_level_0,name,vorname,text,page,f,sitzung_name,sitzung_date,sitzung_start,sitzung_gremium,dokument_titel,partei,geschlecht,jahrgang,funktion,ismember,%
g,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
m,43343,43343,43343,43343,43343,43343,43343,19272,43343,43343,43343,43343,43343,0,43343,71.32889
w,17422,17422,17422,17422,17422,17422,17422,8491,17422,17422,17422,17422,17422,0,17422,28.67111


## Sitzung 2021-08-23

In [10]:
# Rows of the session on 2021-08-23 that have no funktion set, counted per
# value of 'g'.
is_session = df_votum_raw.sitzung_date == '2021-08-23'
has_no_funktion = df_votum_raw.funktion.isna()

df = df_votum_raw[is_session & has_no_funktion].copy()

df.groupby('g').count()

Unnamed: 0_level_0,name,vorname,text,page,f,sitzung_name,sitzung_date,sitzung_start,sitzung_gremium,dokument_titel,partei,geschlecht,jahrgang,funktion,ismember
g,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
m,41,41,41,41,41,41,41,41,41,41,41,41,41,0,41
w,14,14,14,14,14,14,14,14,14,14,14,14,14,0,14


In [6]:
# Share (in %) of this session's rows with g == 'm'. The counts are taken
# from `df` (the session frame built in the cell above) instead of being
# typed in by hand from the rendered table, so the figure cannot go stale.
statements_by_g = df.groupby('g')['name'].count()
float(100 / statements_by_g.sum() * statements_by_g['m'])

74.54545454545455

In [11]:
# Per-'geschlecht' counts for the rows whose page lies between 49 and 65
# (both bounds included).
on_selected_pages = df.page.between(49, 65)
df[on_selected_pages].groupby('geschlecht').count()

Unnamed: 0_level_0,name,vorname,text,page,f,sitzung_name,sitzung_date,sitzung_start,sitzung_gremium,dokument_titel,partei,jahrgang,funktion,ismember,g
geschlecht,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
m,4,4,4,4,4,4,4,4,4,4,4,4,0,4,4
w,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5
