# Nationalratswahlen

In [1]:
import os
from pyaxis import pyaxis
import json
import pandas as pd
import sys

In [2]:
from constants import INDICATOR_ID_MAPPING, URL_MAPPING, CSV_MAPPING, METADATA_MAPPING, SPATIAL_UNIT_ONTOLOGY, TIME_INFO_ID
from utils import map_name_to_number

In [3]:
# Dataset key: used to look up the source URL and the indicator id in the
# constants mappings, and as the file stem of the exported CSV/JSON files.
dataset = 'national_council_elections'

In [4]:
# Parse the PX source registered for this dataset in URL_MAPPING; the parsed
# result stays in `px` and its 'DATA' entry becomes the working frame `df`.
# NOTE(review): confirm ISO-8859-2 matches the file's declared code page.
# Latin-2 decodes ü/ä/é like Latin-1 but not è/à, which would garble some
# French place names if the file is actually Latin-1/Latin-9.
px_source = URL_MAPPING[dataset]
px = pyaxis.parse(px_source, encoding='ISO-8859-2')
df = px['DATA']

In [5]:
# Replace the German PX column headers with the short English names used by
# the rest of the notebook.
column_names = {
    'Kanton (-) / Bezirk (>>) / Gemeinde (......)': 'name_de',
    'Jahr': 'year',
    'Ergebnisse': 'results',
    'DATA': 'data',
}
df = df.rename(columns=column_names)

In [6]:
# Remove the hierarchy markers embedded in the name column (the column header
# announces '......' for municipalities and '>>' for districts), plus runs of
# three spaces.
# regex=False makes every pattern a literal string. Under regex matching '.'
# means "any character", and leaving the flag implicit makes pandas emit a
# FutureWarning about the changing default.
# NOTE(review): this strips *every* dot, including dots inside a name (for
# example an abbreviated "St."); confirm that is intended.
for marker in ('.', '>> ', '   '):
    df['name_de'] = df['name_de'].str.replace(marker, '', regex=False)

  df['name_de'] = df['name_de'].str.replace('.', '')


In [7]:
# Preview the first rows after renaming the columns and cleaning the names.
df.head()

Unnamed: 0,name_de,year,results,data
0,Schweiz,2019,Wahlberechtigte,5459218.0
1,Schweiz,2019,Wählende / Eingelegte Wahlzettel (WZ),2462641.0
2,Schweiz,2019,Wahlbeteiligung [%],45.1097758
3,Schweiz,2019,Leere WZ,9366.0
4,Schweiz,2019,Ungültige WZ,29015.0


In [8]:
# Replace the text labels in `results` with integer codes; `metadata` keeps
# the code -> label lookup for each mapped column.
# NOTE(review): in the saved output the codes follow neither order of
# appearance nor alphabetical order. If map_name_to_number enumerates a set,
# the codes may differ between runs — verify in utils before relying on them.
df, metadata = map_name_to_number(df, columns_to_map=['results'])

In [15]:
# Preview the frame after mapping; `results` now holds integer codes.
# NOTE(review): the saved output below also shows a `spatialunit_id` column
# that no visible cell adds to `df`, and the execution counts jump from 8 to
# 15. The output is probably stale; re-run from a fresh kernel to confirm.
df.head()

Unnamed: 0,name_de,year,results,data,spatialunit_id
0,Schweiz,2019,16,5459218.0,True
1,Schweiz,2019,0,2462641.0,True
2,Schweiz,2019,8,45.1097758,True
3,Schweiz,2019,1,9366.0,True
4,Schweiz,2019,12,29015.0,True


In [17]:
# List the distinct spatial unit names present in the data.
df['name_de'].unique()

array(['Schweiz', 'Zürich', 'Bezirk Affoltern', ..., 'Clos du Doubs',
       'Haute-Ajoie', 'La Baroche'], dtype=object)

In [19]:
# Assemble the export table: one row per source row, with constant indicator
# and time-info columns next to the per-row name, year, result code and value.
def _spatial_unit_id(name):
    """Return the ontology id assigned to a spatial unit name."""
    # TODO: every row other than 'Schweiz' is labelled DISTRICT for now.
    if name == 'Schweiz':
        return SPATIAL_UNIT_ONTOLOGY['COUNTRY']
    return SPATIAL_UNIT_ONTOLOGY['DISTRICT']


export_columns = {
    'indicator_id': INDICATOR_ID_MAPPING[dataset],
    'spatialunit_name': df['name_de'],
    'spatialunit_id': df['name_de'].apply(_spatial_unit_id),
    'time_value': df['year'],
    'time_info_id': TIME_INFO_ID['YEAR'],
    'results': df['results'],
    'value': df['data'],
}
df_new = pd.DataFrame(export_columns)

In [20]:
# Preview the assembled export table.
df_new.head()

Unnamed: 0,indicator_id,spatialunit_name,spatialunit_id,time_value,time_info_id,results,value
0,1_10_101,Schweiz,CH,2019,1,16,5459218.0
1,1_10_101,Schweiz,CH,2019,1,0,2462641.0
2,1_10_101,Schweiz,CH,2019,1,8,45.1097758
3,1_10_101,Schweiz,CH,2019,1,1,9366.0
4,1_10_101,Schweiz,CH,2019,1,12,29015.0


In [26]:
# Write the export table to '<dataset>.csv' without the index column;
# missing values are written as 'NA'.
csv_path = f'{dataset}.csv'
df_new.to_csv(csv_path, index=False, na_rep='NA')

In [27]:
# Save the code -> label lookup returned by map_name_to_number as
# '<dataset>.json'.
json_path = f'{dataset}.json'
with open(json_path, mode='w') as metadata_file:
    json.dump(metadata, metadata_file)

In [23]:
# Second preview of the export table; no visible cell modifies `df_new`
# between the two previews.
df_new.head()

Unnamed: 0,indicator_id,spatialunit_name,spatialunit_id,time_value,time_info_id,results,value
0,1_10_101,Schweiz,CH,2019,1,16,5459218.0
1,1_10_101,Schweiz,CH,2019,1,0,2462641.0
2,1_10_101,Schweiz,CH,2019,1,8,45.1097758
3,1_10_101,Schweiz,CH,2019,1,1,9366.0
4,1_10_101,Schweiz,CH,2019,1,12,29015.0


In [24]:
# Show the lookup from the integer `results` codes back to their text labels.
metadata

{'results': {0: 'Wählende / Eingelegte Wahlzettel (WZ)',
  1: 'Leere WZ',
  2: 'Listenstimmen aus unveränderten WZ',
  3: 'Kandidatenstimmen aus veränderten WZ',
  4: 'Zusatzstimmen aus veränderten WZ',
  5: 'WZ ohne Listenbezeichnung',
  6: 'Listenstimmen aus veränderten WZ',
  7: 'Total Listenstimmen',
  8: 'Wahlbeteiligung [%]',
  9: 'Gültige WZ',
  10: 'Total Kandidatenstimmen',
  11: 'Zusatzstimmen aus unveränderten WZ',
  12: 'Ungültige WZ',
  13: 'Veränderte WZ',
  14: 'Total Zusatzstimmen',
  15: 'Kandidatenstimmen aus unveränderten WZ',
  16: 'Wahlberechtigte',
  17: 'Unveränderte WZ'}}