In [2]:
import pandas as pd

In [3]:
%matplotlib inline

## Read data and separate Switzerland, Cantons, Districts, Communes

### Load the Excel with all the data in it

In [4]:
df = pd.read_excel('ReadinData/px-x-0102010000_103 - Parteien.xlsx', sheetname='relrel')

In [5]:
df.head(2)

Unnamed: 0,Nummer,Einheit,BDP,CVP,EDU,EVP,FDP,GLP,Grüne,SP,SVP,Übrige
0,8100,Schweiz,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4551,...... Aadorf,-17.504673,72.846195,111.243296,8.748266,-38.426064,25.40505,-26.529719,-44.767115,36.936559,-96.692709


In [6]:
#Make sure we don't have any NaN in there
df = df[df['Einheit'].notnull()]

### Load some helper files

In [7]:
#Load the numbers and abbreviations file for the cantons
df_kant_ids = pd.read_csv('HelperData/Kantone-ABK-ID.csv')
df_kant_ids.head(2)

Unnamed: 0,Kanton_ABK,Kanton_ID,Kanton_Name,Kanton_Name2
0,ZH,1,Zürich,Zürich
1,BE,2,Bern,Bern / Berne


In [8]:
#Load the canton/district infos for the communes
df_gem_ids = pd.read_excel('HelperData/Gemeindestand-2015.xls')
df_gem_ids.head(2)

Unnamed: 0,Hist.-Nummer,Kanton,Bezirksnummer,Bezirksname,BFS Gde-nummer,Gemeindename,Datum der Aufnahme
0,13256,ZH,101,Affoltern,1,Aeugst am Albis,1976-11-15
1,11742,ZH,101,Affoltern,2,Affoltern am Albis,1960-01-01


In [9]:
df_gem_ids.pop('Hist.-Nummer')
df_gem_ids.pop('Gemeindename')
df_gem_ids.pop('Datum der Aufnahme')
df_gem_ids.rename(columns={'Bezirksnummer': 'Bezirk_ID', 'Bezirksname': 'Bezirk_Name', 'BFS Gde-nummer': 'Gemeinde_ID', 'Kanton': 'Kanton_ABK'}, inplace=True)
df_gem_ids.head(2)

Unnamed: 0,Kanton_ABK,Bezirk_ID,Bezirk_Name,Gemeinde_ID
0,ZH,101,Affoltern,1
1,ZH,101,Affoltern,2


In [10]:
#Construct a set of district ids
df_bez_ids = df_gem_ids.drop_duplicates(subset='Bezirk_ID')
df_bez_ids.pop('Gemeinde_ID')
df_bez_ids.head(2)

Unnamed: 0,Kanton_ABK,Bezirk_ID,Bezirk_Name
0,ZH,101,Affoltern
14,ZH,102,Andelfingen


## Get a dataframe for each geopraphic level

### Separate Switzerland

In [11]:
#Switzerland
df_ch = df[df['Einheit'] == 'Schweiz']
df_ch.pop('Einheit')
df_ch.pop('Nummer')
df_ch.head(2)

Unnamed: 0,BDP,CVP,EDU,EVP,FDP,GLP,Grüne,SP,SVP,Übrige
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
df_ch.shape

(1, 10)

### Separate the cantons

In [13]:
#Cantons - there should be 26 of them
df_kant = df[df['Einheit'].str.contains('^- ')]
df_kant['Kanton_Name'] = df_kant['Einheit'].str.extract('^- (.*)', expand=True)
df_kant.head(2)

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [15]:
#Add the ID and Abbreviations
df_kant = df_kant.merge(df_kant_ids, how='inner', left_on='Kanton_Name', right_on='Kanton_Name2')
df_kant.pop('Kanton_Name2')
df_kant.pop('Einheit')
df_kant.pop('Kanton_Name_y')
df_kant.pop('Nummer')
df_kant.rename(columns={'Kanton_Name_x': 'Kanton_Name'}, inplace=True)
df_kant.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,94,95,96,97,98,99,100,Kanton_Name,Kanton_ABK,Kanton_ID
0,10.140814,10.189053,6.717105,4.113012,3.86379,1.440426,1.528121,0.158892,-0.562474,-2.143233,...,-7.446098,-7.251524,-12.883064,-12.916293,-9.923975,-11.694637,-17.112433,Zürich,ZH,1
1,-4.726403,-6.898074,-5.190242,-3.755251,-5.147559,-4.903439,-5.454057,-4.361319,-5.196198,-6.478511,...,19.993818,17.798107,22.045135,12.675966,22.641016,29.919495,18.411909,Bern / Berne,BE,2


In [16]:
df_kant.shape

(26, 104)

### Separate the districts

In [17]:
#Districts - there should be 148 of them
df_bez = df[df['Einheit'].str.contains('^>> ')]
df_bez['Bezirk_Name'] = df_bez['Einheit'].str.extract('^>> (.*)', expand=True)
df_bez['Bezirk_ID'] = df_bez['Nummer'].str.extract('00(.*)', expand=True).astype(int)
df_bez.pop('Nummer')
df_bez.pop('Einheit')
df_bez.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,Bezirk_Name,Bezirk_ID
2,-3.637422,5.70352,6.688852,11.205635,11.71963,13.372219,12.539357,23.447937,5.901035,25.166107,...,-49.942001,-65.543312,-57.169497,-52.08349,-44.654332,-76.40758,-65.146403,-69.006628,Bezirk Affoltern,101
17,-2.519988,-11.547593,4.637739,7.071592,5.408449,-1.333561,15.971864,6.695271,16.621429,16.429416,...,-50.442836,-38.598497,-33.950762,-37.738805,-47.164924,37.948598,-12.659512,-48.221964,Bezirk Andelfingen,102


In [18]:
df_bez.shape

(148, 103)

### Separate the communes

In [19]:
#Gemeinden - there should be 2324 of them
df_gem = df[df['Einheit'].str.contains('^\.\.\.\.\.\.')]
df_gem['Gemeinde_ID'] = df_gem['Einheit'].str.extract('^\.\.\.\.\.\.(\d\d\d\d)', expand=True).astype(int)
df_gem['Gemeinde_Name'] = df_gem['Einheit'].str.extract('^\.\.\.\.\.\.\d\d\d\d (.*)', expand=True)
df_gem.pop('Nummer')
df_gem.pop('Einheit')
df_gem.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,Gemeinde_ID,Gemeinde_Name
3,-5.933256,3.068971,9.766189,29.185463,-4.503691,3.880414,10.811844,11.58333,3.316813,4.3323,...,-51.706156,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,1,Aeugst am Albis
4,-1.152366,2.144625,-14.735345,-13.407698,3.751546,-2.073101,-5.40063,-3.025657,-11.71963,7.683802,...,-34.629241,-59.502699,-70.958193,-6.138719,4.541968,-48.009221,-23.192888,-54.466462,2,Affoltern am Albis


In [20]:
df_gem.shape

(2324, 103)

## Export the data

In [21]:
#Switzerland
df_ch.to_csv('StructuredData/CH-Alter.csv', index=False)
df_temp = pd.read_csv('StructuredData/CH-Alter.csv')
df_temp.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
#Cantons
df_kant.to_csv('StructuredData/kant-Alter.csv', index=False)
df_temp = pd.read_csv('StructuredData/kant-Alter.csv')
df_temp.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,94,95,96,97,98,99,100,Kanton_Name,Kanton_ABK,Kanton_ID
0,10.140814,10.189053,6.717105,4.113012,3.86379,1.440426,1.528121,0.158892,-0.562474,-2.143233,...,-7.446098,-7.251524,-12.883064,-12.916293,-9.923975,-11.694637,-17.112433,Zürich,ZH,1
1,-4.726403,-6.898074,-5.190242,-3.755251,-5.147559,-4.903439,-5.454057,-4.361319,-5.196198,-6.478511,...,19.993818,17.798107,22.045135,12.675966,22.641016,29.919495,18.411909,Bern / Berne,BE,2


In [23]:
#Districts
df_bez.to_csv('StructuredData/bez-Alter.csv', index=False)
df_temp = pd.read_csv('StructuredData/bez-Alter.csv')
df_temp.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,Bezirk_Name,Bezirk_ID
0,-3.637422,5.70352,6.688852,11.205635,11.71963,13.372219,12.539357,23.447937,5.901035,25.166107,...,-49.942001,-65.543312,-57.169497,-52.08349,-44.654332,-76.40758,-65.146403,-69.006628,Bezirk Affoltern,101
1,-2.519988,-11.547593,4.637739,7.071592,5.408449,-1.333561,15.971864,6.695271,16.621429,16.429416,...,-50.442836,-38.598497,-33.950762,-37.738805,-47.164924,37.948598,-12.659512,-48.221964,Bezirk Andelfingen,102


In [24]:
#Communes
df_gem.to_csv('StructuredData/gem-Alter.csv', index=False)
df_temp = pd.read_csv('StructuredData/gem-Alter.csv')
df_temp.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,Gemeinde_ID,Gemeinde_Name
0,-5.933256,3.068971,9.766189,29.185463,-4.503691,3.880414,10.811844,11.58333,3.316813,4.3323,...,-51.706156,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,1,Aeugst am Albis
1,-1.152366,2.144625,-14.735345,-13.407698,3.751546,-2.073101,-5.40063,-3.025657,-11.71963,7.683802,...,-34.629241,-59.502699,-70.958193,-6.138719,4.541968,-48.009221,-23.192888,-54.466462,2,Affoltern am Albis
