In [318]:
import pandas as pd

# Load the raw election results (Nationalratswahl 2019).
# Data source: https://www.bmi.gv.at/412/Nationalratswahlen/Nationalratswahl_2019/
# read_csv reference: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
# The file is ';'-separated and uses '.' as thousands separator and ',' as decimal mark.
df = pd.read_csv(
    "NRW19.csv",
    sep=';',
    thousands='.',
    decimal=',',
)
# GKZ = "Amtliche Gemeindekennzahl" (official municipality code)

In [310]:
# Data Preparation
import numpy

## Leave out the "Wahlkarten" rows (GKZ ending in '99') and the aggregated
## result rows (GKZ ending in '00').
is_wahlkarten = df.GKZ.str.endswith('99')
is_aggregate = df.GKZ.str.endswith('00')
df_reduced = df[~(is_wahlkarten | is_aggregate)]

## Only the percentages plus "GKZ", "Gebietsname" and "Stimmen" are needed.
## Every column whose name starts with '%' is paired with the column directly
## to its left: that left neighbour is dropped and the '%' column takes over
## its name.
is_percentage = df_reduced.columns.str.startswith('%')
# Shift the mask one position to the left to select the left neighbours.
precedes_percentage = numpy.append(is_percentage[1:], [False])
party_names = df_reduced.columns[precedes_percentage]
percentages_names = df_reduced.columns[is_percentage]
df_reduced = (
    df_reduced
    .drop(columns=party_names)
    .rename(columns=dict(zip(percentages_names, party_names)))
    .drop(columns=['Wahlbe-rechtigte', 'Ungültige', 'Gültige'])
)

## Collapse the small parties into one summed 'others' column.
small_parties_name = ['KPÖ', 'WANDL', 'BZÖ', 'BIER', 'CPÖ', 'GILT', 'SLP']
df_reduced = (
    df_reduced
    .assign(others=df_reduced[small_parties_name].sum(axis=1))
    .drop(columns=small_parties_name)
)

## Add a column with the respective name of the state
def find_state(value):
   """Return the name of the Austrian federal state for a state digit.

   The notebook calls this with the character at position 1 of each GKZ
   string.

   Parameters
   ----------
   value : str or int
       State digit; anything ``int()`` can convert.

   Returns
   -------
   str
       The state name for 1-9, or ``'Other'`` when the number is outside
       that range or the value cannot be converted to an integer.
   """
   state_by_digit = {
      1: 'Burgenland',
      2: 'Kärnten',
      3: 'Niederösterreich',
      4: 'Oberösterreich',
      5: 'Salzburg',
      6: 'Steiermark',  # was 'Steiermark.' (stray trailing period)
      7: 'Tirol',
      8: 'Vorarlberg',
      9: 'Wien',
   }
   try:
      digit = int(value)
   except (TypeError, ValueError):
      # NaN/None or a non-numeric character: use the same fallback label as
      # for an unknown digit instead of aborting the whole column.
      return 'Other'
   return state_by_digit.get(digit, 'Other')

# Label every row with its state: the character at position 1 of the GKZ
# (directly after the leading letter) is the digit handed to find_state.
state_digit = df_reduced.GKZ.str[1]
df_reduced['state'] = state_digit.map(find_state)

# Uncomment to shorten the preview below:
#pd.set_option('display.max_rows', 3)
df_reduced

Unnamed: 0,GKZ,Gebietsname,Stimmen,ÖVP,SPÖ,FPÖ,NEOS,JETZT,GRÜNE,others,state
8,G10101,Eisenstadt,7192,41.69,20.38,14.38,7.52,1.69,13.27,1.07,Burgenland
11,G10201,Rust,1055,34.23,30.38,20.15,5.01,1.16,7.91,1.16,Burgenland
14,G10301,Breitenbrunn am Neusiedler See,1137,33.87,29.82,13.42,6.94,3.42,11.89,0.63,Burgenland
15,G10302,Donnerskirchen,1076,44.38,22.85,18.89,5.67,1.23,6.33,0.66,Burgenland
16,G10303,Großhöflein,1164,42.76,23.79,16.07,5.88,0.61,10.10,0.79,Burgenland
...,...,...,...,...,...,...,...,...,...,...,...
2433,G91901,Döbling,25563,32.26,21.64,10.86,13.34,3.22,17.17,1.50,Wien
2436,G92001,Brigittenau,22535,20.07,36.48,14.62,5.83,2.92,17.83,2.25,Wien
2439,G92101,Floridsdorf,56568,25.99,30.49,20.13,6.65,2.84,11.94,1.95,Wien
2442,G92201,Donaustadt,69291,25.83,29.42,18.26,7.78,2.96,13.79,1.97,Wien


In [311]:
# https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
from sklearn.decomposition import PCA

# Vote count plus the percentage columns that serve as PCA input.
pca_features = ['Stimmen', 'ÖVP', 'SPÖ', 'FPÖ', 'NEOS', 'JETZT', 'GRÜNE', 'others']

# fit() returns the estimator itself, so construction and fitting are chained.
# NOTE(review): 'Stimmen' is a raw count while the other columns are
# percentages, and nothing is rescaled before fitting -- confirm this is intended.
pca = PCA(n_components=2).fit(df_reduced[pca_features])

# Optional diagnostics:
#print(pca.explained_variance_ratio_)
#print(pca.singular_values_)
# NOTE(review): a former (disabled) line assigned
# pd.DataFrame(pca.transform(...))[0] to df_reduced["pca1"]. That frame carries
# a fresh 0..n-1 index while df_reduced keeps the row labels of the raw file,
# so the assignment would align on labels rather than on position.

pd.set_option('display.max_rows', 100)

In [312]:
# Show the prepared frame again.
# NOTE(review): the saved output of this cell contains a 'pca1' column (with
# missing values in the last rows), but the only statement that would create it
# is commented out in the PCA cell -- the output appears to stem from an
# earlier kernel state; confirm with Restart & Run All.
df_reduced

Unnamed: 0,GKZ,Gebietsname,Stimmen,ÖVP,SPÖ,FPÖ,NEOS,JETZT,GRÜNE,others,state,pca1
8,G10101,Eisenstadt,7192,41.69,20.38,14.38,7.52,1.69,13.27,1.07,Burgenland,-483.634028
11,G10201,Rust,1055,34.23,30.38,20.15,5.01,1.16,7.91,1.16,Burgenland,-681.635545
14,G10301,Breitenbrunn am Neusiedler See,1137,33.87,29.82,13.42,6.94,3.42,11.89,0.63,Burgenland,-164.642545
15,G10302,Donnerskirchen,1076,44.38,22.85,18.89,5.67,1.23,6.33,0.66,Burgenland,-981.644006
16,G10303,Großhöflein,1164,42.76,23.79,16.07,5.88,0.61,10.10,0.79,Burgenland,-196.623586
...,...,...,...,...,...,...,...,...,...,...,...,...
2433,G91901,Döbling,25563,32.26,21.64,10.86,13.34,3.22,17.17,1.50,Wien,
2436,G92001,Brigittenau,22535,20.07,36.48,14.62,5.83,2.92,17.83,2.25,Wien,
2439,G92101,Floridsdorf,56568,25.99,30.49,20.13,6.65,2.84,11.94,1.95,Wien,
2442,G92201,Donaustadt,69291,25.83,29.42,18.26,7.78,2.96,13.79,1.97,Wien,


In [313]:
# Columns handed to pca.transform(); same names and order as used for pca.fit().
pca_columns = ['Stimmen', 'ÖVP', 'SPÖ', 'FPÖ', 'NEOS', 'JETZT', 'GRÜNE', 'others']

# reset_index() returns a new frame and leaves its receiver untouched, so the
# result has to be bound (the previous bare call was a no-op and
# df_reduced_final was merely a second name for df_reduced). The old row labels
# are kept in an 'index' column.
df_reduced_final = df_reduced.reset_index()
print(df_reduced_final)
print(pd.DataFrame(pca.transform(df_reduced_final[pca_columns])).reset_index())

0        5361.358259
1        -775.634670
2        -693.632844
3        -754.643469
4        -666.640990
            ...     
2113    23732.361260
2114    20704.373154
2115    54737.356176
2116    67460.353105
2117    39409.359520
Name: 0, Length: 2118, dtype: float64