We need to make **one contingency table per demographic**, so:
- Party × Gender
- Party × Age Group
- Party × Education Group

In [15]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

# Load data

In [16]:
# -------- settings --------
# Relative path of the private survey workbook.
PRIVATE = "../data/private_dataL.xlsx"

# -------- load --------
# Survey responses, read from the first worksheet of the workbook.
s = pd.read_excel(io=PRIVATE, sheet_name=0)

# Convert DOB -> age

In [17]:
from datetime import datetime

# Parse the day.month.year strings in `dob`; values that do not match the
# format become NaT because of errors='coerce'.
s['dob'] = pd.to_datetime(s['dob'], errors='coerce', format='%d.%m.%Y')

# Age in completed years as of the moment this cell runs: the difference in
# calendar years, minus one (True counts as 1) whenever this year's birthday
# still lies ahead of `today`.
today = pd.Timestamp.today()
s['age'] = (today.year - s['dob'].dt.year) - (
    (s['dob'].dt.month > today.month)
    | ((s['dob'].dt.month == today.month) & (s['dob'].dt.day > today.day))
)

# s["agegroup"] = pd.cut( # convert age into age groups
#     s["age"],
#     bins=[0, 30, 50, 65, 150],
#     labels=["<30","30–49","50–64","65+"],
#     right=False
# )

# Convert education into categories

In [18]:
# def compress_edu(x):
#     x = str(x).lower()
#     if "primary" in x:
#         return "Basic"
#     if "upper" in x or "short" in x:
#         return "Secondary"
#     if "bachelor" in x:
#         return "Postsecondary"
#     if "master" in x or "phd" in x or "university" in x:
#         return "Tertiary"
#     if "vocational" in x or "vet" in x:
#         return "Vocational"
#     return "Other"

# s["edu_compressed"] = s["education"].map(compress_edu)


# Make contingency tables

In [19]:
# One contingency table per demographic column, each cross-tabulated against
# the `evote` column. The generator keeps its loop variable out of the
# notebook namespace, so only the six table names are defined here.
gender, education, age, citizenship, marital, zipcode = (
    pd.crosstab(s[column], s["evote"])
    for column in ("sex", "education", "age", "citizenship", "marital_status", "zip")
)

# Show the education table as the cell output.
education

evote,0,1
education,Unnamed: 1_level_1,Unnamed: 2_level_1
Bachelors programmes,4,2
Masters programmes,14,13
Not stated,3,2
PhD programmes,3,0
Primary education,32,9
Short cycle higher education,10,4
Upper secondary education,15,4
Vocational Education and Training (VET),39,18
Vocational bachelors educations,17,11


In [20]:
# Run the chi-square test of independence on every contingency table and
# unpack each result into (statistic, p-value, degrees of freedom, expected
# frequencies). The tables are processed in the same order as the targets.
(
    (chi2_gender, p_gender, dof_gender, exp_gender),
    (chi2_education, p_education, dof_education, exp_education),
    (chi2_age, p_age, dof_age, exp_age),
    (chi2_citizenship, p_citizenship, dof_citizenship, exp_citizenship),
    (chi2_marital, p_marital, dof_marital, exp_marital),
    (chi2_zip, p_zip, dof_zip, exp_zip),
) = [
    chi2_contingency(table)
    for table in (gender, education, age, citizenship, marital, zipcode)
]

# One p-value per line: gender, education, age, citizenship, marital, zip.
print(
    p_gender,
    p_education,
    p_age,
    p_citizenship,
    p_marital,
    p_zip,
    sep="\n",
)

0.32056454236818155
0.3807175181469629
0.6007364258799078
0.6774185939037044
0.30925829858478265
0.35510323026968554
