We need to make **one contingency table per demographic**, so:
- Party × Gender
- Party × Age Group
- Party × Education Group

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

# Load data

In [2]:
# -------- settings --------
PRIVATE = "../data/private_dataL.xlsx"           # survey

# -------- load --------
s = pd.read_excel(PRIVATE)

# Convert DOB -> age

In [3]:
from datetime import datetime

# Convert dob to datetime
s['dob'] = pd.to_datetime(s['dob'], format='%d.%m.%Y', errors='coerce')

# calculate age correctly
today = pd.Timestamp.today()
s['age'] = (
    today.year - s['dob'].dt.year
    - ((today.month < s['dob'].dt.month) | ((today.month == s['dob'].dt.month) & (today.day < s['dob'].dt.day)))
)

# s["agegroup"] = pd.cut( # convert age into age groups
#     s["age"],
#     bins=[0, 30, 50, 65, 150],
#     labels=["<30","30–49","50–64","65+"],
#     right=False
# )

# Convert education into categories

In [4]:
# def compress_edu(x):
#     x = str(x).lower()
#     if "primary" in x:
#         return "Basic"
#     if "upper" in x or "short" in x:
#         return "Secondary"
#     if "bachelor" in x:
#         return "Postsecondary"
#     if "master" in x or "phd" in x or "university" in x:
#         return "Tertiary"
#     if "vocational" in x or "vet" in x:
#         return "Vocational"
#     return "Other"

# s["edu_compressed"] = s["education"].map(compress_edu)


# Make contingency tables

In [5]:
gender = pd.crosstab(s["sex"], s["party"])
education = pd.crosstab(s['education'], s['party'])
age = pd.crosstab(s['age'], s['party'])
citizenship = pd.crosstab(s['citizenship'], s['party'])
marital = pd.crosstab(s['marital_status'], s['party'])
zipcode = pd.crosstab(s['zip'], s['party'])

age

party,Green,Invalid vote,Red
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20,1,0,0
21,1,0,0
22,2,0,0
23,5,0,0
25,6,0,0
...,...,...,...
90,1,0,0
91,1,0,0
98,0,1,0
99,0,0,1


## Calculate p-values

In [6]:
chi2_gender, p_gender, dof_gender, exp_gender = chi2_contingency(gender)
chi2_education, p_education, dof_education, exp_education  = chi2_contingency(education)
chi2_age, p_age, dof_age, exp_age  = chi2_contingency(age)
chi2_citizenship, p_citizenship, dof_citizenship, exp_citizenship  = chi2_contingency(citizenship)
chi2_marital, p_marital, dof_marital, exp_marital  = chi2_contingency(marital)
chi2_zip, p_zip, dof_zip, exp_zip  = chi2_contingency(zipcode)

print(p_gender)
print(p_education)
print(p_age)
print(p_citizenship)
print(p_marital)
print(p_zip)

0.13115155398539474
9.34277960525228e-06
7.332205509356052e-09
0.9821027388361657
9.47121102701045e-05
0.15265978584820905
