In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Load the dataset from the CSV file
df = pd.read_csv('GPTarget2024.csv')

# Derive 0/1 indicator columns from the categorical demographic columns.
# 1 when ethnicity is exactly 'white', 0 otherwise
df['ethnicity_white'] = df['ethnicity'].eq('white').astype(int)
# 1 when gender is exactly 'female', 0 otherwise
df['gender_female'] = df['gender'].eq('female').astype(int)
# 0 for a masters or doctoral degree, 1 for every other value
df['education_ba'] = (~df['education'].isin(['masters_degree', 'doctoral_degree'])).astype(int)
# 0 for the two non-religious labels, 1 for every other value
# NOTE(review): 'aethiest' is matched exactly as spelled here -- confirm it
# matches the spelling used in the CSV before "correcting" it.
df['religious_binary'] = (~df['religious_affiliation'].isin(['aethiest', 'agnostic'])).astype(int)

# Covariates to test against the treatment assignment
covs = [
    'age',
    'political_party_coded',
    'political_ideology_coded',
    'political_engagement_coded',
    'ethnicity_white',
    'gender_female',
    'education_ba',
    'religious_binary',
]

# One-way ANOVA of a single covariate across treatment conditions
def run_anova(df, covariate):
    """Run a one-way ANOVA of ``covariate`` on ``treatment_condition``.

    Fits the OLS model ``<covariate> ~ C(treatment_condition)`` and returns
    the resulting ANOVA table with the ``Residual`` row removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the ``covariate`` column and a
        ``treatment_condition`` column.
    covariate : str
        Name of the column used as the dependent variable. It is inserted
        into the model formula verbatim.

    Returns
    -------
    pandas.DataFrame
        The non-residual row(s) of the ANOVA table, with the model term in a
        ``term`` column and the covariate name in a trailing ``covariate``
        column.
    """
    model_formula = f'{covariate} ~ C(treatment_condition)'
    model = ols(model_formula, data=df).fit()

    # anova_lm keeps the term names in the index; move them into a regular
    # column without mutating the table in place.
    anova_table = anova_lm(model).reset_index().rename(columns={'index': 'term'})

    # Select with .loc and add the label with .assign (which works on a copy)
    # rather than assigning a column onto a filtered frame; the latter
    # triggers pandas' SettingWithCopyWarning.
    treatment_rows = anova_table.loc[anova_table['term'] != 'Residual']
    return treatment_rows.assign(covariate=covariate)

# Run the ANOVA for every covariate and stack the per-covariate rows.
# NOTE: each per-covariate table keeps its own index label, so the stacked
# frame repeats the label 0 on every row; pass ignore_index=True to pd.concat
# if a fresh 0..n-1 index is wanted.
out_cov_checks = pd.concat(
    [run_anova(df, covariate_name) for covariate_name in covs]
)

print(out_cov_checks)

                     term   df      sum_sq     mean_sq         F    PR(>F)  \
0  C(treatment_condition)  3.0  363.741704  121.247235  0.646638  0.584994   
0  C(treatment_condition)  3.0    1.577614    0.525871  0.396469  0.755549   
0  C(treatment_condition)  3.0    0.516785    0.172262  0.119749  0.948524   
0  C(treatment_condition)  3.0    2.261959    0.753986  0.918625  0.430862   
0  C(treatment_condition)  3.0    1.000450    0.333483  1.681476  0.168658   
0  C(treatment_condition)  3.0    0.111755    0.037252  0.148946  0.930402   
0  C(treatment_condition)  3.0    0.734004    0.244668  0.997489  0.392842   
0  C(treatment_condition)  3.0    0.655274    0.218425  1.214083  0.302845   

                    covariate  
0                         age  
0       political_party_coded  
0    political_ideology_coded  
0  political_engagement_coded  
0             ethnicity_white  
0               gender_female  
0                education_ba  
0            religious_binary  
