In [1]:
import pandas as pd
import scipy.stats as stats
import statsmodels.formula.api as smf
import statsmodels.stats.multicomp as multi
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import textwrap
import matplotlib.ticker as ticker
from statsmodels.stats.proportion import proportions_ztest
from matplotlib.ticker import MaxNLocator
from statsmodels.stats.multitest import multipletests
import itertools

In [2]:
# Constants
# Targeting-flag columns: one '<attribute>_targeted' column per attribute.
COLUMNS_TO_CONVERT = [
    f'{attribute}_targeted'
    for attribute in (
        'age', 'ethnicity', 'gender', 'education',
        'religious_affiliation', 'occupation',
        'geographic_location', 'party_affiliation',
        'ideological_affiliation', 'political_engagement',
    )
]
# Categorical columns that get expanded into indicator (dummy) columns.
COLUMNS_TO_DUMMY = ['treatment_condition', 'number_attributes_targeted']
# The five dependent-variable response items, dv_response1 .. dv_response5.
DV_RESPONSE_COLS = [f'dv_response{item}' for item in range(1, 6)]

# Function to create dummy variables
def create_dummies(df, column, prefix=None):
    """Return a new DataFrame with indicator columns for ``column`` appended.

    Args:
        df: Source DataFrame; it is not modified.
        column: Name of the column to expand into indicator columns.
        prefix: Optional prefix forwarded to ``pd.get_dummies``. When None,
            the new columns are named after the category values themselves.

    Returns:
        A new DataFrame holding the original columns followed by the
        indicator columns.
    """
    indicator_columns = pd.get_dummies(df[column], prefix=prefix)
    widened = pd.concat(objs=[df, indicator_columns], axis='columns')
    return widened

# Function to convert column values to binary
def convert_to_binary(df, columns):
    """Recode the given columns to 1/0 integer flags, in place.

    A cell becomes 1 when it equals the string 't' and 0 for every other
    value (including missing values such as None/NaN).

    Args:
        df: DataFrame to recode. The listed columns are overwritten on this
            very object.
        columns: Iterable of column names to recode.

    Returns:
        The same ``df`` object, with the listed columns replaced by int64
        flags.
    """
    for col in columns:
        # Vectorised comparison instead of a per-row Python lambda. The
        # explicit int64 cast keeps the result dtype stable even when the
        # frame has no rows (Series.apply would leave the column's original
        # dtype in that case).
        df[col] = df[col].eq('t').astype('int64')
    return df

# Load data
df = pd.read_csv('raw_data.csv')

# Preprocess data
# Keep only respondents who passed the attention check. The explicit .copy()
# makes the filtered frame independent of the raw one, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
df = df[df['attention_check'] == "pass"].copy()
# Correcting the typo in ~10 values in the 'meta_perception' column
df['meta_perception'] = df['meta_perception'].replace("somehwhat_different", "somewhat_different")

# Append indicator columns for each categorical column. The
# 'number_attributes_targeted' indicators get an 'a' prefix; the
# 'treatment_condition' indicators are named after the condition values.
for col in COLUMNS_TO_DUMMY:
    prefix = 'a' if col == 'number_attributes_targeted' else None
    df = create_dummies(df, col, prefix)
# Recode the targeting flags ('t' -> 1, anything else -> 0)
df = convert_to_binary(df, COLUMNS_TO_CONVERT)

# Reverse-score 'dv_response2' (presumably a 0-100 scale - TODO confirm) and
# create the dependent variable measure as the row-wise mean of the response items
df['dv_response2'] = 100 - df['dv_response2']
df['dv_response_mean'] = df[DV_RESPONSE_COLS].mean(axis=1)

In [3]:
# Per-issue subsets of df; every treatment condition is retained.
# NOTE(review): the digital-privacy stance string has no trailing period while
# the other three do - confirm it matches the raw data exactly.
df_digital_privacy = df[df['issue_stance'].eq("The U.S. should not implement legislation that strengthens digital privacy rights")]
df_renewable_energy = df[df['issue_stance'].eq("The U.S. should increase investments in renewable energy technologies.")]
df_china_sanctions = df[df['issue_stance'].eq("The U.S. should impose stronger economic sanctions on China.")]
df_nato_support = df[df['issue_stance'].eq("The U.S. should not increase its support for NATO.")]

# The two treatment conditions compared in the 'mt_bm' subsets.
MT_BM_CONDITIONS = ['microtargeting', 'no microtargeting']


def _keep_mt_bm(frame):
    """Return the rows of ``frame`` whose treatment_condition is in MT_BM_CONDITIONS."""
    in_comparison = frame['treatment_condition'].isin(MT_BM_CONDITIONS)
    return frame[in_comparison]


# Same subsets as above (plus the pooled frame), restricted to the
# 'microtargeting' and 'no microtargeting' conditions.
df_mt_bm = _keep_mt_bm(df)
df_mt_bm_digital_privacy = _keep_mt_bm(df_digital_privacy)
df_mt_bm_renewable_energy = _keep_mt_bm(df_renewable_energy)
df_mt_bm_china_sanctions = _keep_mt_bm(df_china_sanctions)
df_mt_bm_nato_support = _keep_mt_bm(df_nato_support)