In [1]:
# Setup

## Load packages

In [2]:
import os
import pandas as pd
import matplotlib
import requests   # manage web requests
import numpy as np   # core methods package for math and to manage various data objects; many other uses
import matplotlib.pyplot as plt  # plotting library
#from sklearn import linear_model   # data science library; not loading in jupyter
import statsmodels.formula.api as sm  # statistics library
import seaborn as sns   # data visualization library
import inspect # contains getsource() function to inspect source code
import platform # to check identifying info of python, e.g., version

## Check Python Version

In [3]:
# Report the running interpreter's version string so the environment is on record
# (the saved output for this cell is '3.7.8').
platform.python_version()

'3.7.8'

## Set Directory Structure


### Working Directory
Not really needed in the Jupyter container because the home directory is already set. All file paths are relative to the home directory.

In [5]:
# Make the home directory the working directory so the relative paths used
# later in the notebook ('./data', './figures', ...) resolve consistently.
print(os.getcwd()) # check current working dir
# Resolve the home directory at run time instead of hard-coding '/home/jovyan/',
# which only exists inside the Jupyter container. Joining with '' keeps the
# trailing separator, so inside the container `path` has exactly the old value.
path = os.path.join(os.path.expanduser('~'), '')
os.chdir(path)
print(os.getcwd()) # ensure cwd changed to desired dir

/home/jovyan/code
/home/jovyan


### Make Subdirectories

In [6]:
# Create the project folders under the current working directory.
# exist_ok=True makes the cell safe to re-run when the folders already exist.
for subdir in ('./code', './data', './figures', './tables'):
    os.makedirs(subdir, exist_ok=True)

### Check Directory Contents

In [7]:
# List the entries of the current working directory to confirm that the
# code/data/figures/tables folders exist alongside the repository files.
os.listdir()

['.profile',
 '.bash_logout',
 '.bashrc',
 '.ipython',
 'figures',
 'code',
 '.config',
 'tables',
 '.cache',
 '.local',
 '.git',
 'data',
 'README.md',
 'environment.yml',
 '.empty',
 '.conda']

# Load Data

In [21]:
# Read the survey CSV into a DataFrame. The path is relative to the current
# working directory, so this cell must run after the working directory is set.
RAW_DATA_CSV = './data/original/metoo_data.csv'
df = pd.read_csv(RAW_DATA_CSV)

In [10]:
# Display the loaded frame; pandas abbreviates the output (first/last rows,
# middle columns elided with '...') rather than printing every cell.
df

Unnamed: 0,id,senator_party,condition,pid7,pre_sexism_1,pre_sexism_2,pre_sexism_3,pre_sexism_4,pre_favorability,pre_vote,...,post_vote,punishment_1,punishment_2,punishment_3,punishment_4,punishment_5,post_sexism_1,post_sexism_2,post_sexism_3,post_sexism_4
0,1,Democrat,2,Strong Republican,Agree strongly,Disagree strongly,Disagree somewhat,Agree strongly,5,3,...,7,Neither disagree nor agree,Agree somewhat,Disagree somewhat,Disagree somewhat,Disagree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Disagree strongly
1,2,Republican,2,Not very strong Democrat,Disagree somewhat,Agree somewhat,Neither disagree nor agree,Disagree somewhat,5,3,...,2,Disagree somewhat,Neither disagree nor agree,Disagree somewhat,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree
2,3,Republican,2,Strong Democrat,Disagree strongly,Neither disagree nor agree,Neither disagree nor agree,Disagree strongly,5,2,...,2,Disagree strongly,Agree strongly,Disagree strongly,Agree strongly,Neither disagree nor agree,Disagree strongly,Disagree strongly,Disagree somewhat,Disagree strongly
3,4,Republican,1,Independent,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree,Neither disagree nor agree,7,6,...,7,Neither disagree nor agree,Agree somewhat,Neither disagree nor agree,Disagree somewhat,Neither disagree nor agree,Agree somewhat,Agree somewhat,Disagree strongly,Neither disagree nor agree
4,5,Democrat,3,Strong Democrat,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,6,8,...,8,,,,,,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2801,2803,Democrat,1,Strong Republican,Agree somewhat,Agree somewhat,Disagree somewhat,Agree somewhat,1,2,...,3,Agree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Agree strongly,Agree strongly,Disagree strongly,Agree strongly
2802,2804,Republican,2,Strong Republican,Agree somewhat,Agree somewhat,Agree somewhat,Disagree somewhat,7,10,...,8,Agree strongly,Disagree strongly,Agree strongly,Disagree strongly,Agree strongly,Agree somewhat,Agree somewhat,Agree somewhat,Disagree somewhat
2803,2805,Republican,2,Not very strong Democrat,Agree somewhat,Agree strongly,Agree strongly,Agree somewhat,8,8,...,9,Neither disagree nor agree,Agree somewhat,Disagree strongly,Disagree somewhat,Agree strongly,Agree somewhat,Agree somewhat,Agree strongly,Agree somewhat
2804,2806,Republican,3,Strong Democrat,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,5,0,...,0,,,,,,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly


In [11]:
# Positional indexing with .iloc: take the first five rows and first nine columns.
# Python positions start at 0 and the end of a slice is exclusive.
df.iloc[:5, :9]

Unnamed: 0,id,senator_party,condition,pid7,pre_sexism_1,pre_sexism_2,pre_sexism_3,pre_sexism_4,pre_favorability
0,1,Democrat,2,Strong Republican,Agree strongly,Disagree strongly,Disagree somewhat,Agree strongly,5
1,2,Republican,2,Not very strong Democrat,Disagree somewhat,Agree somewhat,Neither disagree nor agree,Disagree somewhat,5
2,3,Republican,2,Strong Democrat,Disagree strongly,Neither disagree nor agree,Neither disagree nor agree,Disagree strongly,5
3,4,Republican,1,Independent,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree,Neither disagree nor agree,7
4,5,Democrat,3,Strong Democrat,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,6


In [22]:
# Inspect each column's dtype: in the saved output the Likert-style answers are
# `object` (text) and the id/condition/favorability/vote columns are int64.
df.dtypes

id                    int64
senator_party        object
condition             int64
pid7                 object
pre_sexism_1         object
pre_sexism_2         object
pre_sexism_3         object
pre_sexism_4         object
pre_favorability      int64
pre_vote              int64
gender               object
age                  object
newsinterest         object
post_favorability     int64
post_vote             int64
punishment_1         object
punishment_2         object
punishment_3         object
punishment_4         object
punishment_5         object
post_sexism_1        object
post_sexism_2        object
post_sexism_3        object
post_sexism_4        object
dtype: object

# Data Processing

In [23]:
# Recode the numeric experimental condition into a readable label stored in a
# new column; any code missing from the mapping becomes NaN.
condition_labels = {1: 'Jokes', 2: 'Assault', 3: 'Control'}
df['condition2'] = df['condition'].map(condition_labels)

In [29]:
# Re-display the frame to confirm the recode: `condition2` now appears as the
# last column with the Jokes/Assault/Control labels.
df

Unnamed: 0,id,senator_party,condition,pid7,pre_sexism_1,pre_sexism_2,pre_sexism_3,pre_sexism_4,pre_favorability,pre_vote,...,punishment_1,punishment_2,punishment_3,punishment_4,punishment_5,post_sexism_1,post_sexism_2,post_sexism_3,post_sexism_4,condition2
0,1,Democrat,2,Strong Republican,Agree strongly,Disagree strongly,Disagree somewhat,Agree strongly,5,3,...,Neither disagree nor agree,Agree somewhat,Disagree somewhat,Disagree somewhat,Disagree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Assault
1,2,Republican,2,Not very strong Democrat,Disagree somewhat,Agree somewhat,Neither disagree nor agree,Disagree somewhat,5,3,...,Disagree somewhat,Neither disagree nor agree,Disagree somewhat,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree,Assault
2,3,Republican,2,Strong Democrat,Disagree strongly,Neither disagree nor agree,Neither disagree nor agree,Disagree strongly,5,2,...,Disagree strongly,Agree strongly,Disagree strongly,Agree strongly,Neither disagree nor agree,Disagree strongly,Disagree strongly,Disagree somewhat,Disagree strongly,Assault
3,4,Republican,1,Independent,Agree somewhat,Neither disagree nor agree,Neither disagree nor agree,Neither disagree nor agree,7,6,...,Neither disagree nor agree,Agree somewhat,Neither disagree nor agree,Disagree somewhat,Neither disagree nor agree,Agree somewhat,Agree somewhat,Disagree strongly,Neither disagree nor agree,Jokes
4,5,Democrat,3,Strong Democrat,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,6,8,...,,,,,,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,Control
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2801,2803,Democrat,1,Strong Republican,Agree somewhat,Agree somewhat,Disagree somewhat,Agree somewhat,1,2,...,Agree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Disagree strongly,Agree strongly,Agree strongly,Disagree strongly,Agree strongly,Jokes
2802,2804,Republican,2,Strong Republican,Agree somewhat,Agree somewhat,Agree somewhat,Disagree somewhat,7,10,...,Agree strongly,Disagree strongly,Agree strongly,Disagree strongly,Agree strongly,Agree somewhat,Agree somewhat,Agree somewhat,Disagree somewhat,Assault
2803,2805,Republican,2,Not very strong Democrat,Agree somewhat,Agree strongly,Agree strongly,Agree somewhat,8,8,...,Neither disagree nor agree,Agree somewhat,Disagree strongly,Disagree somewhat,Agree strongly,Agree somewhat,Agree somewhat,Agree strongly,Agree somewhat,Assault
2804,2806,Republican,3,Strong Democrat,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,5,0,...,,,,,,Disagree strongly,Disagree strongly,Agree strongly,Disagree strongly,Control


In [30]:
# Store the condition labels as a pandas categorical (the analogue of an R
# factor), then list every column's dtype to confirm the conversion.
df['condition2'] = df['condition2'].astype('category')
df.dtypes

id                      int64
senator_party          object
condition               int64
pid7                   object
pre_sexism_1           object
pre_sexism_2           object
pre_sexism_3           object
pre_sexism_4           object
pre_favorability        int64
pre_vote                int64
gender                 object
age                    object
newsinterest           object
post_favorability       int64
post_vote               int64
punishment_1           object
punishment_2           object
punishment_3           object
punishment_4           object
punishment_5           object
post_sexism_1          object
post_sexism_2          object
post_sexism_3          object
post_sexism_4          object
condition2           category
dtype: object

In [31]:
# Relevel so that 'Control' is the baseline (first) category.
# Show the category order before the change. A bare expression in the middle of
# a cell is not displayed by Jupyter, so print the order explicitly.
print(df['condition2'].cat.categories.tolist())
# Reorder by assigning the result back to the column. The previous
# `inplace=True` form is deprecated since pandas 1.3 and was removed in
# pandas 2.0, where passing it raises a TypeError.
df['condition2'] = df['condition2'].cat.reorder_categories(['Control', 'Assault', 'Jokes'])
# check order again (the last expression of the cell is displayed)
df['condition2']

0       Assault
1       Assault
2       Assault
3         Jokes
4       Control
         ...   
2801      Jokes
2802    Assault
2803    Assault
2804    Control
2805      Jokes
Name: condition2, Length: 2806, dtype: category
Categories (3, object): ['Control', 'Assault', 'Jokes']

In [None]:



# NOTE(review): this cell contains R code (`<-`, `dat$...`), not Python, and `dat`
# is never defined in the visible notebook (the Python frame is `df`), so it cannot
# run in the Python kernel as-is. Presumably it is the original analysis script kept
# as a reference for porting — TODO confirm.

# Make "Control" the reference (first) level of the condition2 factor.
dat$condition2 <- relevel(dat$condition2, "Control")

# new variable: pid3 ####
# Collapse the pid7 labels into three groups: strong, not-very-strong and leaning
# partisans are assigned to their party; "Independent" and "Not sure" both become
# "Independent". pid3 starts as NA, so any pid7 value not listed below stays NA.
table(dat$pid7)  # tabulate the pid7 labels before recoding
dat$pid3 <- NA
dat$pid3[dat$pid7=="Lean Democrat" | dat$pid7=="Strong Democrat" | dat$pid7=="Not very strong Democrat"] <- "Democrat"
dat$pid3[dat$pid7=="Lean Republican" | dat$pid7=="Strong Republican" | dat$pid7=="Not very strong Republican"] <- "Republican"
dat$pid3[dat$pid7=="Independent" | dat$pid7=="Not sure"] <- "Independent"

# store the three-group party identification as a factor
dat$pid3 <- as.factor(dat$pid3)

# recode: punishment ####
# Each punishment_1..punishment_5 answer is converted from its agreement label to a
# number in a new column: "Disagree strongly" = 1, "Disagree somewhat" = 2,
# "Neither disagree nor agree" = 3, "Agree somewhat" = 4, "Agree strongly" = 5.
# Rows whose answer matches none of the five labels are not assigned a value.
# New column names: punishment_1 -> needmoreevidence, punishment_2 -> apology,
# punishment_3 -> longtimeago, punishment_4 -> resign, punishment_5 -> elitecues.

# punishment 1
dat$needmoreevidence[dat$punishment_1=="Agree strongly"] <- 5
dat$needmoreevidence[dat$punishment_1=="Agree somewhat"] <- 4
dat$needmoreevidence[dat$punishment_1=="Neither disagree nor agree"] <- 3
dat$needmoreevidence[dat$punishment_1=="Disagree somewhat"] <- 2
dat$needmoreevidence[dat$punishment_1=="Disagree strongly"] <- 1
table(dat$needmoreevidence)  # frequency check of the recoded values

# punishment 2
dat$apology[dat$punishment_2=="Agree strongly"] <- 5
dat$apology[dat$punishment_2=="Agree somewhat"] <- 4
dat$apology[dat$punishment_2=="Neither disagree nor agree"] <- 3
dat$apology[dat$punishment_2=="Disagree somewhat"] <- 2
dat$apology[dat$punishment_2=="Disagree strongly"] <- 1

# punishment 3
dat$longtimeago[dat$punishment_3=="Agree strongly"] <- 5
dat$longtimeago[dat$punishment_3=="Agree somewhat"] <- 4
dat$longtimeago[dat$punishment_3=="Neither disagree nor agree"] <- 3
dat$longtimeago[dat$punishment_3=="Disagree somewhat"] <- 2
dat$longtimeago[dat$punishment_3=="Disagree strongly"] <- 1
table(dat$longtimeago)  # frequency check of the recoded values


# punishment 4
dat$resign[dat$punishment_4=="Agree strongly"] <- 5
dat$resign[dat$punishment_4=="Agree somewhat"] <- 4
dat$resign[dat$punishment_4=="Neither disagree nor agree"] <- 3
dat$resign[dat$punishment_4=="Disagree somewhat"] <- 2
dat$resign[dat$punishment_4=="Disagree strongly"] <- 1


# punishment 5
dat$elitecues[dat$punishment_5=="Agree strongly"] <- 5
dat$elitecues[dat$punishment_5=="Agree somewhat"] <- 4
dat$elitecues[dat$punishment_5=="Neither disagree nor agree"] <- 3
dat$elitecues[dat$punishment_5=="Disagree somewhat"] <- 2
dat$elitecues[dat$punishment_5=="Disagree strongly"] <- 1

# recode punishment: reverse codes
# Two items get a second column on the inverted scale
# ("Agree strongly" = 1 ... "Disagree strongly" = 5).

# need more evidence
# Built directly from the punishment_1 text labels.
dat$needmoreevidence_reverse[dat$punishment_1=="Agree strongly"] <- 1
dat$needmoreevidence_reverse[dat$punishment_1=="Agree somewhat"] <- 2
dat$needmoreevidence_reverse[dat$punishment_1=="Neither disagree nor agree"] <- 3
dat$needmoreevidence_reverse[dat$punishment_1=="Disagree somewhat"] <- 4
dat$needmoreevidence_reverse[dat$punishment_1=="Disagree strongly"] <- 5
# cross-tabulate reversed vs. original coding as a check
table(dat$needmoreevidence_reverse, dat$needmoreevidence)

# long time ago
# Built from the numeric longtimeago column (5 -> 1, 4 -> 2, 3 -> 3, 2 -> 4, 1 -> 5),
# so it relies on longtimeago already existing in dat.
dat$longtimeago_reverse[dat$longtimeago==5] <- 1
dat$longtimeago_reverse[dat$longtimeago==4] <- 2
dat$longtimeago_reverse[dat$longtimeago==3] <- 3
dat$longtimeago_reverse[dat$longtimeago==2] <- 4
dat$longtimeago_reverse[dat$longtimeago==1] <- 5 
# cross-tabulate reversed vs. original coding as a check
table(dat$longtimeago_reverse, dat$longtimeago)

# new variable: mean punitiveness score ####
# Average of four items: apology, resign and the two reversed items.
# elitecues is not part of the score. Plain `+` is used, so a missing value in
# any of the four items makes the score NA for that row.
dat$meanpunishment <- ((dat$apology+dat$resign+dat$needmoreevidence_reverse+dat$longtimeago_reverse)/4)

## new variable: same party as legislator####
# Compare the respondent's three-group party ID (pid3) with senator_party.
table(dat$senator_party)  # tabulate the senator_party values

# `&` binds tighter than `|` in R, so each condition reads
# (Democrat & Democrat) | (Republican & Republican), and likewise for the mismatch.
dat$sameparty[dat$pid3=="Democrat" & dat$senator_party=="Democrat" | dat$pid3=="Republican" & dat$senator_party=="Republican"] <- "Same party"
dat$sameparty[dat$pid3=="Democrat" & dat$senator_party=="Republican" | dat$pid3=="Republican" & dat$senator_party=="Democrat"] <- "Opposite party"
# Rows with pid3 "Independent" get their own label regardless of senator_party.
dat$sameparty[dat$pid3=="Independent"] <- "Independents/Not sures"

# store the label as a factor
dat$sameparty <- as.factor(dat$sameparty)

# recode: pre sexism ####
# sexism_1,2,4 reverse coded
# What the code below does: items 1, 2 and 4 map "Agree strongly" = 5 down to
# "Disagree strongly" = 1; item 3 uses the opposite direction ("Agree strongly" = 1,
# "Disagree strongly" = 5). Each column is overwritten in place with the numeric code.
# NOTE(review): the "reverse coded" wording above does not obviously match the code,
# where item 3 is the one on the inverted scale — confirm against the questionnaire.
# NOTE(review): the string-spec form of recode() looks like car::recode; no library()
# call is visible in this cell — confirm which package provides it.

dat$pre_sexism_1 <- recode(dat$pre_sexism_1,"'Agree strongly'= 5; 'Agree somewhat'= 4; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 2; 'Disagree strongly'=1")
dat$pre_sexism_1 <- as.numeric(dat$pre_sexism_1)

dat$pre_sexism_2 <- recode(dat$pre_sexism_2,"'Agree strongly'= 5; 'Agree somewhat'= 4; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 2; 'Disagree strongly'=1")
dat$pre_sexism_2 <- as.numeric(dat$pre_sexism_2)

dat$pre_sexism_4 <- recode(dat$pre_sexism_4,"'Agree strongly'= 5; 'Agree somewhat'= 4; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 2; 'Disagree strongly'=1")
dat$pre_sexism_4 <- as.numeric(dat$pre_sexism_4)

# item 3: opposite direction from items 1, 2 and 4
dat$pre_sexism_3 <- recode(dat$pre_sexism_3,"'Agree strongly'= 1; 'Agree somewhat'= 2; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 4; 'Disagree strongly'=5")
dat$pre_sexism_3 <- as.numeric(dat$pre_sexism_3)

# recode: post sexism ####
# sexism_1,2,4 reverse coded
# Same mappings as the pretest items: 1, 2 and 4 run "Agree strongly" = 5 to
# "Disagree strongly" = 1, and item 3 runs the other way.

dat$post_sexism_1 <- recode(dat$post_sexism_1,"'Agree strongly'= 5; 'Agree somewhat'= 4; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 2; 'Disagree strongly'=1")
dat$post_sexism_1 <- as.numeric(dat$post_sexism_1)

dat$post_sexism_2 <- recode(dat$post_sexism_2,"'Agree strongly'= 5; 'Agree somewhat'= 4; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 2; 'Disagree strongly'=1")
dat$post_sexism_2 <- as.numeric(dat$post_sexism_2)

dat$post_sexism_4 <- recode(dat$post_sexism_4,"'Agree strongly'= 5; 'Agree somewhat'= 4; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 2; 'Disagree strongly'=1")
dat$post_sexism_4 <- as.numeric(dat$post_sexism_4)

# item 3: opposite direction from items 1, 2 and 4
dat$post_sexism_3 <- recode(dat$post_sexism_3,"'Agree strongly'= 1; 'Agree somewhat'= 2; 'Neither disagree nor agree'= 3; 'Disagree somewhat'= 4; 'Disagree strongly'=5")
dat$post_sexism_3 <- as.numeric(dat$post_sexism_3)

# new variable: pre_sexism ####
# Pretest composite: unweighted mean of the four recoded pretest items.

dat$pre_sexism <- ((dat$pre_sexism_1 + dat$pre_sexism_2 + dat$pre_sexism_3 + dat$pre_sexism_4)/4)


# new variable: post_sexism ####
# Posttest composite: unweighted mean of the four recoded posttest items.

dat$post_sexism <- ((dat$post_sexism_1 + dat$post_sexism_2 + dat$post_sexism_3 + dat$post_sexism_4)/4)

### new variable: raw change from pretest to posttest ####
# Each change score is posttest minus pretest, so a negative value means the
# posttest score is lower. summary() prints the distribution as a quick check.
# favorability
dat$change_favorability <- (dat$post_favorability - dat$pre_favorability)
summary(dat$change_favorability)

# vote
dat$change_vote <- (dat$post_vote - dat$pre_vote)
summary(dat$change_vote)

# sexism
dat$change_sexism <- (dat$post_sexism - dat$pre_sexism)
summary(dat$change_sexism)

### new variable: percent change from pretest to posttest ####
# Both scores are shifted by +1 before dividing. The shift cancels in the
# numerator (it still equals post - pre) and turns the denominator into pre + 1 —
# presumably to avoid dividing by zero when a pretest score is 0 (TODO confirm).
# favorability
dat$perchange_favorability <- ((((dat$post_favorability+1) - (dat$pre_favorability+1))/(dat$pre_favorability+1))*100)
summary(dat$perchange_favorability)

# vote
dat$perchange_vote <- ((((dat$post_vote+1) - (dat$pre_vote+1))/(dat$pre_vote+1))*100)
summary(dat$perchange_vote)

# sexism
dat$perchange_sexism <- ((((dat$post_sexism+1) - (dat$pre_sexism+1))/(dat$pre_sexism+1))*100)
summary(dat$perchange_sexism)

# subset: without independents/notsures ####
# Keep rows whose sameparty label is anything other than "Independents/Not sures";
# subset() also drops rows where the condition is NA (sameparty missing).
partydat <- subset(dat, dat$sameparty!="Independents/Not sures")

# subset: people that share party with senator, people that do not share party with senator
# Both are taken from the full `dat`, one data frame per sameparty label.
samepartydat <- subset(dat, dat$sameparty=="Same party")
opppartydat <- subset(dat, dat$sameparty=="Opposite party")
