In [None]:
from __future__ import print_function

import warnings

import numpy as np
import pandas as pd
import pymc3 as pm
from scipy import linalg, optimize, sparse
from scipy.stats import norm
from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score
# Silence every warning category for the rest of the session
# (this also hides pandas' SettingWithCopyWarning and deprecation notices).
warnings.filterwarnings(action='ignore')



In [None]:
# PREPROCESSING
df = pd.read_csv("results_259_Descriptive_norms-replication-XPLab-2021_group_7.csv")

# search all participants with valid responses and save their id
# vdf = valid dataframe, dataframe of valid participants
vdf = df.loc[df['response'] == 'Participants chose which action they preferred']
vdf = vdf[['submission_id']]

# vidf = valid identity dataframe; all rows of the valid participants,
# grouped per participant in the order their ids appear in vdf.
# The per-participant slices are concatenated once instead of growing a frame
# with DataFrame.append inside the loop (quadratic, and removed in pandas 2.0).
valid_rows = [df.loc[df['submission_id'] == n] for n in vdf['submission_id']]
vidf = pd.concat(valid_rows) if valid_rows else pd.DataFrame()

# The strided slices below rely on every participant contributing exactly
# 7 consecutive rows to vidf.
# sdf = shortened dataframe: row at position 3 (0-based) of each 7-row block,
# which holds the main response value
sdf = vidf.iloc[3::7, :]

# dataframe for ingroup agree: position 5 of each 7-row block
iadf = vidf.iloc[5::7, :]

# dataframe for outgroup disagree: position 6 of each 7-row block
oddf = vidf.iloc[6::7, :]

# dataframe with only analytically important values
# (.copy() so the column assignments below write to an independent frame
# rather than to a slice of vidf)
newdf = sdf[['both_norms_shown','ingroup_descriptive_norm','response']].copy()
newdf['outgroup_descriptive_norm'] = newdf['ingroup_descriptive_norm'] * -1

# add ingroup agree in binary value, 1 if >= 5, else 0
# A single np.where is used on purpose: recoding in two passes (>= 5 -> 1,
# then < 5 -> 0) would also reset the freshly written 1s to 0.
# Missing ratings compare False and therefore end up as 0.
newdf['ingroup_agree'] = pd.to_numeric(iadf['response'].values)
newdf['ingroup_agree'] = np.where(newdf['ingroup_agree'] >= 5, 1, 0)

# add outgroup disagree in binary value, 1 if <= 3, else 0
newdf['outgroup_disagree'] = pd.to_numeric(oddf['response'].values)
newdf['outgroup_disagree'] = np.where(newdf['outgroup_disagree'] <= 3, 1, 0)
print(newdf)

    both_norms_shown  ingroup_descriptive_norm response  \
10                 1                         1        3   

    outgroup_descriptive_norm  ingroup_agree  outgroup_disagree  
10                         -1              0                  0  


In [None]:
# ANALYSIS

# Model 1 -- odds of responding higher:
#   log_e(odds of responding higher) = b_in * I + b_both * B + b_out * I x B
# I is the INGROUP NORM condition (and the corresponding direction of the outgroup norm),
# B is BOTH NORMS SHOWN and I x B is the OUTGROUP NORM;
# b_in, b_both and b_out are the parameters for the effects of changing these conditions.
# PRIORS MISSING! -- the terms below are summed without any b_* weights.
ingroup_term = newdf['ingroup_descriptive_norm']
both_term = newdf['both_norms_shown']
outgroup_term = newdf['outgroup_descriptive_norm'] * newdf['both_norms_shown']
newdf['odds_of_responding_higher'] = ingroup_term + both_term + outgroup_term
print(newdf)

# Model 2 -- same structure, gated by the agreement indicators:
#   log_e(odds) = b_in * I x INGROUP AGREE + b_both * B + b_out * I x B x OUTGROUP DISAGREE
# PRIORS MISSING! -- again no b_* weights are applied.
agree_term = newdf['ingroup_agree'] * newdf['ingroup_descriptive_norm']
disagree_term = (
    newdf['outgroup_descriptive_norm']
    * newdf['both_norms_shown']
    * newdf['outgroup_disagree']
)
newdf['odds'] = agree_term + newdf['both_norms_shown'] + disagree_term
print(newdf)

    both_norms_shown  ingroup_descriptive_norm response  \
10                 1                         1        3   

    outgroup_descriptive_norm  ingroup_agree  outgroup_disagree  \
10                         -1              0                  0   

    odds_of_responding_higher  
10                          1  
    both_norms_shown  ingroup_descriptive_norm response  \
10                 1                         1        3   

    outgroup_descriptive_norm  ingroup_agree  outgroup_disagree  \
10                         -1              0                  0   

    odds_of_responding_higher  odds  
10                          1     1  


In [None]:
# Write the processed frame to disk as CSV
newdf.to_csv(path_or_buf='processed_data.csv')

#### **Set Priors**

To customize the priors as needed, we create NumPy arrays of samples with `norm` and then apply clipping. There is certainly a nicer way to obtain the final pdfs, though; if you have a better one, please submit it.

In [None]:
# Cut off samples at or below a threshold and resize the result to a fixed length
def clip_pdf(pdf, value, size=10000):
  """Keep only the samples greater than ``value`` and return exactly ``size`` of them.

  The surviving samples are duplicated once; the doubled array is then
  padded (by resampling with replacement) or thinned (by dropping a random
  subset while keeping the remaining order) until it holds ``size`` entries.

  Parameters
  ----------
  pdf : array-like
      Samples to clip.
  value : float
      Exclusive lower bound; only samples strictly greater than it are kept.
  size : int, optional
      Length of the returned array. Defaults to 10000.

  Returns
  -------
  numpy.ndarray
      One-dimensional array of ``size`` samples, all greater than ``value``.

  Raises
  ------
  ValueError
      If no sample exceeds ``value`` although ``size`` is positive, because
      there is then nothing to resample from.
  """
  samples = np.asarray(pdf)

  # Mask where pdf is greater than value
  kept = samples[samples > value]
  if kept.shape[0] == 0 and size > 0:
    raise ValueError("clip_pdf: no samples are greater than the threshold %r" % (value,))

  # Duplicate the surviving samples before resizing
  kept = np.append(kept, kept)

  # To have constant size N
  shortfall = size - kept.shape[0]
  if shortfall > 0:
    # Too few samples: top up by drawing from the kept ones with replacement
    kept = np.append(kept, np.random.choice(kept, shortfall))
  elif shortfall < 0:
    # Too many samples: keep a uniformly random subset in its original order.
    # One draw without replacement replaces deleting random entries one by one.
    keep_idx = np.sort(np.random.choice(kept.shape[0], size, replace=False))
    kept = kept[keep_idx]

  return kept

In [None]:
# Create priors

# Number of samples drawn per prior; clip_pdf also returns 10000 samples by
# default, so clipped and unclipped priors have the same length.
N_PRIOR_SAMPLES = 10000

# Seed NumPy's global generator so the priors are identical on every
# Restart & Run All (norm.rvs falls back to this global state, and clip_pdf
# draws from np.random as well).
PRIOR_SEED = 42
np.random.seed(PRIOR_SEED)

# b_in  (scm: self-categorization model, alt: alternative)
b_in_scm = norm.rvs(0.816, 0.5, size=N_PRIOR_SAMPLES)
b_in_alt = norm.rvs(0.816, 0.5, size=N_PRIOR_SAMPLES)

# b_out
b_out_scm = norm.rvs(0, 0.5, size=N_PRIOR_SAMPLES)
# NOTE(review): b_out_alt is derived from b_in_alt *before* b_in_alt is clipped
# at 0 below, so it still reflects the unclipped draws -- confirm this is intended.
b_out_alt = b_in_alt * -(0.85/0.6)

# b_both
# (the `smc` spelling is kept because the plotting cell refers to b_both_smc)
b_both_smc = norm.rvs(0, 0.5, size=N_PRIOR_SAMPLES)
b_both_alt = norm.rvs(0, 0.5, size=N_PRIOR_SAMPLES)

# Restrict these priors to values greater than 0
b_in_scm = clip_pdf(b_in_scm,0)
b_in_alt = clip_pdf(b_in_alt,0)
b_both_smc = clip_pdf(b_both_smc,0)

In [None]:
import seaborn as sns

# Distribution plot of the clipped b_both prior samples
sns.displot(data=b_both_smc)