# 2.1 Experiment 2A

## 2.1.1 Imports

In [None]:
# Analytical Tools
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# General Utilities
import itertools
import json
import pprint

# Custom Utilities
import utilities.processing as processing
import utilities.plotting as plotting
import utilities.describe as describe

# Some settings
# Short alias so nested structures can be pretty-printed with log(obj).
log = pprint.pprint
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

## 2.1.2 Load Data

In [None]:
# Survey exports to load, in order. Each non-blank line is parsed as one JSON document.
FILE_NAMES = [
    'data/exp-09-data/survey-eight-entries.json',
    'data/exp-09-data/survey-nine-entries.json'
]

# Experiment dimensions; the question labels are split into two equal halves.
NUM_QUESTIONS, NUM_JUDGEMENTS = 10, 7
GROUP_SIZE = NUM_QUESTIONS // 2

ALL_LABELS = processing.get_all_labels(NUM_QUESTIONS)
LOW_LABELS, HIGH_LABELS = ALL_LABELS[:GROUP_SIZE], ALL_LABELS[GROUP_SIZE:]
JUDGMENT_LABELS = processing.get_judgment_labels(NUM_JUDGEMENTS)

master_responses = []
for name in FILE_NAMES:
    with open(name) as file:
        # Lines read from a file keep their trailing newline, so a bare
        # `if line` is always true; strip first so blank lines are skipped
        # instead of being handed to json.loads (which would raise).
        master_responses.extend(
            json.loads(line) for line in file if line.strip()
        )
# Drop the first three records of the combined list.
# NOTE(review): presumably pilot/test submissions - confirm why exactly three.
master_responses = master_responses[3:]

In [3]:
# One empty list per expected column label.
col_labels = processing.get_col_labels(NUM_QUESTIONS, NUM_JUDGEMENTS)
data = {}
for label in col_labels:
    data[label] = []

# The return value is discarded and `data` is turned into a DataFrame in the
# next cell, so this call presumably fills the lists in place - confirm in
# utilities.processing.
processing.fill_experiment_data(data, master_responses)

In [4]:
# Convert the column dict into a DataFrame and report its cell and row counts.
data = pd.DataFrame(data)
sample_size = data.shape[0]
print(data.size, sample_size)

# Keep rows with response_type == 0 and consent == 1.
response_data = data[(data['response_type'] == 0) & (data['consent'] == 1)]
response_data.head()

145644 1374


Unnamed: 0,consent,participant_id,group_number,response_type,test_one,test_two,low_q0_score,low_q0_index,low_q0_choice,low_q0_j0,...,high_q4_score,high_q4_index,high_q4_choice,high_q4_j0,high_q4_j1,high_q4_j2,high_q4_j3,high_q4_j4,high_q4_j5,high_q4_j6
9,1.0,6rdp183ar5kr3jagqdoxb7nh9d7kyd0u,0,0,0.0,1.0,35.0,9.0,0.0,0.0,...,2413.0,45.0,1.0,0.0,0.0,0.0,0.0,0.0,,0.0
12,1.0,r7gvngs3mjdqo4lb3nd6hm3b3uobqsrb,2,0,1.0,1.0,28.0,26.0,1.0,3.0,...,2411.0,42.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48,1.0,7ov3go08xy77yt73ou8tc79enl4c67mj,0,0,1.0,1.0,24.0,7.0,1.0,3.0,...,2410.0,17.0,0.0,2.0,3.0,2.0,2.0,3.0,,2.0
49,1.0,3burenqmmt71nbk9sznc0t6gadf8wrlk,2,0,1.0,1.0,23.0,48.0,1.0,3.0,...,2417.0,44.0,0.0,2.0,3.0,2.0,3.0,3.0,3.0,4.0
50,1.0,qwt23r6c9kn5tl73h6kh6kcdqm1nz4oq,0,0,1.0,1.0,16.0,27.0,1.0,6.0,...,2394.0,31.0,0.0,0.0,0.0,0.0,0.0,2.0,,0.0


In [5]:
# Group numbers 0 and 1 are pooled as the control sample; group 2 is the upvote sample.
in_control_group = response_data['group_number'].isin([0, 1])
control_responses = response_data[in_control_group]
upvote_responses = response_data[response_data['group_number'] == 2]

print(len(control_responses), len(upvote_responses))

299 301


In [6]:
# Keep participants with test_one == 1 or test_two == 1.
# NOTE(review): presumably these are attention/comprehension checks - confirm.
# .copy() makes each filtered result an independent frame: later cells add
# columns to these frames, and the chained-assignment warning is silenced
# notebook-wide, so the explicit copy keeps those writes unambiguous.
control_responses = control_responses[
    (control_responses.test_one == 1) | (control_responses.test_two == 1)
].copy()
upvote_responses = upvote_responses[
    (upvote_responses.test_one == 1) | (upvote_responses.test_two == 1)
].copy()
print(len(control_responses), len(upvote_responses))

295 297


## 2.1.3 Analysis

In [7]:
# Experiment dimensions (same values as in the data-loading cell).
NUM_QUESTIONS = 10
NUM_JUDGEMENTS = 7
GROUP_SIZE = NUM_QUESTIONS // 2
# NOTE(review): THRESHOLD is not used in the visible cells - confirm its meaning where it is consumed.
THRESHOLD = 240.9

# Display names for the conditions and the seven judgment questions.
CONDITIONS = ['Control', 'Post Number', 'Upvotes']
QUESTIONS = [
    'Curiosity',
    'Confidence',
    'Usefulness',
    'Popularity',
    'Writing',
    'Surprise',
    'Social Utility',
]

# Question labels split into a first ("low") and second ("high") half.
ALL_LABELS = processing.get_all_labels(NUM_QUESTIONS)
LOW_LABELS = ALL_LABELS[:GROUP_SIZE]
HIGH_LABELS = ALL_LABELS[GROUP_SIZE:]
JUDGMENT_LABELS = processing.get_judgment_labels(NUM_JUDGEMENTS)

In [8]:
# For every judgment, add three per-participant columns to upvote_responses:
#   <j>_low  - mean over the LOW_LABELS questions
#   <j>_high - mean over the HIGH_LABELS questions
#   <j>_diff - high mean minus low mean
for j_label in JUDGMENT_LABELS:
    half_means = {}
    for half, q_labels in (('low', LOW_LABELS), ('high', HIGH_LABELS)):
        headers = ['{}_{}'.format(q_label, j_label) for q_label in q_labels]
        half_means[half] = upvote_responses[headers].mean(axis=1)

    for half in ('low', 'high'):
        upvote_responses['{}_{}'.format(j_label, half)] = half_means[half]
    upvote_responses['{}_diff'.format(j_label)] = half_means['high'] - half_means['low']
upvote_responses.head()

Unnamed: 0,consent,participant_id,group_number,response_type,test_one,test_two,low_q0_score,low_q0_index,low_q0_choice,low_q0_j0,...,j3_diff,j4_low,j4_high,j4_diff,j5_low,j5_high,j5_diff,j6_low,j6_high,j6_diff
12,1.0,r7gvngs3mjdqo4lb3nd6hm3b3uobqsrb,2,0,1.0,1.0,28.0,26.0,1.0,3.0,...,0.0,1.2,1.2,0.0,1.2,1.4,0.2,1.2,1.2,0.0
49,1.0,3burenqmmt71nbk9sznc0t6gadf8wrlk,2,0,1.0,1.0,23.0,48.0,1.0,3.0,...,1.2,3.4,3.6,0.2,3.4,3.6,0.2,3.8,3.6,-0.2
54,1.0,9543ar5qbz3ectkkdqoexf9lurur57za,2,0,1.0,1.0,25.0,27.0,0.0,4.0,...,4.8,2.0,3.2,1.2,3.4,2.0,-1.4,2.0,2.2,0.2
55,1.0,kd74sbkm5u7u0b0424a6fm5d76wagaw6,2,0,0.0,1.0,25.0,1.0,1.0,4.0,...,3.2,2.4,3.4,1.0,3.6,2.6,-1.0,2.6,3.0,0.4
77,1.0,hczxorcd6py0pwrty95zbp2rzqdvv66e,2,0,1.0,1.0,16.0,43.0,1.0,6.0,...,3.4,4.4,3.0,-1.4,1.6,4.2,2.6,2.6,3.4,0.8


In [20]:
# Control frame gets the same <j>_low / <j>_high / <j>_diff columns as the
# upvote frame, but there is no low/high split here: both columns hold the
# per-participant mean over all questions, and the difference is fixed at 0.
for j_label in JUDGMENT_LABELS:
    all_headers = [
        '{}_{}'.format(q_label, j_label)
        for q_label in itertools.chain(LOW_LABELS, HIGH_LABELS)
    ]
    all_data = control_responses[all_headers].mean(axis=1)

    for half in ('low', 'high'):
        control_responses['{}_{}'.format(j_label, half)] = all_data
    control_responses['{}_diff'.format(j_label)] = 0
control_responses.head()

Unnamed: 0,consent,participant_id,group_number,response_type,test_one,test_two,low_q0_score,low_q0_index,low_q0_choice,low_q0_j0,...,j3_diff,j4_low,j4_high,j4_diff,j5_low,j5_high,j5_diff,j6_low,j6_high,j6_diff
9,1.0,6rdp183ar5kr3jagqdoxb7nh9d7kyd0u,0,0,0.0,1.0,35.0,9.0,0.0,0.0,...,0,0.0,0.0,0,,,0,0.0,0.0,0
48,1.0,7ov3go08xy77yt73ou8tc79enl4c67mj,0,0,1.0,1.0,24.0,7.0,1.0,3.0,...,0,3.3,3.3,0,,,0,2.8,2.8,0
50,1.0,qwt23r6c9kn5tl73h6kh6kcdqm1nz4oq,0,0,1.0,1.0,16.0,27.0,1.0,6.0,...,0,3.8,3.8,0,,,0,0.6,0.6,0
51,1.0,1a6hvsu5ghm07dusso38okbt09960cyk,0,0,1.0,1.0,24.0,10.0,0.0,3.0,...,0,4.9,4.9,0,,,0,2.3,2.3,0
58,1.0,p1nehat5h8la1qhyxhmdf83upctpjl0q,0,0,1.0,0.0,14.0,11.0,1.0,3.0,...,0,4.0,4.0,0,,,0,2.4,2.4,0


In [32]:
# Per-judgment sample means and standard errors for the three plotted series:
# upvote/low, control, upvote/high. Each list gets one entry per judgment kept.
low, control, high = [], [], []
low_err, control_err, high_err = [], [], []

for j_label in JUDGMENT_LABELS:
    # 'j4' is left out of the summary.
    # NOTE(review): in the control frame it is the j5 columns that are all NaN
    # (see the control output above), while j4 has values - confirm that j4 is
    # the judgment that should be excluded here.
    if j_label == 'j4':
        continue

    # Series.sem() uses ddof=1 like scipy.stats.sem, but skips NaN the same way
    # Series.mean() does, so the mean and its error always describe the same
    # set of participants (scipy.stats.sem would return nan on any NaN entry).
    low_data = upvote_responses['{}_low'.format(j_label)]
    low.append(low_data.mean())
    low_err.append(low_data.sem())

    control_data = control_responses['{}_low'.format(j_label)]
    control.append(control_data.mean())
    control_err.append(control_data.sem())

    high_data = upvote_responses['{}_high'.format(j_label)]
    high.append(high_data.mean())
    high_err.append(high_data.sem())

In [34]:
def r_format(values):
    """Return the values joined into a single ', '-separated string."""
    return ', '.join(str(value) for value in values)


# Print the three series (low, control, high) one per line; every line except
# the last ends with a trailing comma so the block reads as one long list.
print('Means')
for series, trailer in ((low, ','), (control, ','), (high, '')):
    print(r_format(series) + trailer)

print('Errors')
for series, trailer in ((low_err, ','), (control_err, ','), (high_err, '')):
    print(r_format(series) + trailer)

Means
2.983164983164984, 1.7299663299663297, 2.4296296296296296, 1.827609427609427, 2.358922558922558, 2.120538720538721,
3.349152542372882, 1.721694915254238, 2.649830508474576, 2.889152542372881, nan, 2.415593220338984,
3.4383838383838388, 1.7804713804713799, 2.7649831649831627, 4.22087542087542, 2.9845117845117857, 2.593265993265991
Errors
0.07704766995023894, 0.0761468044766546, 0.07494190877181982, 0.0773848545844434, 0.06855260153760509, 0.07420865263871017,
0.07459157311856353, 0.07813852069409588, 0.07253521205245628, 0.06467555441801905, nan, 0.07264566557405276,
0.08113552957078504, 0.08088870837335543, 0.07629995629160503, 0.07084813180600136, 0.07135194230169387, 0.07778459263755035


In [35]:
# Options for the ratings plot.
# NOTE(review): 'ticks' lists seven judgments while the summary cell above
# skips 'j4' and yields six values per series, and 'legend' names two series
# while three (low, control, high) are computed - confirm against the plotting
# helper that consumes this dict.
PLOTTING_OPTIONS = {
    'ylabel': 'Ratings',
    'ticks': ['Curiosity', 'Confidence', 'Usefulness',
              'Popularity', 'Writing', 'Surprise', 'Social Utility'],
    # Fixed typo in the displayed legend text: 'High Humbers' -> 'High Numbers'.
    'legend': ['Low Numbers', 'High Numbers'],
    'size': (12, 6)
}

In [None]:
pl