In [None]:
import json

def get_percentage(num, den):
  """Express `num` as a percentage of `den`, rounded to two decimal places.

  Args:
    num: Numerator (the part).
    den: Denominator (the whole).

  Returns:
    `num / den * 100` rounded to 2 decimals, or 0 when `den` is zero
    (so a zero denominator never raises ZeroDivisionError).
  """
  # The zero check comes first so the division is never evaluated for den == 0.
  return 0 if den == 0 else round(num / den * 100, 2)

In [None]:
# Load the annotated dataset; the cells below iterate over `data` and read each
# element through string keys such as 'part2_aggregate'.
# The encoding is pinned to UTF-8 because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text (e.g. on Windows).
with open("../data/dataset.json", encoding="utf-8") as jsonfile:
  data = json.load(jsonfile)

# How to count (and find) the polarity of messages in the dataset (Figure 2)

In [None]:
# Count messages per polarity label, starting from the aggregated part-2 annotation.
num_positive = sum(1 for entry in data if entry['part2_aggregate']['polarity'] == "positive")
num_neutral = sum(1 for entry in data if entry['part2_aggregate']['polarity'] == "neutral")
num_negative = sum(1 for entry in data if entry['part2_aggregate']['polarity'] == "negative")

# A message that carries a "discussion_polarity" key adds one more count on top of
# the tallies above; any value other than "positive"/"negative" is counted as neutral.
# NOTE(review): presumably only messages whose aggregate polarity is none of the three
# labels carry this key (otherwise they would be counted twice) — confirm against the data.
for message in data:
  if "discussion_polarity" not in message:
    continue
  discussion_label = message["discussion_polarity"]
  if discussion_label == "positive":
    num_positive += 1
  elif discussion_label == "negative":
    num_negative += 1
  else:
    num_neutral += 1

print("num_positive: {}, num_neutral: {}, num_negative: {}".format(num_positive, num_neutral, num_negative))

num_positive: 521, num_neutral: 838, num_negative: 432


# Counting the emotions present in the dataset (Figure 3)


In [None]:
# Number of times each emotion label appears across every individual answer of
# every message. A plain dict is used so keys stay in first-seen order.
emotions = {}

for message in data:
  for answer in message['individual_answers']:
    for emotion in answer['part2']['emotions']:
      # .get() supplies the 0 starting value the first time a label is seen.
      emotions[emotion] = emotions.get(emotion, 0) + 1

# Bare expression so the notebook displays the resulting dict.
emotions

{'joy': 1770,
 'anger': 461,
 'fear': 181,
 'sadness': 1121,
 'negativesurprise': 307,
 'positivesurprise': 80,
 'love': 153}

# Summarize the agreement between the different evaluators for each message (Figure 4)

In [None]:
# Per-part tallies of evaluator agreement on each message:
#   total   -> agreement_type == "all"
#   none    -> agreement_type == "undefined"
#   partial -> every other agreement_type value
agg_total_p1 = agg_partial_p1 = agg_none_p1 = 0
agg_total_p2 = agg_partial_p2 = agg_none_p2 = 0

for message in data:
  # Part 1 aggregate
  agreement_type = message['part1_aggregate']['agreement_type']
  if agreement_type == "all":
    agg_total_p1 += 1
  elif agreement_type == "undefined":
    agg_none_p1 += 1
  else:
    agg_partial_p1 += 1

  # Part 2 aggregate
  agreement_type = message['part2_aggregate']['agreement_type']
  if agreement_type == "all":
    agg_total_p2 += 1
  elif agreement_type == "undefined":
    agg_none_p2 += 1
  else:
    agg_partial_p2 += 1

print("agg_total_p1: {}, agg_total_p2: {}".format(agg_total_p1, agg_total_p2))
print("agg_partial_p1: {}, agg_partial_p2: {}".format(agg_partial_p1, agg_partial_p2))
print("agg_none_p1: {}, agg_none_p2: {}".format(agg_none_p1, agg_none_p2))

agg_total_p1: 1070, agg_total_p2: 1042
agg_partial_p1: 680, agg_partial_p2: 700
agg_none_p1: 41, agg_none_p2: 49


# Summarize the difference in confidence between the different passes (Figure 5)

In [21]:
# Collect each message's aggregate 'avg_confidence', grouped by part (p1 / p2) and
# by agreement type: "all" -> total, "undefined" -> none, anything else -> partial.
arr_total_p1, arr_partial_p1, arr_none_p1 = [], [], []
arr_total_p2, arr_partial_p2, arr_none_p2 = [], [], []

for message in data:
  # Part 1 aggregate
  aggregate = message['part1_aggregate']
  agreement_type = aggregate['agreement_type']
  if agreement_type == "all":
    arr_total_p1.append(aggregate['avg_confidence'])
  elif agreement_type == "undefined":
    arr_none_p1.append(aggregate['avg_confidence'])
  else:
    arr_partial_p1.append(aggregate['avg_confidence'])

  # Part 2 aggregate
  aggregate = message['part2_aggregate']
  agreement_type = aggregate['agreement_type']
  if agreement_type == "all":
    arr_total_p2.append(aggregate['avg_confidence'])
  elif agreement_type == "undefined":
    arr_none_p2.append(aggregate['avg_confidence'])
  else:
    arr_partial_p2.append(aggregate['avg_confidence'])

# Arithmetic mean per group. Each division raises ZeroDivisionError if its group
# is empty, i.e. if no message in `data` has that agreement type for that part.
avg_total_p1 = sum(arr_total_p1) / len(arr_total_p1)
avg_total_p2 = sum(arr_total_p2) / len(arr_total_p2)
avg_partial_p1 = sum(arr_partial_p1) / len(arr_partial_p1)
avg_partial_p2 = sum(arr_partial_p2) / len(arr_partial_p2)
avg_none_p1 = sum(arr_none_p1) / len(arr_none_p1)
avg_none_p2 = sum(arr_none_p2) / len(arr_none_p2)

print("avg_total_p1: {:.2f}, avg_total_p2: {:.2f}".format(avg_total_p1, avg_total_p2))
print("avg_partial_p1: {:.2f}, avg_partial_p2: {:.2f}".format(avg_partial_p1, avg_partial_p2))
print("avg_none_p1: {:.2f}, avg_none_p2: {:.2f}".format(avg_none_p1, avg_none_p2))

# Overall figure per part: the unweighted mean of the "total" and "partial" group
# averages. The "none" group is left out and group sizes are not taken into account.
print("avg_p1: {:.2f}, avg_p2: {:.2f}".format((avg_total_p1 + avg_partial_p1) / 2, (avg_total_p2 + avg_partial_p2) / 2))

avg_total_p1: 3.85, avg_total_p2: 4.19
avg_partial_p1: 3.37, avg_partial_p2: 3.72
avg_none_p1: 3.09, avg_none_p2: 3.56
avg_p1: 3.61, avg_p2: 3.95
