In [3]:
import json

def get_percentage(num, den):
  """Express ``num / den`` as a percentage rounded to two decimal places.

  Args:
    num: Numerator of the ratio.
    den: Denominator of the ratio.

  Returns:
    ``num / den * 100`` rounded to 2 decimals, or the integer ``0`` when
    ``den`` equals zero (avoids a ZeroDivisionError).
  """
  if den != 0:
    percentage = num / den * 100
    return round(percentage, 2)
  return 0

In [4]:
# Load the annotated dataset into `data`.
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly:
# without it, open() falls back to the platform's locale encoding and the load
# can fail or mis-decode non-ASCII text on systems whose default is not UTF-8.
with open("../data/dataset.json", encoding="utf-8") as jsonfile:
  data = json.load(jsonfile)

# Counting the emotions present in the dataset

In [5]:
# Tally how many times each emotion label occurs across every individual
# answer of every message. Keys appear in first-seen order.
emotions = {}

for message in data:
  for answer in message['individual_answers']:
    for emotion in answer['part2']['emotions']:
      # .get() supplies the starting count of 0 for labels not seen before.
      emotions[emotion] = emotions.get(emotion, 0) + 1

# Bare expression so the notebook displays the resulting tally.
emotions

{'joy': 1770,
 'anger': 461,
 'fear': 181,
 'sadness': 1121,
 'negativesurprise': 307,
 'positivesurprise': 80,
 'love': 153}

# How to count (and find) the polarity of messages in the dataset

In [6]:
# Start each counter from the aggregated (part 2) polarity of the messages.
num_positive = sum(
  1 for message in data if message['part2_aggregate']['polarity'] == "positive"
)
num_neutral = sum(
  1 for message in data if message['part2_aggregate']['polarity'] == "neutral"
)
num_negative = sum(
  1 for message in data if message['part2_aggregate']['polarity'] == "negative"
)

# Then add the polarity recorded under "discussion_polarity", when present.
# NOTE(review): a message carrying this key is counted here in addition to any
# count it already received from its aggregated polarity above — confirm that
# this is the intended total.
for message in data:
  if "discussion_polarity" not in message:
    continue
  discussion_polarity = message["discussion_polarity"]
  if discussion_polarity == "positive":
    num_positive += 1
  elif discussion_polarity == "negative":
    num_negative += 1
  else:
    # Any value other than "positive"/"negative" is counted as neutral.
    num_neutral += 1

# Summarize the agreement between the different evaluators for each message.

In [7]:
# Count, separately for part 1 and part 2, how many messages fall into each
# agreement bucket based on the aggregate's 'agreement_type' value:
#   "undefined" -> none, "all" -> total, anything else -> partial.
agreement_counts = {
  "part1_aggregate": {"total": 0, "partial": 0, "none": 0},
  "part2_aggregate": {"total": 0, "partial": 0, "none": 0},
}

for message in data:
  # Part 1 is visited before part 2 (dict insertion order).
  for part_key, counts in agreement_counts.items():
    agreement_type = message[part_key]['agreement_type']
    if agreement_type == "undefined":
      bucket = "none"
    elif agreement_type == "all":
      bucket = "total"
    else:
      bucket = "partial"
    counts[bucket] += 1

# Expose the tallies under the flat names used for reporting.
agg_total_p1 = agreement_counts["part1_aggregate"]["total"]
agg_total_p2 = agreement_counts["part2_aggregate"]["total"]
agg_partial_p1 = agreement_counts["part1_aggregate"]["partial"]
agg_partial_p2 = agreement_counts["part2_aggregate"]["partial"]
agg_none_p1 = agreement_counts["part1_aggregate"]["none"]
agg_none_p2 = agreement_counts["part2_aggregate"]["none"]

print("agg_total_p1: {}, agg_total_p2: {}".format(agg_total_p1, agg_total_p2))
print("agg_partial_p1: {}, agg_partial_p2: {}".format(agg_partial_p1, agg_partial_p2))
print("agg_none_p1: {}, agg_none_p2: {}".format(agg_none_p1, agg_none_p2))

agg_total_p1: 1070, agg_total_p2: 1042
agg_partial_p1: 680, agg_partial_p2: 700
agg_none_p1: 41, agg_none_p2: 49
