In [1]:
import pandas as pd
import json

In [2]:
# Load the annotated dataset (tab-separated values).
df = pd.read_csv(
    "../data/annotated_data/swift_annotated.tsv",
    sep="\t",
)

## Get the sentiment counts for each topic

In [3]:
# Distinct values found in the "Category" column.
topics = df["Category"].unique()
# One (initially empty) sentiment tally per topic.
topic_sentiment = {topic: {} for topic in topics}

In [4]:
# The "Sentiment" column is looked up by numeric code:
# 1.0 -> positive, 2.0 -> neutral, 3.0 -> negative.
sentiment_codes = {"positive": 1.0, "neutral": 2.0, "negative": 3.0}

for topic in topics:
    # Sentiment values of the rows belonging to this topic only.
    in_topic = df["Category"] == topic
    tally = df.loc[in_topic, "Sentiment"].value_counts()
    for label, code in sentiment_codes.items():
        # A code that never occurs for this topic counts as 0.
        topic_sentiment[topic][label] = int(tally.get(code, 0))

In [5]:
# Persist the per-topic sentiment counts as indented JSON.
with open("../results/topic_sentiment.json", mode="w") as out_file:
    json.dump(topic_sentiment, out_file, indent=4)

### Topic distribution

In [25]:
# Share of rows per topic, expressed as a fraction (0-1) rounded to two decimals.
topic_counts = df["Category"].value_counts()
# The grand total is the same for every topic, so compute it once.
categorised_rows = topic_counts.sum()

topic_percentages = {
    topic: round(topic_counts.get(topic, 0) / categorised_rows, 2)
    for topic in topics
}

# Order topics from the largest share to the smallest.
topic_percentages = dict(
    sorted(topic_percentages.items(), key=lambda item: item[1], reverse=True)
)

# Rounding each share independently can make the total drift away from 1.00.
# NOTE(review): this cell previously subtracted a fixed 0.01 from
# "Music Releases", presumably to cancel exactly that drift -- confirm.
# The correction is now measured instead of hard-coded, so it is a no-op when
# the shares already sum to 1.00, it no longer raises KeyError when the
# category is missing, and the adjusted value is re-rounded so float artifacts
# (e.g. 0.15 - 0.01 == 0.13999999999999999) do not reach the JSON output.
DRIFT_TOPIC = "Music Releases"
rounding_drift = round(sum(topic_percentages.values()) - 1.0, 2)
if topic_percentages and rounding_drift != 0:
    # Keep adjusting the historically chosen topic when present; otherwise
    # fall back to the topic with the largest share (first after sorting).
    if DRIFT_TOPIC in topic_percentages:
        adjusted_topic = DRIFT_TOPIC
    else:
        adjusted_topic = next(iter(topic_percentages))
    topic_percentages[adjusted_topic] = round(
        topic_percentages[adjusted_topic] - rounding_drift, 2
    )

In [26]:
# Persist the topic distribution as indented JSON.
with open("../results/topic_percentages.json", mode="w") as out_file:
    json.dump(topic_percentages, out_file, indent=4)

## Get the total sentiment counts

In [7]:
# Tally of every sentiment code across the whole dataset.
counts = df["Sentiment"].value_counts()
# Map each label to the count of its numeric code; a missing code counts as 0.
sentiment = {
    label: int(counts.get(code, 0))
    for label, code in (("positive", 1.0), ("neutral", 2.0), ("negative", 3.0))
}

In [8]:
# Persist the overall sentiment counts as indented JSON.
with open("../results/sentiment_distribution.json", mode="w") as out_file:
    json.dump(sentiment, out_file, indent=4)

## Get sentiment percentages

### Total percentage

In [9]:
# Number of rows that carry one of the three sentiment labels.
total = sum(sentiment.values())
# Each label's share of that total, as a fraction (0-1) rounded to two decimals.
sentiment_percentages = {
    label: round(sentiment[label] / total, 2)
    for label in ("positive", "neutral", "negative")
}

In [10]:
# Persist the overall sentiment shares as indented JSON.
with open("../results/sentiment_percentages.json", mode="w") as out_file:
    json.dump(sentiment_percentages, out_file, indent=4)

### Within-topic sentiment percentage (share of each sentiment inside a topic)

In [27]:
# For each topic, the share of its own rows that is positive / neutral /
# negative, as a fraction (0-1) rounded to two decimals.
#
# The result is built from the in-memory `topic_sentiment` counts instead of
# re-reading topic_sentiment.json: the JSON round-trip turns every key into a
# string, so indexing the loaded dict with a non-string topic (e.g. NaN or a
# numeric category) raised KeyError, and the cell silently depended on a file
# written by an earlier cell.
within_topic_percentages = {}
for topic in topics:
    topic_tally = topic_sentiment[topic]
    topic_total = sum(topic_tally.values())
    within_topic_percentages[topic] = {
        # A topic without any labelled rows gets 0.0 instead of raising
        # ZeroDivisionError.
        label: round(topic_tally[label] / topic_total, 2) if topic_total else 0.0
        for label in ("positive", "neutral", "negative")
    }

In [28]:
# Persist the within-topic sentiment shares as indented JSON.
with open("../results/within_topic_percentages.json", mode="w") as out_file:
    json.dump(within_topic_percentages, out_file, indent=4)

### Total topic sentiment percentage (each topic's share of a sentiment's overall count)

In [29]:
# For each sentiment label, the share of its overall count that falls in each
# topic: count(topic, label) / count(label), rounded to two decimals.
#
# The counts come from the in-memory `topic_sentiment` dict rather than from
# topic_sentiment.json: the JSON round-trip turns every key into a string, so
# indexing the loaded dict with a non-string topic (e.g. NaN or a numeric
# category) raised KeyError, and the cell silently depended on a file written
# by an earlier cell.
total_topic_percentages = {}
for topic in topics:
    total_topic_percentages[topic] = {
        # A sentiment label with no rows at all yields 0.0 instead of raising
        # ZeroDivisionError.
        label: (
            round(topic_sentiment[topic][label] / sentiment[label], 2)
            if sentiment[label]
            else 0.0
        )
        for label in ("positive", "neutral", "negative")
    }

In [30]:
# Persist each topic's share of the overall sentiment totals as indented JSON.
with open("../results/sentiment_topic_percentages.json", mode="w") as out_file:
    json.dump(total_topic_percentages, out_file, indent=4)