# Sentiment Word Cloud Visualization

## Imports
Import the libraries used for data loading (pandas) and plotting (matplotlib, wordcloud).

In [1]:
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from wordcloud import WordCloud

## Data Loading

In [None]:
# Top words per (source_type, source_name, topic). The lookup code below
# filters on those three columns and reads the "top_words" column.
# The path is relative, so it is resolved against the current working directory.
topwords_df = pd.read_parquet("data/topwords_by_topic.parquet")
# Sentiment per (source_type, source_name, topic). The lookup code below
# filters on the same three columns and reads "avg_sentiment_score" and
# "quantile_sentiment_scaled".
avg_sentiment = pd.read_csv("data/avg_sentiment_by_source_topic.csv")

## Cloud Time
Generate a word cloud of the top 10 TF-IDF words for a given source and topic, with the words colored by that source's sentiment on the topic.

In [None]:
def get_topwords_and_sentiments(source_type, source_name, topic):
    """Look up the top words and sentiment values for one source/topic pair.

    Reads the module-level ``topwords_df`` and ``avg_sentiment`` frames and
    keeps, in each, the rows whose ``source_type``, ``source_name`` and
    ``topic`` columns equal the given arguments. Only the first matching row
    of each frame is used.

    Returns:
        A 3-tuple ``(words, raw_sentiment, scaled_sentiment)``:

        * ``(None, None, None)`` when ``topwords_df`` has no matching row;
        * ``(words, None, None)`` when top words exist but ``avg_sentiment``
          has no matching row;
        * otherwise the ``top_words`` value together with the
          ``avg_sentiment_score`` and ``quantile_sentiment_scaled`` values.
    """

    def matching_rows(frame):
        # Both frames are keyed by the same three columns.
        is_match = (
            (frame["source_type"] == source_type)
            & (frame["source_name"] == source_name)
            & (frame["topic"] == topic)
        )
        return frame[is_match]

    # Top words first: without them there is nothing to report at all.
    word_hits = matching_rows(topwords_df)
    if word_hits.empty:
        return None, None, None
    top_words = word_hits.iloc[0]["top_words"]

    # Sentiment is optional; hand back the words alone when it is missing.
    sentiment_hits = matching_rows(avg_sentiment)
    if sentiment_hits.empty:
        return top_words, None, None

    # Raw average sentiment plus its quantile-scaled counterpart.
    first_sentiment = sentiment_hits.iloc[0]
    return (
        top_words,
        first_sentiment["avg_sentiment_score"],
        first_sentiment["quantile_sentiment_scaled"],
    )

In [None]:
def plot_wordcloud_for_source_topic_colored(source_type, source_name, topic):
    """Plot a word cloud of a source's top words for a topic, colored by sentiment.

    The words and sentiment values come from ``get_topwords_and_sentiments``.
    Every word is drawn with equal weight and in one shared color: the
    ``RdYlGn`` colormap evaluated at the quantile-scaled sentiment value.
    The title shows the source name and topic on the first line and the raw
    average sentiment on the second.

    Args:
        source_type: Value to match in the ``source_type`` column.
        source_name: Value to match in the ``source_name`` column; also
            shown in the title.
        topic: Value to match in the ``topic`` column; also shown in the title.

    Returns:
        None. The figure is displayed with ``plt.show()``. If there are no
        top words for the combination, a message is printed and nothing is
        plotted. If the words exist but the sentiment is missing, the cloud
        is drawn in grey and the title reports the sentiment as ``N/A``.
    """
    words, raw_sent, color_value = get_topwords_and_sentiments(source_type, source_name, topic)

    # Equal weight for every word: the cloud conveys membership, not rank.
    word_freq = {w: 1 for w in words} if words is not None else {}

    # Nothing to draw (no row at all, or an empty word list, which WordCloud
    # would reject with an exception).
    if not word_freq:
        print("No data for this combination.")
        return

    # The lookup returns None for both sentiment values when there is no
    # sentiment row; pd.isna also covers a NaN cell in the loaded data.
    has_color = not pd.isna(color_value)
    has_sentiment = not pd.isna(raw_sent)

    if has_color:
        # float() matters: a matplotlib Colormap interprets integer input as
        # a lookup-table index rather than a position in [0, 1].
        r, g, b, _ = matplotlib.cm.RdYlGn(float(color_value))
        word_color = f"rgb({int(r*255)},{int(g*255)},{int(b*255)})"
    else:
        # Neutral grey marks "sentiment unknown" without implying a score.
        word_color = "rgb(128,128,128)"

    # WordCloud calls this once per word; the color is the same for all of
    # them, so it is computed once above instead of on every call.
    def color_func(*args, **kwargs):
        return word_color

    # build and color word cloud
    wc = WordCloud(width=600, height=300, background_color="white").generate_from_frequencies(word_freq)

    plt.figure(figsize=(10, 6))
    plt.imshow(wc.recolor(color_func=color_func), interpolation="bilinear")
    plt.axis("off")

    # label chart; "\n" is a real newline so the sentiment gets its own line
    sentiment_label = f"{raw_sent:+.2f}" if has_sentiment else "N/A"
    plt.title(
        f"{source_name} -- {topic}\n"
        f"Average Sentiment: {sentiment_label}", fontsize=16
    )
    plt.show()

In [None]:
# Example: word cloud for NPR (a "news" source) on the
# economy/business/finance topic.
plot_wordcloud_for_source_topic_colored(
    source_type="news",
    source_name="NPR",
    topic="topic_economy,_business_and_finance",
)