# Stance Visualization
Here we visualize the stances that each source type takes on different issues, using a paired dot plot.

## Imports
Necessary imports.

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display
import plotly.graph_objects as go
from scipy.stats import zscore

## Data Loading

In [None]:
# Load the table that the following cells reshape; they read its
# `source_type` column and every column whose name starts with "stance_".
df = pd.read_parquet(path="data/chunks_with_stances_final_repaired.parquet")

## DataFrame Transformations

In [None]:
# Stance labels live in columns named "stance_<topic>".
STANCE_PREFIX = "stance_"
stance_cols = [col for col in df.columns if col.startswith(STANCE_PREFIX)]
# Slice off only the leading prefix, so a topic whose own name happens to
# contain "stance_" is left intact.
topic_names = [col[len(STANCE_PREFIX):] for col in stance_cols]

# Reshape to long format: one row per (original row, stance column),
# carrying the source type along.
melted = pd.melt(
    df,
    id_vars=["source_type"],
    value_vars=stance_cols,
    var_name="stance_topic_col",
    value_name="stance_str"
)

# Extract the topic name. Every value starts with the prefix (see stance_cols),
# so positional slicing is exact and does not depend on the pandas version's
# default for `str.replace(regex=...)`.
melted["topic"] = melted["stance_topic_col"].str[len(STANCE_PREFIX):]

# Remove missing, empty and failed ("ERROR") stances.
mask = melted["stance_str"].notnull() & (melted["stance_str"] != "") & (melted["stance_str"] != "ERROR")
# Copy so the column assignment below writes to an independent frame rather
# than a slice of the unfiltered one (avoids SettingWithCopyWarning).
melted = melted[mask].copy()

# Encode stances numerically; labels are matched case-insensitively and with
# surrounding whitespace ignored.
stance_map = {"FAVOR": 1, "NONE": 0, "AGAINST": -1}
melted["stance_score"] = (
    melted["stance_str"].str.strip().str.upper().map(stance_map)
)

# Remove rows whose label is not one of the keys of stance_map.
melted = melted[melted["stance_score"].notnull()].copy()

## Aggregation

In [None]:
# Row counts per (topic, source_type); a combination that never occurs
# becomes NaN after unstacking.
agg_counts = melted.groupby(['topic', 'source_type']).size().unstack()
# Topics that have at least one row for every source type present in the data.
topics_with_both = agg_counts.dropna().index.tolist()
# Take an explicit copy: the next cell adds a column to `melted`, and writing
# to a filtered slice triggers pandas' SettingWithCopyWarning.
melted = melted[melted['topic'].isin(topics_with_both)].copy()

## Normalization

In [None]:
# Standardize each stance score against all rows still in `melted`.
melted["stance_score_z"] = zscore(melted["stance_score"])

# Mean z-score per (topic, source_type), with the group keys as plain columns.
agg = melted.groupby(["topic", "source_type"], as_index=False)["stance_score_z"].mean()

# Write the aggregate to disk without the positional index.
agg.to_csv("data/stance_z_agg.csv", index=False)

## Comparing means

In [None]:
# Mean raw stance score per (topic, source_type); replaces the z-score
# aggregate held in `agg` by the previous cell.
agg = (
    melted.groupby(["topic", "source_type"])
    .agg(stance_score=("stance_score", "mean"))
    .reset_index()
)

In [None]:
# Wide table (one row per topic, one column per source type), restricted to
# topics that have a mean for every source type.
pivot = agg.pivot(index="topic", columns="source_type", values="stance_score").dropna()

# Positive values mean the podcast mean is higher than the news mean.
pivot["diff"] = pivot["podcast"].sub(pivot["news"])

# Topics ordered from most news-leaning to most podcast-leaning.
order = pivot.sort_values(by="diff").index.tolist()

# Make `topic` an ordered categorical so sorting follows that order.
agg["topic"] = pd.Categorical(agg["topic"], categories=order, ordered=True)
agg = agg.sort_values(by="topic")

## Defining Broad Topic Categories

In [None]:
# Broad topic categories, keyed by display name. Members are topic keys in
# snake_case. Each key should appear at most once per group.
TOPIC_GROUPS = {
    # Left empty on purpose — presumably a "no filter" sentinel; confirm
    # against whatever consumes this group.
    "All": [],
    "People": [
        "alexandria_ocasio-cortez", "ali_khamenei", "benjamin_netanyahu", "bernie_sanders",
        "bob_menendez", "caitlin_clark", "chuck_schumer", "claudia_sheinbaum", "diddy", "donald_trump",
        "elon_musk", "jeff_bezos", "jeffrey_epstein", "joe_biden", "justin_trudeau", "kamala_harris",
        "kanye_west", "karoline_leavitt", "kevin_mccarthy", "luigi_mangione", "mark_carney", "mark_zuckerberg",
        "mike_johnson", "mitch_mcconnell", "nancy_pelosi", "pete_buttigieg", "pete_hegseth", "pope",
        "robert_f._kennedy_jr", "ron_desantis", "sam_altman", "taylor_swift", "tim_cook", "tim_walz",
        "vladimir_putin", "volodymyr_zelensky", "xi_jinping"
    ],
    "Countries": [
        "canada", "china", "el_salvador", "india", "iran", "israel", "mexico", "pakistan", "russia",
        "saudi_arabia", "taiwan", "united_kingdom", "united_states"
    ],
    "Political Issues": [
        "abortion", "capital_punishment", "climate_change", "communism", "democratic_party",
        "democratic_socialism", "euthanasia",
        "foreign_aid", "immigration", "military_service", "nuclear_power",
        "police", "privacy", "prostitution", "racism",
        "republican_party", "tariffs", "war", "welfare"
    ],
}

## Plotting

In [None]:
def format_topic_label(topic):
    """Turn a snake_case topic key into a space-separated, title-cased label."""
    words = topic.split("_")
    return " ".join(words).title()

def plot_zscore_relative_stances(topics):
    """Show a paired dot plot of news vs. podcast stance for the given topics.

    Stance scores are z-scored over every row of the module-level ``melted``
    frame, averaged per topic and source type, and drawn one row per topic:
    a square for news, a circle for podcast, and a connecting line that takes
    the color of whichever source has the higher score, shaded by the size of
    the gap relative to the largest gap shown.

    Args:
        topics: Iterable of topic names (values of ``melted["topic"]``) to
            include. ``None`` or an empty collection selects every topic, so
            ``TOPIC_GROUPS["All"]`` (an empty list) plots everything.

    Raises:
        ValueError: If none of the selected topics has both a news and a
            podcast score.
    """
    # Standardize on a narrow copy so the shared `melted` frame is not
    # modified as a side effect of plotting.
    scored = melted[["topic", "source_type"]].assign(
        stance_score_z=np.asarray(zscore(melted["stance_score"]))
    )

    # An empty selection means "no filter".
    wanted = list(topics) if topics is not None else []
    if wanted:
        scored = scored[scored["topic"].isin(wanted)]

    # mean z-score per (topic, source_type)
    agg = (
        scored.groupby(["topic", "source_type"])["stance_score_z"]
        .mean()
        .reset_index()
    )

    # Fail early with a readable message instead of a KeyError further down.
    if not {"news", "podcast"} <= set(agg["source_type"]):
        raise ValueError(
            "No news and podcast stance scores found for the requested topics."
        )

    # pivot where rows are topic, columns are source type, values are z-scores;
    # keep only the topics that have both news and podcast coverage
    pivot = agg.pivot(index="topic", columns="source_type", values="stance_score_z")
    pivot = pivot.dropna(subset=["news", "podcast"])
    if pivot.empty:
        raise ValueError(
            "None of the requested topics has both news and podcast coverage."
        )

    # difference between podcast and news (positive: podcast is more favorable)
    pivot["diff"] = pivot["podcast"] - pivot["news"]

    # order topics by difference
    order = pivot.sort_values("diff", ascending=True).index.tolist()

    # get x-values
    news_x = pivot.loc[order, "news"].values
    podcast_x = pivot.loc[order, "podcast"].values
    diff = pivot.loc[order, "diff"].values
    abs_diff = np.abs(diff)

    # base colors for each source (same hues as the marker colors below)
    podcast_color = np.array([251, 146, 60])
    news_color = np.array([99, 102, 241])

    def get_rgba(base_rgb, intensity):
        """Blend ``base_rgb`` toward white as ``intensity`` drops from 1 to 0.

        Opacity scales with intensity as well, from 0.4 up to 1.0.
        """
        base = np.array(base_rgb)
        color = 0.25 * base + 0.75 * 255 * (1 - intensity) + base * intensity * 0.75
        color = np.clip(color, 0, 255)
        return f"rgba({int(color[0])},{int(color[1])},{int(color[2])},{0.4 + 0.6*intensity:.2f})"

    # Scale gaps by the largest one so the color range is fully used;
    # if every gap is zero there is nothing to scale.
    abs_diff_norm = abs_diff / abs_diff.max() if abs_diff.max() > 0 else abs_diff
    # whichever source has the more positive score, the line is that color
    line_colors = [
        get_rgba(podcast_color, n) if d > 0 else get_rgba(news_color, n)
        for d, n in zip(diff, abs_diff_norm)
    ]

    # formatting y-axis labels
    formatted_order = [format_topic_label(t) for t in order]

    fig = go.Figure()

    # One trace per topic, because each connecting line has its own color.
    for x0, x1, y, color in zip(news_x, podcast_x, formatted_order, line_colors):
        fig.add_trace(go.Scatter(
            x=[x0, x1], y=[y, y],
            mode="lines",
            line=dict(color=color, width=6),
            showlegend=False,
            hoverinfo="skip"
        ))

    # markers, added after the lines
    fig.add_trace(go.Scatter(
        x=news_x,
        y=formatted_order,
        mode="markers",
        name="news",
        marker=dict(color="#6366f1", symbol="square", size=13, line=dict(color="DarkSlateGrey", width=1)),
        hovertemplate="News<br>Relative stance (z-score): %{x:.2f}<br>Topic: %{y}<extra></extra>"
    ))
    fig.add_trace(go.Scatter(
        x=podcast_x,
        y=formatted_order,
        mode="markers",
        name="podcast",
        marker=dict(color="#fb923c", symbol="circle", size=13, line=dict(color="DarkSlateGrey", width=1)),
        hovertemplate="Podcast<br>Relative stance (z-score): %{x:.2f}<br>Topic: %{y}<extra></extra>"
    ))

    fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, title=None)
    fig.update_yaxes(categoryorder="array", categoryarray=formatted_order)

    # grow the figure with the number of topics
    chart_height = 28 * len(order) + 120

    fig.update_layout(
        title=dict(
            text="<span style='font-size:20px'><b>Relative Stance by Topic and Source Type</b></span>"
                 "<br><span style='font-size:15px;'>Line color represents which medium is more favorable & size of the difference</span>",
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top"
        ),
        yaxis_title="Topic",
        legend=dict(
            orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, title=None
        ),
        template="plotly_white",
        height=chart_height,
        # The layout property is `margin`; `margins` is rejected by Plotly.
        margin=dict(l=150, r=120, t=110, b=40),
        annotations=[]
    )
    fig.show()

In [None]:
# Example: draw the paired dot plot for the "People" topic group.
plot_zscore_relative_stances(topics=TOPIC_GROUPS["People"])