In [4]:
from html import escape

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML

# Load the CSV (replace with your actual file path)
data = pd.read_csv("../../data/topic_modeling/all_sources_headline_topics_sklearn.csv")

# Flag every topic whose name mentions 'trump' (case-insensitive).
# na=False counts a missing topic name as "no mention", which keeps the
# column strictly boolean so the per-source sum below is a clean integer.
data['has_trump'] = data['topic'].str.contains('trump', case=False, na=False)

# Restrict to topics ranked 3 or better
top3_data = data[data['rank'] <= 3]

# Per source: number of top-3 topics (non-null names) and how many mention Trump
trump_analysis = top3_data.groupby('source').agg(
    total_top3_topics=('topic', 'count'),
    trump_topics=('has_trump', 'sum'),
).reset_index()

# Share of top-3 topics that mention Trump, as a percentage with 2 decimals
trump_analysis['trump_percentage'] = (
    trump_analysis['trump_topics'] / trump_analysis['total_top3_topics'] * 100
).round(2)

# Highest percentage first
trump_analysis = trump_analysis.sort_values('trump_percentage', ascending=False)

# Rename columns for better readability
trump_analysis = trump_analysis.rename(columns={
    'source': 'Source',
    'trump_topics': '# topics mentioning Trump',
    'trump_percentage': '% topics mentioning Trump',
})

# Keep only the display columns (this drops 'total_top3_topics').
# The explicit .copy() makes the result an independent frame, so the column
# assignment below cannot trigger pandas' SettingWithCopyWarning.
trump_analysis = trump_analysis[
    ['Source', '# topics mentioning Trump', '% topics mentioning Trump']
].copy()

# Uppercase the source names
trump_analysis['Source'] = trump_analysis['Source'].str.upper()

# Text colour used for each known source's name cell; any other source
# falls back to white.
_SOURCE_COLORS = {
    'MSNBC': '#6666ff',  # Blue
    'FOX': '#ff4444',  # Red
    'ABC': '#cc44cc',  # Purple
}


def _percentage_bg_color(percentage):
    """Return the CSS ``rgb()`` background for a percentage cell.

    The colour is interpolated linearly from rgb(180, 30, 30) at 0 to
    rgb(110, 0, 0) at 100, i.e. higher percentage = darker red.
    """
    remaining = 1 - percentage / 100
    r = int(110 + (180 - 110) * remaining)
    g = int(0 + 30 * remaining)
    b = int(0 + 30 * remaining)
    return f'rgb({r}, {g}, {b})'


def _render_row(source, topics, percentage):
    """Return the ``<tr>`` markup for one source.

    The source name comes straight from the CSV, so it is HTML-escaped
    before being placed in the markup; the two numeric cells are inserted
    as-is.
    """
    source_color = _SOURCE_COLORS.get(source, 'white')
    bg_color = _percentage_bg_color(percentage)
    return f"""
    <tr>
        <td style="color: {source_color}; font-weight: bold;">{escape(str(source))}</td>
        <td>{topics}</td>
        <td style="background-color: {bg_color};">{percentage}</td>
    </tr>
    """


# Generate a complete HTML table directly: static page header and table head
html = """
<!DOCTYPE html>
<html>
<head>
<style>
body {
    font-family: Arial, sans-serif;
    background-color: #ffffff;
    margin: 20px;
}
table {
    border-collapse: collapse;
    width: 100%;
    max-width: 800px;
    margin: 0 auto;
    border: 1px solid #500000;
}
th {
    background-color: #6e0000;
    color: white;
    padding: 12px;
    text-align: center;
    font-weight: bold;
    border: 1px solid #500000;
}
td {
    padding: 10px;
    text-align: center;
    border: 1px solid #500000;
    background-color: #6e0000;
    color: white;
}
</style>
</head>
<body>
<table>
    <tr>
        <th>Source</th>
        <th># topics mentioning Trump</th>
        <th>% topics mentioning Trump</th>
    </tr>
"""

# Add one styled row per source, in the frame's current (sorted) order.
# Iterating the three columns in lockstep avoids building a Series per row,
# and a single join avoids repeated string concatenation.
html += ''.join(
    _render_row(source, topics, percentage)
    for source, topics, percentage in zip(
        trump_analysis['Source'],
        trump_analysis['# topics mentioning Trump'],
        trump_analysis['% topics mentioning Trump'],
    )
)

# Close the table and the document
html += """
</table>
</body>
</html>
"""

# Write to file. The encoding is stated explicitly so the saved page does not
# depend on the platform's default text encoding.
with open('trump_coverage_table.html', 'w', encoding='utf-8') as f:
    f.write(html)

print("Table created and saved as 'trump_coverage_table.html'")

# For Jupyter notebook display: render the same markup inline in the cell output
display(HTML(html))

Table created and saved as 'trump_coverage_table.html'


Source,# topics mentioning Trump,% topics mentioning Trump
MSNBC,261,71.31
FOX,104,31.23
ABC,88,23.85
