In [17]:
import pandas as pd
import re
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

data_dir = Path('data')
episode_files = sorted(data_dir.glob('reddit_episode_*.csv'))

In [25]:
episode_stats = []

"""
Performance metrics:
- num_engagement = number of posts + number of comments
- homogeneity_score = score / num_posts

explanation:
- engagement metric shows how many posts are being made
- homogeneity_score is a score which shows whether audience opinions are homogenous
    - plot with standard deviation
"""

for file_path in episode_files:
    # Extract episode number from filename (e.g., reddit_episode_1.csv -> 1)
    match = re.search(r'reddit_episode_(\d+)\.csv', file_path.name)
    if match:
        episode_number = int(match.group(1))
        df = pd.read_csv(file_path)
        
        num_posts = len(df)
        num_comments = df['num_comments'].sum() if 'num_comments' in df.columns else 0
        score = df['score'].sum() if 'score' in df.columns else 0
        score_vec = df['score'].values if 'score' in df.columns else np.array([0])

        num_engagement = num_posts + num_comments
        homogeneity_score = score / num_posts if num_posts > 0 else 0
        # Standard deviation of individual post scores
        homogeneity_score_sd = np.std(score_vec / num_posts) if len(score_vec) > 0 else 0
        
        episode_stats.append({
            'episode_number': episode_number,
            'num_posts': num_posts,
            'num_comments': num_comments,
            'score': score,
            'num_engagement': num_engagement,
            'homogeneity_score': homogeneity_score,
            'homogeneity_score_sd': homogeneity_score_sd
        })

episode_analytics = pd.DataFrame(episode_stats)
episode_analytics = episode_analytics.sort_values('episode_number').reset_index(drop=True)

episode_analytics

Unnamed: 0,episode_number,num_posts,num_comments,score,num_engagement,homogeneity_score,homogeneity_score_sd
0,1,1,282,39,283,39.0,0.0
1,2,1,260,15,261,15.0,0.0
2,3,3,320,248,323,82.666667,33.85408
3,4,9,511,348,520,38.666667,4.745079
4,5,2,296,59,298,29.5,0.25
5,6,3,453,50,456,16.666667,2.439389
6,7,5,583,677,588,135.4,29.961469
7,8,3,423,119,426,39.666667,5.86473
8,9,1,366,21,367,21.0,0.0
9,10,1,418,17,419,17.0,0.0


In [50]:
episode_analytics['num_posts'].sum()
episode_analytics['num_comments'].sum()
len(episode_analytics)
episode_analytics['num_engagement'].sum() / 20 # 625.6

2251 / 625.6 * 100

episode_analytics['homogeneity_score'].loc[3:].mean()

47/2

23.5

In [42]:
# Set style for better-looking plots
plt.style.use('default')
fig_size = (12, 6)
plot_dir = Path('plot')
plot_dir.mkdir(exist_ok=True)

# 1. Bar chart: Number of engagement per episode
plt.figure(figsize=fig_size)
plt.bar(episode_analytics['episode_number'], episode_analytics['num_engagement'], 
        color='steelblue', edgecolor='black', alpha=0.7)
plt.xlabel('Episode Number', fontsize=12)
plt.ylabel('Number of Engagement', fontsize=12)
plt.title('Number of Engagement per Episode', fontsize=14, fontweight='bold')
plt.xticks(episode_analytics['episode_number'])
plt.grid(axis='y', alpha=0.3, linestyle='--')
plt.tight_layout()
plt.savefig(plot_dir / "engagement_plot.png", dpi=300, bbox_inches='tight')
plt.close()

# 2. Bar chart: Homogeneity score per episode with standard deviation error bars
plt.figure(figsize=fig_size)
bars = plt.bar(episode_analytics['episode_number'], episode_analytics['homogeneity_score'], 
               color='coral', edgecolor='black', alpha=0.7, 
               yerr=episode_analytics['homogeneity_score_sd'], capsize=5, 
               error_kw={'elinewidth': 2, 'capthick': 2})
plt.xlabel('Episode Number', fontsize=12)
plt.ylabel('Homogeneity Score', fontsize=12)
plt.title('Homogeneity Score per Episode (with Standard Deviation)', fontsize=14, fontweight='bold')
plt.xticks(episode_analytics['episode_number'])
plt.grid(axis='y', alpha=0.3, linestyle='--')
plt.tight_layout()
plt.savefig(plot_dir / "homogeneity_plot.png", dpi=300, bbox_inches='tight')
plt.close()

# 3. Line plot: Cumulative engagement over episodes
episode_analytics['cumulative_engagement'] = episode_analytics['num_engagement'].cumsum()

plt.figure(figsize=fig_size)
plt.plot(episode_analytics['episode_number'], episode_analytics['cumulative_engagement'],
         marker='o', linewidth=2, markersize=8, color='darkgreen', markerfacecolor='lightgreen',
         markeredgecolor='darkgreen', markeredgewidth=2)
plt.xlabel('Episode Number', fontsize=12)
plt.ylabel('Cumulative Engagement', fontsize=12)
plt.title('Cumulative Engagement Over Episodes', fontsize=14, fontweight='bold')
plt.xticks(episode_analytics['episode_number'])
plt.grid(alpha=0.3, linestyle='--')
plt.tight_layout()
plt.savefig(plot_dir / "cumulative_engagement_plot.png", dpi=300, bbox_inches='tight')
plt.close()

In [41]:
from matplotlib.table import Table

table_data = episode_analytics.copy()
table_data['homogeneity_score'] = table_data['homogeneity_score'].round(2)
table_data['homogeneity_score_sd'] = table_data['homogeneity_score_sd'].round(2)

table_values = table_data.astype(str).values.tolist()
table_columns = table_data.columns.tolist()

fig, ax = plt.subplots(figsize=(14, max(8, len(table_data) * 0.4)))
ax.axis('tight')
ax.axis('off')

table = ax.table(cellText=table_values,
                 colLabels=table_columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])

# Style the table
table.auto_set_font_size(False)
table.set_fontsize(9)
table.scale(1, 2)

for i in range(len(table_columns)):
    table[(0, i)].set_facecolor('#4CAF50')
    table[(0, i)].set_text_props(weight='bold', color='white')

for i in range(1, len(table_data) + 1):
    for j in range(len(table_columns)):
        if i % 2 == 0:
            table[(i, j)].set_facecolor('#f0f0f0')
        else:
            table[(i, j)].set_facecolor('white')

plt.title('Episode Analytics Summary', fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.savefig(plot_dir / "episode_analytics_table.png", dpi=300, bbox_inches='tight')
plt.close()