In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Build a reproducible synthetic dataset to drive the example plots below.
np.random.seed(42)  # fixed seed so every run yields the same mock data
n = 200

categories = ['Education', 'Entertainment', 'Technology', 'Music', 'Lifestyle']

# Draw the random columns one at a time, in a fixed order, so the seeded
# random stream is consumed identically on every run.
category_draws = np.random.choice(categories, size=n)
engagement_draws = np.random.beta(2, 5, size=n) * 100  # Beta(2, 5) samples scaled by 100
sentiment_draws = np.random.normal(loc=0.2, scale=0.1, size=n)

# One row per mock video; publish dates are consecutive days from 2024-01-01.
df = pd.DataFrame({
    'video_id': np.arange(n),
    'category': category_draws,
    'engagement_score': engagement_draws,
    'avg_sentiment_proxy': sentiment_draws,
    'publish_date': pd.date_range(start='2024-01-01', periods=n, freq='D'),
})

In [None]:
# Plot 1: histogram (30 bins) of engagement scores with a KDE curve overlaid.
# NOTE(review): the emoji in the title may not have a glyph in Matplotlib's
# default font — confirm it renders in the target environment.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['engagement_score'], bins=30, color='blue', kde=True, ax=ax)
ax.set(
    title='📊 Engagement Score Distribution',
    xlabel='Engagement Score',
    ylabel='Frequency',
)
fig.tight_layout()
plt.show()

In [None]:
# Plot 2: histogram (30 bins) of the average sentiment proxy with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['avg_sentiment_proxy'], bins=30, color='green', kde=True, ax=ax)
ax.set(
    title='💬 Average Sentiment Proxy Distribution',
    xlabel='Average Sentiment Proxy',
    ylabel='Frequency',
)
fig.tight_layout()
plt.show()

In [None]:
# Plot 3: one box per video category showing the spread of engagement scores.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x='category', y='engagement_score', ax=ax)
ax.set_title('📦 Engagement Score by Video Category')
ax.set_xlabel('Category')
ax.set_ylabel('Engagement Score')
# Tilt the category tick labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Plot 4: scatter of sentiment proxy (x) against engagement score (y),
# with points coloured by video category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='avg_sentiment_proxy',
    y='engagement_score',
    hue='category',
    ax=ax,
)
ax.set_title('📈 Sentiment Proxy vs. Engagement Score')
ax.set_xlabel('Average Sentiment Proxy')
ax.set_ylabel('Engagement Score')
# Anchor the legend just outside the right edge of the axes.
ax.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
# Plot 5: mean engagement score per publish date, drawn as a line over time.
# Coerce the column to datetime first; values that cannot be parsed become NaT.
df['publish_date'] = pd.to_datetime(df['publish_date'], errors='coerce')

# Average the scores within each publish date and order the result by date.
engagement_over_time = (
    df.groupby('publish_date')['engagement_score']
    .mean()
    .sort_index()
)

fig, ax = plt.subplots(figsize=(12, 6))
engagement_over_time.plot(ax=ax, color='purple')
ax.set_title('🕒 Average Engagement Score Over Time')
ax.set_xlabel('Publish Date')
ax.set_ylabel('Average Engagement Score')
ax.grid(True)
fig.tight_layout()
plt.show()