# ⚔️ 03_brand_comparison.ipynb

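Compare the three brands – Coke, Pepsi, and Fanta – on post types, posting times, top keywords, engagement, and overall content strategy. Note that the engagement figures (likes, comments, shares) are randomly simulated in this notebook, not real measurements.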

In [19]:
# Data-handling and plotting libraries used by the cells below.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "whitegrid" style to the plots drawn later in the notebook.
sns.set(style="whitegrid")

In [20]:
# Read the content-analysis table (it carries post_type) and parse the
# post_date column into datetimes.
content_path = "../data/content_analysis.csv"
df = pd.read_csv(content_path)
df = df.assign(post_date=pd.to_datetime(df['post_date']))

## 🧮 1. Engagement Metrics (Simulated)

In [21]:
# Attach simulated engagement counts to every post.
import numpy as np

# Fixed seed so the simulated numbers are the same on every run.
np.random.seed(42)
n_posts = len(df)

# The draws happen in this order (likes, comments, shares); the order matters
# because all three consume the same seeded random stream.
simulated_counts = {
    'likes': np.random.randint(100, 500, n_posts),
    'comments': np.random.randint(10, 50, n_posts),
    'shares': np.random.randint(5, 30, n_posts),
}
for column_name, values in simulated_counts.items():
    df[column_name] = values

# Total engagement is the plain sum of the three simulated counts.
df['engagement'] = df['likes'] + df['comments'] + df['shares']

In [22]:
# Mean likes, comments, shares and total engagement per brand, rounded to one
# decimal place.
# The metric columns are selected with a list: selecting several columns from
# a groupby with a bare tuple (['likes', 'comments', ...] without the inner
# brackets) was deprecated in pandas 1.x and raises in pandas 2.x.
metric_cols = ['likes', 'comments', 'shares', 'engagement']
eng_summary = df.groupby('brand')[metric_cols].mean().round(1)
eng_summary

## 📊 2. Post Type Distribution

In [23]:
# Count of posts per post_type, with one bar per brand in each group.
ax = sns.countplot(data=df, x='post_type', hue='brand')
ax.set_title("Post Type Distribution by Brand")
# Tilt the category labels slightly so they do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(15)
plt.show()

## ⏰ 3. Posting Times by Brand

In [24]:
# Derive the hour of day from each post's timestamp; the summary table further
# down reads this 'hour' column as well.
df['hour'] = df['post_date'].dt.hour

# One box per brand showing the spread of posting hours.
ax = sns.boxplot(data=df, x='brand', y='hour')
ax.set_title("Posting Time Distribution by Brand")
ax.set_ylabel("Hour of Day")
plt.show()

## 🗝 4. Top Keywords by Brand

In [25]:
from sklearn.feature_extraction.text import CountVectorizer

# Collect, for each brand, up to ten of its most frequent non-stop-word terms,
# ordered from most to least frequent.
top_keywords = {}
for brand in df['brand'].unique():
    texts = df[df['brand'] == brand]['content'].dropna().astype(str)
    if texts.empty:
        # No usable text for this brand; fitting a vectorizer on nothing fails.
        top_keywords[brand] = []
        continue
    vec = CountVectorizer(stop_words='english', max_features=10)
    term_matrix = vec.fit_transform(texts)
    # get_feature_names_out() lists the kept vocabulary alphabetically, so
    # total the per-document counts and sort them to get a real frequency
    # ranking. The matrix has at most ten columns, so densifying it is cheap.
    term_totals = pd.DataFrame(
        term_matrix.toarray(), columns=vec.get_feature_names_out()
    ).sum()
    ranked_terms = term_totals.sort_values(ascending=False, kind='stable')
    top_keywords[brand] = list(ranked_terms.index)

# Wrap each list in a Series so that brands with fewer than ten keywords are
# padded with NaN instead of raising on unequal column lengths.
pd.DataFrame({
    brand: pd.Series(words, dtype='object')
    for brand, words in top_keywords.items()
})

## 📋 5. Summary Table & Observations

In [26]:
# Per-brand roll-up: mean engagement, mean word count, the most frequent post
# type, and the modal posting hour (the first mode when several tie).
brand_aggregations = {
    'engagement': 'mean',
    'word_count': 'mean',
    'post_type': lambda types: types.value_counts().idxmax(),
    'hour': lambda hours: hours.mode()[0],
}

# Display labels for the aggregated columns.
display_names = {
    'engagement': 'Avg Engagement',
    'word_count': 'Avg Word Count',
    'post_type': 'Top Post Type',
    'hour': 'Most Active Hour',
}

summary = (
    df.groupby('brand')
    .agg(brand_aggregations)
    .round(1)
    .rename(columns=display_names)
)
summary

In [27]:
# Write the summary table to disk; the brand index is written as the first column.
output_path = "../data/brand_comparison_summary.csv"
summary.to_csv(output_path)
print("✅ Comparison summary saved.")