# 📊 02_content_analysis.ipynb

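This notebook analyzes the Facebook content posted by Coca-Cola, Pepsi, and Fanta in Vietnam from November 2024 to March 2025. It covers the distribution of post types per brand, the average engagement (likes, comments, shares) per brand, and a word cloud of post messages for each brand.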

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

# Read the Facebook posts CSV into a DataFrame.
# The location is relative to this notebook and is only an example; adjust if needed.
DATA_PATH = '../data/facebook_posts.csv'
df = pd.read_csv(DATA_PATH)
# Preview the first rows as a quick sanity check of the load.
df.head()

In [None]:
# Post-type mix per brand: count the posts for every (brand, type) pair, then
# pivot the types into columns (missing pairs become 0) so that each brand is
# drawn as a single stacked bar.
content_by_type = (
    df.groupby(['brand', 'type'])
    .size()
    .unstack(fill_value=0)
)
ax = content_by_type.plot(
    kind='bar',
    stacked=True,
    figsize=(10, 6),
    colormap='Set2',
    rot=0,  # keep the brand labels horizontal
)
ax.set_title("Post Type Distribution by Brand")
ax.set_ylabel("Number of Posts")
ax.set_xlabel("Brand")
plt.tight_layout()
plt.show()

In [None]:
# Engagement overview: mean likes, comments and shares per post for each
# brand, rounded to two decimals, shown as grouped bars.
metric_columns = ['likes', 'comments', 'shares']
engagement_metrics = df.groupby('brand')[metric_columns].mean().round(2)
ax = engagement_metrics.plot(
    kind='bar',
    figsize=(10, 6),
    rot=0,  # keep the brand labels horizontal
)
ax.set_title("Average Engagement Metrics by Brand")
ax.set_ylabel("Average Count")
plt.tight_layout()
plt.show()

In [None]:
# WordCloud by brand
# Draw one word cloud per brand from all of that brand's post messages.
# Brands are visited in order of first appearance in the data.
for brand in df['brand'].dropna().unique():  # a NaN brand would match no rows
    # Drop missing messages and force the rest to str so that join() cannot
    # fail on values that were parsed as numbers.
    messages = df.loc[df['brand'] == brand, 'message'].dropna().astype(str)
    text = " ".join(messages)
    if not text.strip():
        # WordCloud.generate() raises ValueError when the text has no words;
        # skip this brand instead of aborting the remaining plots.
        print(f"Skipping word cloud for {brand}: no message text available")
        continue
    wc = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.figure(figsize=(10, 5))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')  # the axes carry no meaning for an image
    plt.title(f"Word Cloud - {brand}")
    plt.show()