# Facebook Marketplace Data Analysis
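This notebook analyzes Facebook Marketplace post data and covers the following:
1. How the time of upload affects the number of reactions
2. Correlation between reactions, comments, and shares
3. K-Means clustering of posts
4. Elbow method for choosing the number of clusters
5. Count of each post type
6. Average reactions, comments, and shares per post type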

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Read the raw Marketplace export into the working DataFrame.
# The path is relative to the directory the notebook is run from.
DATA_FILE = "Facebook_Marketplace_data.csv"
df = pd.read_csv(DATA_FILE)

## 1. Effect of Time on `num_reactions`

In [None]:
# Parse the publication timestamp and derive the hour of day (0-23).
# errors='coerce' turns values that cannot be parsed into NaT instead of raising.
df['status_published'] = pd.to_datetime(df['status_published'], errors='coerce')
df['publish_hour'] = df['status_published'].dt.hour

# groupby() silently drops rows whose hour is missing, so report how many posts
# are excluded rather than letting the averages quietly rest on fewer rows.
n_missing_time = int(df['status_published'].isna().sum())
if n_missing_time:
    print(f"Note: {n_missing_time} post(s) have a missing or unparseable "
          "'status_published' and are excluded from the hourly averages.")

# Average reactions by hour
hourly_reactions = df.groupby('publish_hour')['num_reactions'].mean()
# .dt.hour is float when any timestamp is NaT; the NaN group has already been
# dropped by groupby, so casting the index back to int is safe and keeps the
# tick labels as 0..23 instead of 0.0..23.0.
hourly_reactions.index = hourly_reactions.index.astype(int)

# Plot all 24 hours so that hours without any posts appear as empty slots
# instead of being skipped, which would make the x-axis non-uniform.
all_hours = pd.RangeIndex(24, name='publish_hour')
fig, ax = plt.subplots(figsize=(10, 5))
hourly_reactions.reindex(all_hours).plot(kind='bar', color='skyblue', rot=0, ax=ax)
ax.set_title('Average Number of Reactions by Hour of Post')
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Average Reactions')
ax.grid(axis='y')
fig.tight_layout()
plt.show()

## 2. Correlation Between `num_reactions`, `num_comments`, `num_shares`

In [None]:
# Pairwise correlation (pandas default: Pearson) between the three
# engagement counts.
engagement_metrics = ['num_reactions', 'num_comments', 'num_shares']
correlation = df.loc[:, engagement_metrics].corr()

# Draw the correlation matrix as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(correlation, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Between Engagement Metrics')
fig.tight_layout()
plt.show()

## 3. K-Means Clustering

In [None]:
# Columns used for clustering: the post type plus every engagement count.
# NOTE(review): if num_reactions is the sum of the individual reaction columns
# (likes, loves, ...), it is redundant here and gives reactions extra weight
# in the distance computation -- confirm against the dataset description.
features = ['status_type', 'num_reactions', 'num_comments', 'num_shares',
            'num_likes', 'num_loves', 'num_wows', 'num_hahas', 'num_sads', 'num_angrys']

# One-hot encode 'status_type' so the categorical column becomes numeric
df_encoded = pd.get_dummies(df[features], columns=['status_type'])

# Standardize every column (zero mean, unit variance). K-Means is distance
# based, so unscaled counts with large ranges would otherwise dominate.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df_encoded)

# Fit the 3-cluster model and store each post's label on the DataFrame.
# n_init is set explicitly: scikit-learn changed its default from 10 to 'auto'
# in 1.4 (and warns about it in 1.2-1.3), so relying on the default can give
# different clusters in different environments even with a fixed random_state.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
df['cluster'] = kmeans.fit_predict(scaled_data)

## 4. Elbow Method for Optimal Clusters

In [None]:
# Fit K-Means for k = 1..10 on the scaled features and record each model's
# inertia (within-cluster sum of squared distances).
cluster_range = range(1, 11)

# The models are fitted inline and discarded so that this cell does not rebind
# `kmeans`, the 3-cluster model whose labels are stored in df['cluster'].
# n_init is pinned because scikit-learn's default changed (10 -> 'auto' in 1.4),
# which would otherwise make the curve depend on the installed version.
inertia = [
    KMeans(n_clusters=k, n_init=10, random_state=0).fit(scaled_data).inertia_
    for k in cluster_range
]

# Plot inertia against k; the "elbow" is where adding clusters stops paying off.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(cluster_range, inertia, marker='o')
ax.set_title('Elbow Method for Optimal k')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Inertia')
# One tick per candidate k so every evaluated value is readable on the axis.
ax.set_xticks(list(cluster_range))
ax.grid(True)
fig.tight_layout()
plt.show()

## 5. Count of Different Types of Posts

In [None]:
# Number of posts for each value of 'status_type', most frequent first.
status_type_column = df['status_type']
post_type_counts = status_type_column.value_counts()

# Heading and table are emitted on separate lines.
print("Post type counts:", post_type_counts, sep="\n")

## 6. Average Metrics per Post Type

In [None]:
# Mean reactions, comments, and shares for each post type.
metric_columns = ['num_reactions', 'num_comments', 'num_shares']
avg_metrics = (
    df.groupby('status_type')[metric_columns]
    .mean()
)

# Heading and table are emitted on separate lines.
print("Average reactions, comments, and shares per post type:", avg_metrics, sep="\n")