# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# >> load the raw engagement data from the CSV next to this script
file_path = "./social_media_engagement_data.csv"
df = pd.read_csv(file_path)

# >> select the columns that are fed to the clustering models
features = [
    "Likes",
    "Comments",
    "Shares",
    "Impressions",
    "Reach",
    "Engagement Rate",
]
df_selected = df[features]

# >> replace missing values with the median of their own column
column_medians = df_selected.median()
df_selected = df_selected.fillna(value=column_medians)

# >> standardize the selected columns with StandardScaler
scaler = StandardScaler().fit(df_selected)
scaled_data = scaler.transform(df_selected)

# >> wrap the scaled array in a DataFrame that keeps the feature names
df_scaled = pd.DataFrame(data=scaled_data, columns=features)

# >> K-Means clustering: 3 clusters, 10 initializations, fixed random_state
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(scaled_data)
df_scaled["KMeans_Cluster"] = kmeans_labels

# >> DBSCAN clustering on the same scaled data
dbscan = DBSCAN(min_samples=5, eps=0.5)
dbscan_labels = dbscan.fit_predict(scaled_data)
df_scaled["DBSCAN_Cluster"] = dbscan_labels

# >> PCA (scikit-learn) down to two components, used only for visualization
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(scaled_data)
# Transposing gives one row per component, so the two rows unpack directly
# into the two coordinate columns.
df_scaled["PCA1"], df_scaled["PCA2"] = reduced_data.T

# >> one figure with two side-by-side panels: K-Means (left), DBSCAN (right)
plt.figure(figsize=(12, 5))

# (subplot position, label column, panel title, palette); a palette of None
# leaves seaborn's default colours in place for the K-Means panel.
panels = (
    (1, "KMeans_Cluster", "K-Means Clustering", None),
    (2, "DBSCAN_Cluster", "DBSCAN Clustering", "coolwarm"),
)

for position, label_column, panel_title, panel_palette in panels:
    axis = plt.subplot(1, 2, position)
    # Both panels show the same PCA coordinates; only the colouring differs.
    sns.scatterplot(
        data=df_scaled,
        x="PCA1",
        y="PCA2",
        hue=label_column,
        palette=panel_palette,
        ax=axis,
    )
    axis.set_title(panel_title)

plt.show()

# >> print how many rows were assigned to each cluster label, per algorithm
summaries = (
    ("K-Means Cluster Distribution:\n", "KMeans_Cluster"),
    ("\nDBSCAN Cluster Distribution:\n", "DBSCAN_Cluster"),
)
for heading, label_column in summaries:
    print(heading, df_scaled[label_column].value_counts())