# In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import davies_bouldin_score, silhouette_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load the two input tables from the notebook's working directory.
customers = pd.read_csv("/content/Customers.csv")
transactions = pd.read_csv("/content/Transactions.csv")

# Attach each transaction to its customer record. The inner join keeps only
# transactions with a matching customer (and customers with at least one
# transaction). validate="many_to_one" makes pandas raise MergeError if a
# CustomerID occurs more than once in `customers`; without the check such
# duplicates would silently multiply transaction rows and inflate every
# per-customer total computed from the merged frame.
merged_data = pd.merge(
    transactions,
    customers,
    on="CustomerID",
    how="inner",
    validate="many_to_one",
)

# Collapse the transaction-level rows into one feature row per customer:
#   Region       - first recorded Region value for the customer
#   TotalValue   - sum of TotalValue over the customer's rows
#   Quantity     - sum of Quantity over the customer's rows
#   ProductCount - number of distinct ProductID values
# Named aggregation produces the final column names directly (no rename step),
# and the built-in "first" reducer replaces a per-group Python lambda. Unlike
# `x.iloc[0]`, "first" skips missing values, so a customer whose leading row
# lacks a Region still receives the first recorded one.
customer_features = (
    merged_data.groupby("CustomerID")
    .agg(
        Region=("Region", "first"),
        TotalValue=("TotalValue", "sum"),
        Quantity=("Quantity", "sum"),
        ProductCount=("ProductID", "nunique"),
    )
    .reset_index()
)

# Integer code per distinct Region value (missing regions become -1).
# NOTE(review): these codes are arbitrary labels, but the distance-based
# clustering downstream will treat them as an ordered numeric scale; consider
# one-hot encoding if region should only act as a category.
customer_features["RegionEncoded"] = customer_features["Region"].astype("category").cat.codes

# Numeric feature matrix handed to the scaler; the column order matters because
# later code addresses the scaled columns by position.
clustering_data = customer_features[["RegionEncoded", "TotalValue", "Quantity", "ProductCount"]]

# Standardise every feature to zero mean and unit variance so that no single
# column dominates the Euclidean distances K-Means relies on.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(clustering_data)

# Partition the customers into 4 clusters. n_init is pinned explicitly: its
# default changed from 10 to "auto" in scikit-learn 1.4 (a single k-means++
# run), and versions 1.2-1.3 emit a FutureWarning when it is omitted. Fixing it
# keeps the number of restarts, and therefore the result, consistent across
# library versions; random_state makes those restarts reproducible.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
customer_features["Cluster"] = kmeans.fit_predict(scaled_data)

# Internal validity metrics computed on the scaled features:
#   Davies-Bouldin index - lower means better-separated clusters
#   Silhouette score     - ranges over [-1, 1], higher is better
db_index = davies_bouldin_score(scaled_data, kmeans.labels_)
silhouette_avg = silhouette_score(scaled_data, kmeans.labels_)

print(f"Davies-Bouldin Index: {db_index}")
print(f"Silhouette Score: {silhouette_avg}")

# Visualise the segmentation: plot the first two scaled feature columns against
# each other and colour every customer by its assigned cluster label.
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(
    x=scaled_data[:, 0],
    y=scaled_data[:, 1],
    hue=customer_features["Cluster"],
    palette="viridis",
)
# seaborn returns the Axes it drew on, so the labels are set on it directly.
ax.set_title("Customer Segmentation Clusters")
ax.set_xlabel("Feature 1 (Region Encoded)")
ax.set_ylabel("Feature 2 (Scaled Total Value)")
ax.legend(title="Cluster")
plt.show()

# Persist the per-customer feature table, including the assigned cluster label,
# to the current working directory. The DataFrame index is not written.
customer_features.to_csv(path_or_buf="Customer_Clusters.csv", index=False)
print("Clustering results saved to Customer_Clusters.csv")