In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import davies_bouldin_score
import seaborn as sns
import matplotlib.pyplot as plt

# Load the customer master table and the transaction log.
# Both paths are relative to the current working directory.
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")

# Collapse the transaction log to one row per customer: summed TotalValue,
# summed Quantity, and the number of non-null TransactionID values.
# After this step the 'TransactionID' column holds a count, not an identifier.
customer_aggregated = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'TransactionID': 'count'
}).reset_index()

# A left merge keeps customers that have no transactions at all; their
# aggregate columns come back as NaN.
customer_features = customers.merge(customer_aggregated, on='CustomerID', how='left')

# Zero-fill only the aggregate columns: "no transactions" genuinely means 0
# spend / 0 units / 0 orders. A frame-wide fillna(0) would also overwrite
# missing values in the customers' own columns (e.g. Region, SignupDate)
# with the integer 0, hiding data-quality problems and corrupting the
# exported CSV.
# NOTE(review): assumes the customers table contributes no numeric feature
# columns containing NaN — confirm against Customers.csv.
aggregate_columns = ['TotalValue', 'Quantity', 'TransactionID']
customer_features[aggregate_columns] = customer_features[aggregate_columns].fillna(0)

# Build the feature matrix: everything in customer_features except the
# identifier / descriptive columns listed here is fed to the scaler.
numeric_features = customer_features.drop(columns=['CustomerID', 'CustomerName', 'Region', 'SignupDate'])

# Standardize the features so that columns on larger scales do not dominate
# the Euclidean distances K-Means relies on.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(numeric_features)

# K-Means clustering into 5 groups.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (a FutureWarning was emitted in between), so leaving it implicit
# makes the labels and the score below depend on the installed version.
# random_state fixes the centroid initialisation for reproducibility.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
customer_features['Cluster'] = kmeans.fit_predict(scaled_features)

# Evaluate the clustering with the Davies-Bouldin index, computed on the same
# scaled features the model was fitted on (lower values indicate better
# separated clusters).
db_index = davies_bouldin_score(scaled_features, customer_features['Cluster'])
print(f"Davies-Bouldin Index: {db_index}")

# Project the scaled feature matrix onto two principal components so the
# cluster assignment can be drawn on a flat scatter plot.
pca = PCA(n_components=2)
reduced_features = pca.fit_transform(scaled_features)

# Rows of the transposed projection are the two component coordinate vectors.
customer_features['PCA1'], customer_features['PCA2'] = reduced_features.T

# One point per customer in PCA space, coloured by its cluster label.
sns.scatterplot(
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    palette='viridis',
    data=customer_features,
)
plt.title('Customer Clusters')
plt.show()

# Write the customer table, now carrying the Cluster, PCA1 and PCA2 columns,
# to disk without the DataFrame index.
customer_features.to_csv("FirstName_LastName_Clustering.csv", index=False)
