In [None]:

# Customer Clustering Analysis
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

def prepare_clustering_features(transactions, customers):
    """Build one row of aggregate purchase features per customer.

    Groups ``transactions`` by ``CustomerID`` and computes the sum and mean
    of ``TotalValue`` and ``Quantity`` together with the count of
    ``TransactionID``. Missing values in the aggregated frame are replaced
    with 0 and the two-level column labels are flattened to
    ``<column>_<aggregate>``.

    Args:
        transactions: DataFrame with ``CustomerID``, ``TotalValue``,
            ``Quantity`` and ``TransactionID`` columns.
        customers: Not read by this function; present in the signature only.

    Returns:
        DataFrame indexed by ``CustomerID`` with the columns
        ``TotalValue_sum``, ``TotalValue_mean``, ``Quantity_sum``,
        ``Quantity_mean`` and ``TransactionID_count``.
    """
    aggregations = {
        'TotalValue': ['sum', 'mean'],
        'Quantity': ['sum', 'mean'],
        'TransactionID': 'count',
    }
    per_customer = transactions.groupby('CustomerID').agg(aggregations)
    per_customer = per_customer.fillna(0)

    # The aggregation yields (column, aggregate) label pairs; collapse each
    # pair into a single flat name such as 'TotalValue_sum'.
    flat_names = []
    for source_column, statistic in per_customer.columns.values:
        flat_names.append('_'.join((source_column, statistic)).strip())
    per_customer.columns = flat_names

    return per_customer

def perform_clustering(features, n_clusters=3):
    """Standardize the features, run K-Means, and summarize each cluster.

    Args:
        features: DataFrame of numeric features, one row per sample.
        n_clusters: Number of K-Means clusters to fit.

    Returns:
        A ``(clusters, cluster_analysis)`` tuple: the per-row cluster labels
        returned by ``KMeans.fit_predict``, and a DataFrame holding the mean
        of every feature column for each cluster label.

    Side effects:
        Writes the labels into ``features['Cluster']`` in place (adding the
        column or overwriting an existing one).
    """
    # A previous call on the same frame leaves a 'Cluster' column behind.
    # Exclude it so that re-running (e.g. with a different n_clusters) does
    # not scale and cluster on stale labels as if they were a real feature.
    feature_matrix = features.drop(columns='Cluster', errors='ignore')

    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(feature_matrix)

    # Fixed random_state keeps the cluster assignment reproducible.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(scaled_features)

    # Analyze clusters: attach the labels, then average every feature
    # within each cluster.
    features['Cluster'] = clusters
    cluster_analysis = features.groupby('Cluster').mean()

    return clusters, cluster_analysis

# Visualization
def plot_clusters(features, clusters):
    """Show a scatter plot of total value vs. total quantity, colored by cluster.

    Args:
        features: Mapping/DataFrame providing ``'TotalValue_sum'`` (x axis)
            and ``'Quantity_sum'`` (y axis).
        clusters: Per-point values used to color the markers.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    figure_size = (10, 6)
    plt.figure(figsize=figure_size)

    x_values = features['TotalValue_sum']
    y_values = features['Quantity_sum']
    points = plt.scatter(x_values, y_values, c=clusters, cmap='viridis')

    plt.xlabel('Total Value')
    plt.ylabel('Total Quantity')
    plt.title('Customer Segments')

    # Legend for the color mapping of the cluster values.
    plt.colorbar(points)
    plt.show()

# Main execution: load the data, build per-customer features, cluster them,
# and plot the resulting segments.
# NOTE(review): load_data is not defined in the visible part of this file;
# presumably it comes from an earlier cell or module and returns a
# (customers, transactions) pair — confirm.
customers, transactions = load_data()
clustering_features = prepare_clustering_features(transactions, customers)
# perform_clustering also writes a 'Cluster' column into clustering_features
# in place, in addition to returning the labels and the per-cluster means.
clusters, cluster_analysis = perform_clustering(clustering_features)
plot_clusters(clustering_features, clusters)
