In [None]:
# customer_segmentation.py
import pandas as pd
import numpy as np
import mysql.connector
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from datetime import datetime

# --- Database connection setup ---
# NOTE(review): credentials are hard-coded (root with an empty password).
# Left unchanged here; consider loading them from the environment or a
# config file before this runs anywhere but a local machine.
db_config = {
    "host": "localhost",
    "user": "root",
    "password": "",
    "database": "laundrolink_db"
}

# Aggregates one row per customer: order count, total spend, latest order.
query = """
SELECT 
    o.CustID,
    COUNT(o.OrderID) AS frequency,
    SUM(o.TotalAmount) AS total_spent,
    MAX(o.OrderDate) AS last_order_date
FROM Orders o
GROUP BY o.CustID;
"""

INSERT_SEGMENT_SQL = """
        INSERT INTO Customer_Segments (CustID, SegmentName, averageSpend, averageFrequency, averageRecency)
        VALUES (%s, %s, %s, %s, %s)
    """

# Columns fed to the scaler / KMeans.
FEATURE_COLUMNS = ['Frequency', 'Monetary', 'Recency']

# Upper bound on the number of segments (one per available name).
MAX_SEGMENTS = 4

# Segment names are assigned in this order: each name goes to the
# not-yet-named cluster with the highest mean of the given metric.
# Any cluster left over receives FALLBACK_SEGMENT.
SEGMENT_RULES = (
    ("High-Value Spenders", 'Monetary'),
    ("At-Risk Customers", 'Recency'),
    ("Loyal Regulars", 'Frequency'),
)
FALLBACK_SEGMENT = "New or Occasional"


def build_rfm_frame(rows, now=None):
    """Turn the per-customer query rows into a Recency/Frequency/Monetary frame.

    Args:
        rows: list of dicts with keys ``CustID``, ``frequency``,
            ``total_spent`` and ``last_order_date`` (the shape produced by
            ``query`` through a dictionary cursor).
        now: reference time for the recency calculation; defaults to
            ``datetime.now()``.

    Returns:
        DataFrame with columns ``CustID``, ``Frequency``, ``Monetary``,
        ``last_order_date`` and ``Recency`` (whole days since the last
        order). An empty DataFrame is returned when ``rows`` is empty.
    """
    df = pd.DataFrame(rows)
    if df.empty:
        return df
    if now is None:
        now = datetime.now()

    df = df.rename(columns={'frequency': 'Frequency', 'total_spent': 'Monetary'})

    # SUM() may come back as Decimal, or NULL when every amount is NULL;
    # coerce to plain floats and count a missing total as zero spend so the
    # scaler and KMeans receive a clean numeric column.
    df['Monetary'] = (
        pd.to_numeric(df['Monetary'], errors='coerce').fillna(0.0).astype(float)
    )

    # --- Calculate Recency (days since last order) ---
    df['last_order_date'] = pd.to_datetime(df['last_order_date'])
    df['Recency'] = (now - df['last_order_date']).dt.days
    return df


def choose_cluster_count(n_customers, max_segments=MAX_SEGMENTS):
    """Return how many clusters to fit: at most one per customer, at least one."""
    return max(1, min(max_segments, n_customers))


def name_clusters(df, cluster_col='Segment'):
    """Map each cluster id to a readable name based on its mean RFM profile.

    KMeans cluster ids are arbitrary, so a fixed id -> name table would
    attach names to clusters at random. Instead the names in
    ``SEGMENT_RULES`` are handed out in order, each to the still-unnamed
    cluster with the highest mean of the rule's metric; remaining clusters
    get ``FALLBACK_SEGMENT``. With fewer clusters than names, only the
    first names are used.

    Returns:
        dict of ``{cluster_id: segment_name}`` covering every cluster id
        present in ``df[cluster_col]``.
    """
    profile = df.groupby(cluster_col)[FEATURE_COLUMNS].mean()
    names = {}
    for segment_name, metric in SEGMENT_RULES:
        unnamed = profile.drop(index=list(names))
        if unnamed.empty:
            break
        names[int(unnamed[metric].idxmax())] = segment_name
    for cluster_id in profile.index:
        names.setdefault(int(cluster_id), FALLBACK_SEGMENT)
    return names


def segment_customers(df, max_segments=MAX_SEGMENTS):
    """Cluster customers on standardized RFM features and label the clusters.

    Returns a copy of ``df`` with two extra columns: ``Segment`` (the KMeans
    cluster id) and ``SegmentName`` (see ``name_clusters``).
    """
    # --- Standardize numeric values ---
    scaled_features = StandardScaler().fit_transform(df[FEATURE_COLUMNS])

    # --- Perform KMeans clustering ---
    # KMeans raises when asked for more clusters than samples, so cap it.
    n_clusters = choose_cluster_count(len(df), max_segments)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)

    result = df.copy()
    result['Segment'] = kmeans.fit_predict(scaled_features)
    result['SegmentName'] = result['Segment'].map(name_clusters(result))
    return result


def build_insert_rows(df):
    """Build the parameter tuples for ``INSERT_SEGMENT_SQL``.

    ``CustID`` is passed through unchanged: the captured notebook output in
    this file shows ids such as ``C25-005``, which an ``int()`` cast would
    reject. ``.tolist()`` converts NumPy scalars to native Python values,
    which the DB driver can bind directly.

    NOTE(review): the per-customer values are written into the
    ``average*`` columns, exactly as before. Whether those columns are
    meant to hold per-segment averages instead cannot be determined from
    this file -- confirm against the table's consumers.
    """
    return list(zip(
        df['CustID'].tolist(),
        df['SegmentName'].tolist(),
        df['Monetary'].astype(float).tolist(),
        df['Frequency'].astype(float).tolist(),
        df['Recency'].astype(float).tolist(),
    ))


def run_segmentation():
    """Fetch order aggregates, segment customers and refresh Customer_Segments.

    Returns:
        ``(df, segment_summary)``: the labelled customer frame and the
        per-segment mean Monetary/Frequency/Recency. When the query returns
        no rows, prints a message and returns ``(empty_df, None)`` without
        touching the ``Customer_Segments`` table.
    """
    conn = mysql.connector.connect(**db_config)
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Fetch customer transaction data
            cursor.execute(query)
            df = build_rfm_frame(cursor.fetchall())
            if df.empty:
                print("No customer data found.")
                return df, None

            df = segment_customers(df)

            # --- Calculate averages for display ---
            segment_summary = df.groupby('SegmentName').agg({
                'Monetary': 'mean',
                'Frequency': 'mean',
                'Recency': 'mean'
            }).reset_index()

            # Build every row before touching the table so that a data
            # problem cannot leave Customer_Segments truncated and empty.
            insert_rows = build_insert_rows(df)

            # --- Save segmentation results to DB ---
            cursor.execute("TRUNCATE TABLE Customer_Segments")
            conn.commit()
            cursor.executemany(INSERT_SEGMENT_SQL, insert_rows)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, including on errors and on the
        # early "no data" return.
        conn.close()

    print("✅ Customer segmentation data updated successfully.")
    return df, segment_summary


if __name__ == "__main__":
    # Keep the results as module-level names so later notebook cells can
    # still inspect them.
    df, segment_summary = run_segmentation()




--- Step 1: Libraries imported ---
--- Step 2: Database connection successful! ---

--- Starting Customer Segmentation Analysis ---


  df = pd.read_sql(query, db)



--- Step 3: Fetched Raw Customer Data ---
    CustID  order_frequency  total_spent  average_spend  days_since_last_order
0  C25-005                6       4357.5     726.250000                      1
1  C25-006                3       2185.0     728.333333                      8
2  C25-007                4       2283.0     570.750000                      1
3  C25-008                2        872.0     436.000000                      4
4  C25-009                2       1384.0     692.000000                      3

--- Step 4: Preparing data for clustering ---
Data scaled successfully.

--- Step 5: Performing K-Means clustering ---
Clustering complete.

--- Step 6: Cluster Analysis (Averages) ---
         order_frequency  total_spent  average_spend  days_since_last_order
cluster                                                                    
2               6.000000  4357.500000     726.250000               1.000000
3               2.333333  1714.666667     735.944444               4.