# Customer Analytics & Machine Learning

## Azure Synapse Analytics - Advanced Customer Intelligence

This notebook demonstrates:
- Customer segmentation using machine learning
- Churn prediction modeling
- Customer lifetime value estimation
- Real-time recommendation engine

**Data Sources**: Azure Data Lake, Azure SQL Database
**Compute**: Synapse Spark Pools
**ML Framework**: Azure ML, MLflow, Spark MLlib

## Setup and Configuration

In [None]:
# Import required libraries
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.ml.feature import VectorAssembler, StandardScaler, StringIndexer, OneHotEncoder
from pyspark.ml.clustering import KMeans
from pyspark.ml.classification import RandomForestClassifier, GBTClassifier
from pyspark.ml.regression import LinearRegression, RandomForestRegressor
from pyspark.ml.evaluation import BinaryClassificationEvaluator, RegressionEvaluator
from pyspark.ml import Pipeline
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Azure Synapse specific imports
from notebookutils import mssparkutils
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.model import Model
import mlflow
import mlflow.spark

# Configuration
STORAGE_ACCOUNT = "your_storage_account"
CONTAINER_NAME = "analytics"
DATA_LAKE_PATH = f"abfss://{CONTAINER_NAME}@{STORAGE_ACCOUNT}.dfs.core.windows.net"

# `spark` is referenced throughout this notebook but never defined in it.
# getOrCreate() hands back the already-active session when one exists (as in a
# Synapse notebook) and only builds a new one otherwise, so this is safe in both cases.
spark = SparkSession.builder.getOrCreate()

print("📚 Libraries imported successfully")
print(f"🏗️ Data Lake Path: {DATA_LAKE_PATH}")
print(f"✨ Spark Version: {spark.version}")

## Data Loading and Exploration

In [None]:
# --- Azure Data Lake: customer transactions (Delta format) ---
transactions_path = f"{DATA_LAKE_PATH}/gold/customer_transactions"
customer_transactions = spark.read.format("delta").load(transactions_path)

# --- Azure SQL Database: customer demographics ---
# Credentials are pulled from Key Vault at run time rather than stored in the notebook.
sql_username = mssparkutils.credentials.getSecret("your-keyvault", "sql-username")
sql_password = mssparkutils.credentials.getSecret("your-keyvault", "sql-password")
demographics_reader = (
    spark.read
    .format("com.microsoft.sqlserver.jdbc.spark")
    .option("url", "jdbc:sqlserver://your-server.database.windows.net:1433;database=CustomerDB")
    .option("dbtable", "dbo.CustomerDemographics")
    .option("user", sql_username)
    .option("password", sql_password)
)
customer_demographics = demographics_reader.load()

# --- Azure Data Lake: product catalog (Parquet format) ---
catalog_path = f"{DATA_LAKE_PATH}/processed/product_catalog"
product_catalog = spark.read.format("parquet").load(catalog_path)

# Each count() below triggers a Spark job over the corresponding source.
print(f"📊 Data loaded:")
print(f"   - Customer Transactions: {customer_transactions.count():,} records")
print(f"   - Customer Demographics: {customer_demographics.count():,} records")
print(f"   - Product Catalog: {product_catalog.count():,} records")

In [None]:
# Schema, headline statistics and basic data-quality counters for the transactions
print("🔍 Customer Transaction Data Schema:")
customer_transactions.printSchema()

print("\n📈 Customer Transaction Summary:")
summary_columns = [
    count("*").alias("total_transactions"),
    countDistinct("customer_id").alias("unique_customers"),
    sum("transaction_amount").alias("total_revenue"),
    avg("transaction_amount").alias("avg_transaction_amount"),
    min("transaction_date").alias("earliest_transaction"),
    max("transaction_date").alias("latest_transaction"),
]
customer_transactions.select(*summary_columns).show()


def _count_where(condition, alias_name):
    """Return an aggregate column counting the rows for which `condition` holds."""
    return sum(when(condition, 1).otherwise(0)).alias(alias_name)


# Check for data quality issues: missing keys/dates and non-positive amounts
print("\n🔎 Data Quality Check:")
quality_columns = [
    _count_where(col("customer_id").isNull(), "null_customer_ids"),
    _count_where(col("transaction_amount") <= 0, "invalid_amounts"),
    _count_where(col("transaction_date").isNull(), "null_dates"),
]
customer_transactions.select(*quality_columns).show()

## Feature Engineering for Machine Learning

In [None]:
def create_customer_features(transactions_df, demographics_df, reference_date=None,
                             total_categories=10):
    """
    Create comprehensive customer features for machine learning

    Aggregates the transactions per ``customer_id``, derives recency /
    frequency / monetary style features, left-joins demographic attributes
    and adds rule-based flags (``is_high_value``, ``is_frequent_buyer``,
    ``churn_risk``).

    Args:
        transactions_df: Transaction DataFrame. Columns read here:
            customer_id, transaction_id, transaction_amount, transaction_date,
            transaction_timestamp, product_category, product_id.
        demographics_df: Demographics DataFrame. Columns read here:
            customer_id, age, gender, city, country, income_bracket,
            education_level, marital_status.
        reference_date: Date that ``recency_days`` is measured against.
            Defaults to the driver's current date.
        total_categories: Number of product categories used to normalise
            ``category_diversity``. Defaults to 10, the value that was
            previously hard-coded.

    Returns:
        DataFrame with the aggregated and derived features per customer.
        Customers without a demographics match keep null demographic columns
        (left join). Presumably one row per customer; this holds only if
        demographics_df has at most one row per customer_id.

    Raises:
        ValueError: If ``total_categories`` is not positive.
    """
    if total_categories <= 0:
        raise ValueError("total_categories must be a positive number")

    if reference_date is None:
        reference_date = datetime.now().date()
    
    # Customer transaction aggregations
    customer_metrics = transactions_df.groupBy("customer_id").agg(
        count("transaction_id").alias("total_transactions"),
        sum("transaction_amount").alias("total_spent"),
        avg("transaction_amount").alias("avg_transaction_amount"),
        max("transaction_date").alias("last_transaction_date"),
        min("transaction_date").alias("first_transaction_date"),
        countDistinct("product_category").alias("unique_categories"),
        countDistinct("product_id").alias("unique_products"),
        stddev("transaction_amount").alias("transaction_amount_std"),
        
        # Time-based features
        # Spark's dayofweek() numbers Sunday as 1 and Saturday as 7
        sum(when(dayofweek(col("transaction_date")).isin([1, 7]), 
                col("transaction_amount")).otherwise(0)).alias("weekend_spending"),
        # between() is inclusive on both ends, so hour 17 (17:00-17:59) is counted
        sum(when(hour(col("transaction_timestamp")).between(9, 17), 
                col("transaction_amount")).otherwise(0)).alias("business_hours_spending"),
        
        # Category preferences
        sum(when(col("product_category") == "Electronics", 
                col("transaction_amount")).otherwise(0)).alias("electronics_spending"),
        sum(when(col("product_category") == "Clothing", 
                col("transaction_amount")).otherwise(0)).alias("clothing_spending"),
        sum(when(col("product_category") == "Books", 
                col("transaction_amount")).otherwise(0)).alias("books_spending")
    )
    
    # Calculate derived features
    # greatest(..., 1) keeps the denominators at 1 or more for single-purchase
    # or same-day customers.
    customer_features = customer_metrics \
        .withColumn("customer_lifetime_days", 
                   datediff(col("last_transaction_date"), col("first_transaction_date"))) \
        .withColumn("avg_days_between_purchases",
                   col("customer_lifetime_days") / greatest(col("total_transactions") - 1, lit(1))) \
        .withColumn("recency_days", 
                   datediff(lit(reference_date), col("last_transaction_date"))) \
        .withColumn("frequency_score", 
                   col("total_transactions") / greatest(col("customer_lifetime_days"), lit(1)) * 365) \
        .withColumn("monetary_score", col("total_spent")) \
        .withColumn("weekend_preference", 
                   # Explicit zero guard: yields null when nothing was spent instead of
                   # relying on (or failing under ANSI mode on) division by zero.
                   when(col("total_spent") != 0,
                        col("weekend_spending") / col("total_spent"))) \
        .withColumn("category_diversity", 
                   col("unique_categories") / lit(float(total_categories)))
    
    # Join with demographics
    complete_features = customer_features.join(
        demographics_df.select("customer_id", "age", "gender", "city", "country", 
                              "income_bracket", "education_level", "marital_status"),
        "customer_id", "left"
    )
    
    # Add age groups and income categories
    complete_features = complete_features \
        .withColumn("age_group", 
                   # A missing age (no demographics match) must not fall through
                   # to the "65+" default, so it is mapped to null first.
                   when(col("age").isNull(), lit(None).cast("string"))
                   .when(col("age") < 25, "18-24")
                   .when(col("age") < 35, "25-34")
                   .when(col("age") < 45, "35-44")
                   .when(col("age") < 55, "45-54")
                   .when(col("age") < 65, "55-64")
                   .otherwise("65+")) \
        .withColumn("is_high_value", 
                   when(col("total_spent") > 1000, 1).otherwise(0)) \
        .withColumn("is_frequent_buyer", 
                   when(col("total_transactions") > 10, 1).otherwise(0)) \
        .withColumn("churn_risk", 
                   when(col("recency_days") > 90, 1).otherwise(0))
    
    return complete_features

# Build the per-customer feature table from transactions and demographics
customer_features = create_customer_features(
    transactions_df=customer_transactions,
    demographics_df=customer_demographics,
)

print(f"✨ Customer features created: {customer_features.count():,} customers")
print("\n🎯 Feature Summary:")

# Portfolio-level averages plus head-counts of the two rule-based flags
feature_summary = customer_features.select(
    avg("total_spent").alias("avg_total_spent"),
    avg("total_transactions").alias("avg_transactions"),
    avg("recency_days").alias("avg_recency_days"),
    sum("is_high_value").alias("high_value_customers"),
    sum("churn_risk").alias("at_risk_customers"),
)
feature_summary.show()

## Customer Segmentation with K-Means Clustering

In [None]:
# Numeric columns that describe each customer for segmentation
clustering_features = [
    "total_spent",
    "total_transactions",
    "avg_transaction_amount",
    "recency_days",
    "frequency_score",
    "category_diversity",
    "weekend_preference",
    "customer_lifetime_days",
]

# Null replacements: zero for every feature except the two overridden below
null_defaults = dict.fromkeys(clustering_features, 0)
null_defaults["recency_days"] = 365
null_defaults["weekend_preference"] = 0.5
clustering_data = customer_features.na.fill(null_defaults)

# Pipeline stages shared by every k-means fit: assemble the feature vector, then standardise it
assembler = VectorAssembler(inputCols=clustering_features, outputCol="features_raw")

scaler_settings = {
    "inputCol": "features_raw",
    "outputCol": "features",
    "withStd": True,
    "withMean": True,
}
scaler = StandardScaler(**scaler_settings)

# K-means clustering with different k values
# ClusteringEvaluator is imported in this cell because the notebook's setup cell
# only imports the classification/regression evaluators.
from pyspark.ml.evaluation import ClusteringEvaluator

print("🔬 Finding optimal number of clusters...")
silhouette_scores = []  # real silhouette per k (previously this list held WSSSE values)
wssse_scores = []       # within-cluster sum of squared errors per k (elbow method)
k_values = range(2, 8)

# Scores the scaled feature vectors against the cluster assignment column
silhouette_evaluator = ClusteringEvaluator(
    featuresCol="features",
    predictionCol="cluster",
    metricName="silhouette"
)

for k in k_values:
    kmeans = KMeans(k=k, seed=42, featuresCol="features", predictionCol="cluster")
    pipeline = Pipeline(stages=[assembler, scaler, kmeans])
    model = pipeline.fit(clustering_data)
    predictions = model.transform(clustering_data)
    
    # Calculate within-cluster sum of squares (training cost of the fitted KMeans stage)
    wssse = model.stages[-1].summary.trainingCost
    wssse_scores.append(wssse)

    # Silhouette is evaluated on the assignments produced above
    silhouette = silhouette_evaluator.evaluate(predictions)
    silhouette_scores.append(silhouette)

    print(f"k={k}: WSSSE = {wssse:.2f}, silhouette = {silhouette:.4f}")

# Final segmentation model: k is fixed at 5 (chosen on business requirements)
optimal_k = 5
kmeans = KMeans(k=optimal_k, seed=42, featuresCol="features", predictionCol="cluster")
clustering_pipeline = Pipeline(stages=[assembler, scaler, kmeans])
clustering_model = clustering_pipeline.fit(clustering_data)
customer_segments = clustering_model.transform(clustering_data)

print(f"\n✅ Customer segmentation completed with k={optimal_k}")

# Per-cluster profile: size plus average spend, activity, recency and frequency
print("\n📊 Customer Segment Analysis:")
segment_profile_columns = [
    count("*").alias("customer_count"),
    avg("total_spent").alias("avg_total_spent"),
    avg("total_transactions").alias("avg_transactions"),
    avg("recency_days").alias("avg_recency"),
    avg("frequency_score").alias("avg_frequency"),
]
segment_analysis = (
    customer_segments
    .groupBy("cluster")
    .agg(*segment_profile_columns)
    .orderBy("cluster")
)

segment_analysis.show()

# Attach a business label to each cluster id; anything outside 0-4 becomes "Others".
# NOTE(review): the label is tied to the cluster index only. Nothing here checks it
# against the profile printed above, so confirm the mapping against segment_analysis.
segment_labels = [
    "Champions",
    "Loyal Customers",
    "Potential Loyalists",
    "At Risk",
    "Cannot Lose Them",
]
segment_name_expr = None
for _cluster_id, _segment_label in enumerate(segment_labels):
    _is_cluster = col("cluster") == _cluster_id
    if segment_name_expr is None:
        segment_name_expr = when(_is_cluster, _segment_label)
    else:
        segment_name_expr = segment_name_expr.when(_is_cluster, _segment_label)
segment_name_expr = segment_name_expr.otherwise("Others")

customer_segments_labeled = customer_segments.withColumn("segment_name", segment_name_expr)

## Churn Prediction Model

In [None]:
# Prepare data for churn prediction
# NOTE: `recency_days` is deliberately NOT a model input. The label `churn_risk`
# is defined as `recency_days > 90`, so feeding recency to the classifier would
# leak the target and the reported AUC would say nothing about real predictive power.
churn_features = [
    "total_spent", "total_transactions", "avg_transaction_amount",
    "frequency_score", "category_diversity",
    "weekend_preference", "customer_lifetime_days", "age"
]

# Prepare categorical features
categorical_features = ["gender", "age_group", "education_level", "marital_status"]

# Create churn dataset. Rows with a null in any selected column are dropped;
# the target classes are NOT rebalanced here (see the distribution printed below).
churn_data = customer_segments_labeled.select(
    ["customer_id", "churn_risk"] + churn_features + categorical_features
).na.drop()

print(f"📈 Churn prediction dataset: {churn_data.count():,} customers")
print("\n⚖️ Churn distribution:")
churn_data.groupBy("churn_risk").count().show()

# Create ML pipeline for churn prediction
# String indexers for categorical variables; handleInvalid="keep" puts labels
# unseen during fitting into an extra bucket instead of failing at scoring time.
# (Loop variable is named `feature` so it does not read like pyspark's `col`.)
string_indexers = [
    StringIndexer(inputCol=feature, outputCol=f"{feature}_indexed", handleInvalid="keep")
    for feature in categorical_features
]

# One-hot encoders
one_hot_encoders = [
    OneHotEncoder(inputCol=f"{feature}_indexed", outputCol=f"{feature}_encoded")
    for feature in categorical_features
]

# Feature assembler
all_features = churn_features + [f"{feature}_encoded" for feature in categorical_features]
feature_assembler = VectorAssembler(
    inputCols=all_features,
    outputCol="features_raw"
)

# Feature scaler
feature_scaler = StandardScaler(
    inputCol="features_raw",
    outputCol="features",
    withStd=True,
    withMean=True
)

# Random Forest classifier
rf_classifier = RandomForestClassifier(
    featuresCol="features",
    labelCol="churn_risk",
    predictionCol="prediction",
    probabilityCol="probability",
    numTrees=100,
    seed=42
)

# Create pipeline
churn_pipeline = Pipeline(stages=(
    string_indexers + one_hot_encoders + 
    [feature_assembler, feature_scaler, rf_classifier]
))

# Split data
train_data, test_data = churn_data.randomSplit([0.8, 0.2], seed=42)

print(f"📚 Training set: {train_data.count():,} customers")
print(f"🧪 Test set: {test_data.count():,} customers")

# Fit and evaluate the churn pipeline inside a tracked MLflow run
with mlflow.start_run(run_name="churn_prediction_rf"):
    # Run parameters, logged one by one in declaration order
    churn_run_params = {
        "model_type": "RandomForest",
        "num_trees": 100,
        "features": ",".join(all_features),
    }
    for _param_name, _param_value in churn_run_params.items():
        mlflow.log_param(_param_name, _param_value)

    # Fit on the training split, then score both splits
    churn_model = churn_pipeline.fit(train_data)
    train_predictions = churn_model.transform(train_data)
    test_predictions = churn_model.transform(test_data)

    # Area under the ROC curve, computed from the classifier's raw prediction column
    evaluator = BinaryClassificationEvaluator(
        labelCol="churn_risk",
        rawPredictionCol="rawPrediction",
        metricName="areaUnderROC",
    )
    train_auc = evaluator.evaluate(train_predictions)
    test_auc = evaluator.evaluate(test_predictions)

    # Record metrics and the fitted pipeline with the run
    for _metric_name, _metric_value in (("train_auc", train_auc), ("test_auc", test_auc)):
        mlflow.log_metric(_metric_name, _metric_value)
    mlflow.spark.log_model(churn_model, "churn_model")

    print(f"\n🎯 Model Performance:")
    print(f"   Training AUC: {train_auc:.4f}")
    print(f"   Test AUC: {test_auc:.4f}")

# Feature importance
feature_importance = churn_model.stages[-1].featureImportances.toArray()

# The importance vector has one entry per slot of the assembled feature vector.
# Every one-hot encoded column expands into several slots, so `all_features`
# (one name per input column) is shorter than the vector and cannot label it.
# VectorAssembler stores a name for each slot in the metadata of its output
# column, so read the names from there.
_slot_attrs = (
    train_predictions.schema["features_raw"].metadata
    .get("ml_attr", {})
    .get("attrs", {})
)
_slot_names = {
    attr["idx"]: attr["name"]
    for attr_group in _slot_attrs.values()
    for attr in attr_group
}
# Fall back to a positional name for any slot without metadata so the two
# columns of the DataFrame below always have the same length.
feature_names = [
    _slot_names.get(slot, f"feature_{slot}")
    for slot in range(feature_importance.size)
]

importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importance
}).sort_values('importance', ascending=False)

print("\n🔍 Top 10 Most Important Features:")
print(importance_df.head(10))

## Customer Lifetime Value Prediction

In [None]:
# Historical CLV label: total spend scaled to a 365-day rate over the observed lifetime.
# Only customers with more than 30 days between first and last purchase are kept.
# NOTE(review): historical_clv works out to frequency_score * avg_transaction_amount,
# and both are model inputs below. Confirm this label definition is intended.
annualisation_factor = lit(365) / greatest(col("customer_lifetime_days"), lit(1))
clv_data = (
    customer_segments_labeled
    .withColumn("historical_clv", col("total_spent") * annualisation_factor)
    .filter(col("customer_lifetime_days") > 30)
)

# Numeric inputs of the CLV model
clv_features = [
    "total_transactions",
    "avg_transaction_amount",
    "frequency_score",
    "category_diversity",
    "weekend_preference",
    "age",
]

# Modelling table: id, label, numeric and categorical inputs; incomplete rows are dropped
clv_columns = ["customer_id", "historical_clv"] + clv_features + categorical_features
clv_prediction_data = clv_data.select(clv_columns).na.drop()

print(f"💰 CLV prediction dataset: {clv_prediction_data.count():,} customers")
print(f"📊 Average historical CLV: ${clv_prediction_data.agg(avg('historical_clv')).collect()[0][0]:.2f}")

# Categorical handling: index each column, then one-hot encode the index
clv_string_indexers = []
clv_one_hot_encoders = []
for _cat_name in categorical_features:
    clv_string_indexers.append(
        StringIndexer(inputCol=_cat_name, outputCol=f"{_cat_name}_idx", handleInvalid="keep")
    )
    clv_one_hot_encoders.append(
        OneHotEncoder(inputCol=f"{_cat_name}_idx", outputCol=f"{_cat_name}_enc")
    )

clv_all_features = clv_features + [f"{_cat_name}_enc" for _cat_name in categorical_features]

# Assemble and standardise the feature vector
clv_assembler = VectorAssembler(inputCols=clv_all_features, outputCol="features_raw")
clv_scaler = StandardScaler(
    inputCol="features_raw", outputCol="features", withStd=True, withMean=True
)

# Random Forest regressor predicting the historical CLV label
rf_regressor = RandomForestRegressor(
    featuresCol="features",
    labelCol="historical_clv",
    predictionCol="predicted_clv",
    numTrees=100,
    seed=42,
)

clv_pipeline = Pipeline(stages=[
    *clv_string_indexers,
    *clv_one_hot_encoders,
    clv_assembler,
    clv_scaler,
    rf_regressor,
])

# 80/20 train/test split with a fixed seed
clv_train, clv_test = clv_prediction_data.randomSplit([0.8, 0.2], seed=42)

# Fit and evaluate the CLV pipeline inside a tracked MLflow run
with mlflow.start_run(run_name="clv_prediction_rf"):
    clv_run_params = {
        "model_type": "RandomForestRegressor",
        "num_trees": 100,
        "features": ",".join(clv_all_features),
    }
    for _param_name, _param_value in clv_run_params.items():
        mlflow.log_param(_param_name, _param_value)

    # Fit on the training split, then score both splits
    clv_model = clv_pipeline.fit(clv_train)
    clv_train_predictions = clv_model.transform(clv_train)
    clv_test_predictions = clv_model.transform(clv_test)

    # One evaluator is used for both metrics: RMSE first...
    clv_evaluator = RegressionEvaluator(
        labelCol="historical_clv",
        predictionCol="predicted_clv",
        metricName="rmse",
    )
    train_rmse = clv_evaluator.evaluate(clv_train_predictions)
    test_rmse = clv_evaluator.evaluate(clv_test_predictions)

    # ...then it is switched to R-squared (the evaluator stays on "r2" afterwards)
    clv_evaluator.setMetricName("r2")
    train_r2 = clv_evaluator.evaluate(clv_train_predictions)
    test_r2 = clv_evaluator.evaluate(clv_test_predictions)

    # Record metrics and the fitted pipeline with the run
    clv_run_metrics = {
        "train_rmse": train_rmse,
        "test_rmse": test_rmse,
        "train_r2": train_r2,
        "test_r2": test_r2,
    }
    for _metric_name, _metric_value in clv_run_metrics.items():
        mlflow.log_metric(_metric_name, _metric_value)
    mlflow.spark.log_model(clv_model, "clv_model")

    print(f"\n💰 CLV Model Performance:")
    print(f"   Training RMSE: ${train_rmse:.2f}")
    print(f"   Test RMSE: ${test_rmse:.2f}")
    print(f"   Training R²: {train_r2:.4f}")
    print(f"   Test R²: {test_r2:.4f}")

## Real-time Recommendation Engine

In [None]:
from pyspark.ml.recommendation import ALS
from pyspark.sql.functions import explode, collect_list

# Implicit "rating" per (customer, product) pair, derived from purchase history:
# the mean of (2 x purchase count) and (spend / 100), capped at 5.0.
purchase_signal = col("purchase_count") * 2
spend_signal = col("total_spent_on_product") / 100
capped_rating = least(lit(5.0), (purchase_signal + spend_signal) / 2)

user_item_ratings = (
    customer_transactions
    .groupBy("customer_id", "product_id")
    .agg(
        count("transaction_id").alias("purchase_count"),
        sum("transaction_amount").alias("total_spent_on_product"),
    )
    .withColumn("rating", capped_rating)
)

# ALS needs numeric user/item ids, so index the customer and product identifiers
user_indexer = StringIndexer(inputCol="customer_id", outputCol="user_id")
item_indexer = StringIndexer(inputCol="product_id", outputCol="item_id")

_user_index_model = user_indexer.fit(user_item_ratings)
user_indexed = _user_index_model.transform(user_item_ratings)
_item_index_model = item_indexer.fit(user_indexed)
user_item_indexed = _item_index_model.transform(user_indexed)

print(f"🛒 Recommendation dataset: {user_item_indexed.count():,} user-item interactions")
print(f"👥 Unique users: {user_item_indexed.select('user_id').distinct().count():,}")
print(f"📦 Unique items: {user_item_indexed.select('item_id').distinct().count():,}")

# 80/20 train/test split with a fixed seed
rec_train, rec_test = user_item_indexed.randomSplit([0.8, 0.2], seed=42)

# Collaborative-filtering estimator. coldStartStrategy="drop" removes rows whose
# prediction is NaN, so they do not affect the evaluation metric.
als_settings = {
    "maxIter": 10,
    "regParam": 0.1,
    "userCol": "user_id",
    "itemCol": "item_id",
    "ratingCol": "rating",
    "coldStartStrategy": "drop",
    "seed": 42,
}
als = ALS(**als_settings)

# Fit and evaluate the ALS recommender inside a tracked MLflow run
with mlflow.start_run(run_name="product_recommendations_als"):
    for _param_name, _param_value in (
        ("algorithm", "ALS"),
        ("max_iter", 10),
        ("reg_param", 0.1),
    ):
        mlflow.log_param(_param_name, _param_value)

    rec_model = als.fit(rec_train)

    # Top-10 items for every user and top-10 users for every item
    user_recs = rec_model.recommendForAllUsers(10)
    item_recs = rec_model.recommendForAllItems(10)

    # Hold-out RMSE between the derived rating and the model's prediction
    rec_predictions = rec_model.transform(rec_test)
    rec_evaluator = RegressionEvaluator(
        metricName="rmse",
        labelCol="rating",
        predictionCol="prediction",
    )
    rec_rmse = rec_evaluator.evaluate(rec_predictions)

    # Record the metric and the fitted model with the run
    mlflow.log_metric("recommendation_rmse", rec_rmse)
    mlflow.spark.log_model(rec_model, "recommendation_model")

    print(f"\n🎯 Recommendation Model Performance:")
    print(f"   RMSE: {rec_rmse:.4f}")

# Example: Get recommendations for top customers
top_customers = customer_segments_labeled \
    .filter(col("segment_name") == "Champions") \
    .select("customer_id") \
    .limit(5)

print("\n🌟 Sample Recommendations for Champion Customers:")

# Translate customer_id -> user_id with the mapping already present in
# user_item_indexed instead of fitting a second StringIndexer. This avoids an
# extra fit over the whole ratings table, and the inner join skips any customer
# without an indexed id rather than raising on an unseen label.
champion_user_ids = user_item_indexed \
    .select("customer_id", "user_id") \
    .distinct() \
    .join(top_customers, "customer_id")

sample_recommendations = user_recs.join(
    champion_user_ids,
    "user_id"
).select("customer_id", "recommendations")

sample_recommendations.show(5, truncate=False)

## Model Deployment and Scoring

In [None]:
# Create a unified scoring pipeline
def score_customers(customer_data, churn_model, clv_model, rec_model, segment_model):
    """
    Score customers with all models for real-time insights

    Applies the segmentation, churn and CLV pipeline models in sequence and
    derives a combined score and priority tier from their outputs.

    Args:
        customer_data: DataFrame holding every input column the three
            pipeline models read.
        churn_model: Fitted churn pipeline model; must emit a ``probability``
            vector column.
        clv_model: Fitted CLV pipeline model; must emit ``predicted_clv``.
        rec_model: Accepted for interface compatibility; not used here.
        segment_model: Fitted segmentation pipeline model.

    Returns:
        The input rows plus the model outputs and the derived columns
        ``churn_probability``, ``customer_score``, ``priority_tier`` and
        ``scoring_timestamp``.
    """
    # Local import: the notebook's setup cell does not import this helper.
    from pyspark.ml.functions import vector_to_array

    # The pipelines in this notebook all write their assembled and scaled vectors
    # to these two column names. They must be removed between models, otherwise
    # the next pipeline fails because its output column already exists.
    intermediate_cols = ("features_raw", "features")

    # Customer segmentation (drop() ignores names that are not present)
    segmented_customers = segment_model \
        .transform(customer_data.drop(*intermediate_cols)) \
        .drop(*intermediate_cols)
    
    # Churn prediction
    churn_predictions = churn_model \
        .transform(segmented_customers) \
        .drop(*intermediate_cols)
    
    # CLV prediction
    clv_predictions = clv_model.transform(churn_predictions)
    
    # Extract probabilities and create final scores.
    # `probability` is an ML vector column and cannot be indexed with getItem()
    # directly; it is converted to an array first. Element 1 is the probability
    # of the positive class.
    final_scores = clv_predictions \
        .withColumn("churn_probability", 
                   vector_to_array(col("probability")).getItem(1)) \
        .withColumn("customer_score",
                   (col("predicted_clv") * (lit(1) - col("churn_probability"))) / 1000) \
        .withColumn("priority_tier",
                   when(col("customer_score") > 2, "Tier 1 - Highest Priority")
                   .when(col("customer_score") > 1, "Tier 2 - High Priority")
                   .when(col("customer_score") > 0.5, "Tier 3 - Medium Priority")
                   .otherwise("Tier 4 - Low Priority")) \
        .withColumn("scoring_timestamp", current_timestamp())
    
    return final_scores

# Score all customers
# Score the null-filled clustering input rather than the raw feature table, and
# keep only rows where every churn/CLV model input is present. VectorAssembler
# raises on null inputs by default, and the models were trained on data prepared
# the same way.
_model_input_cols = list(dict.fromkeys(churn_features + clv_features + categorical_features))
scoring_input = clustering_data.na.drop(subset=_model_input_cols)

customer_scores = score_customers(
    scoring_input,
    churn_model,
    clv_model,
    rec_model,
    clustering_model
)

# `segment_name` is not produced by any model. It only exists on
# customer_segments_labeled, so attach it here before it is grouped on and
# selected below.
if "segment_name" not in customer_scores.columns:
    customer_scores = customer_scores.join(
        customer_segments_labeled.select("customer_id", "segment_name"),
        "customer_id", "left"
    )

print("📊 Customer Scoring Summary:")
customer_scores.groupBy("priority_tier", "segment_name") \
    .agg(
        count("*").alias("customer_count"),
        avg("customer_score").alias("avg_score"),
        avg("churn_probability").alias("avg_churn_risk"),
        avg("predicted_clv").alias("avg_predicted_clv")
    ) \
    .orderBy("priority_tier", "segment_name") \
    .show()

# Save scored customers to Azure Data Lake
output_path = f"{DATA_LAKE_PATH}/gold/customer_ml_scores"

customer_scores.select(
    "customer_id", "segment_name", "cluster", "churn_probability", 
    "predicted_clv", "customer_score", "priority_tier", "scoring_timestamp"
).write \
    .format("delta") \
    .mode("overwrite") \
    .option("mergeSchema", "true") \
    .save(output_path)

print(f"\n✅ Customer scores saved to: {output_path}")

# Create actionable insights
# The when() branches are evaluated top to bottom, so churn risk takes
# precedence over value and segment when several conditions hold.
actionable_insights = customer_scores.select(
    "customer_id",
    "segment_name",
    "churn_probability",
    "predicted_clv",
    "priority_tier",
    when(col("churn_probability") > 0.7, "High Risk - Immediate Intervention Required")
    .when(col("churn_probability") > 0.5, "Medium Risk - Engagement Campaign")
    .when(col("predicted_clv") > 2000, "High Value - VIP Treatment")
    .when(col("segment_name") == "At Risk", "Retention Campaign")
    .otherwise("Standard Journey").alias("recommended_action")
)

print("\n🎯 Actionable Customer Insights:")
actionable_insights.groupBy("recommended_action") \
    .agg(count("*").alias("customer_count")) \
    .orderBy(desc("customer_count")) \
    .show()

## Summary and Next Steps

In [None]:
# Closing report: model quality, business head-counts and operational notes
print("🎯 Customer Analytics & ML Pipeline Summary")
print("=" * 50)

print("\n✅ Models Trained:")
print("   • Customer Segmentation (K-Means): 5 segments")
print(f"   • Churn Prediction (Random Forest): {test_auc:.3f} AUC")
print(f"   • CLV Prediction (Random Forest): {test_r2:.3f} R²")
print(f"   • Product Recommendations (ALS): {rec_rmse:.3f} RMSE")

print("\n📊 Business Impact:")
# Head-counts use the same thresholds as the recommended-action rules (0.7 and 2000)
total_customers = customer_scores.count()
high_risk_customers = customer_scores.where(col("churn_probability") > 0.7).count()
high_value_customers = customer_scores.where(col("predicted_clv") > 2000).count()

print(f"   • Total Customers Analyzed: {total_customers:,}")
print(f"   • High Churn Risk Customers: {high_risk_customers:,} ({high_risk_customers/total_customers*100:.1f}%)")
print(f"   • High Value Customers: {high_value_customers:,} ({high_value_customers/total_customers*100:.1f}%)")

# Sum of the predicted CLV over all scored customers
_clv_total_row = customer_scores.agg(sum("predicted_clv")).collect()[0]
predicted_total_clv = _clv_total_row[0]
print(f"   • Predicted Total CLV: ${predicted_total_clv:,.2f}")

for _line in (
    "\n🚀 Next Steps:",
    "   • Deploy models to Azure ML for real-time scoring",
    "   • Set up automated retraining pipelines",
    "   • Create Power BI dashboards for business users",
    "   • Implement A/B testing for marketing campaigns",
    "   • Set up real-time alerts for high-risk customers",
):
    print(_line)

print("\n📈 Model Artifacts Saved:")
print(f"   • Customer scores: {output_path}")
print("   • MLflow models: Available in workspace")
print("   • Feature importance: Logged in MLflow")

for _line in (
    "\n🔄 Recommended Refresh Schedule:",
    "   • Customer scores: Daily",
    "   • Segmentation model: Weekly",
    "   • Churn/CLV models: Monthly",
    "   • Recommendation model: Weekly",
):
    print(_line)