In [None]:
import json
import warnings
from datetime import datetime, timedelta

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency, ks_2samp
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

In [None]:
# Location of the clustered customer table (Google Drive mount inside Colab).
DATA_PATH = "/content/drive/MyDrive/Case Studies/HSBC/datasets/clustered_data.csv"

# Load the table and preview its first rows.
data = pd.read_csv(DATA_PATH)
data.head()

Unnamed: 0.1,Unnamed: 0,CustGender,CustLocation,CustAccountBalance,TransactionAmount,Age,BalTransRatio,TransactionMonth,Cluster
0,2,0,MUMBAI,17874.44,459.0,29,0.025679,2,2
1,6,0,MUMBAI,973.46,566.0,33,0.581431,2,1
2,8,0,GURGAON,14906.96,833.0,37,0.05588,2,2
3,9,1,MUMBAI,4279.22,289.11,41,0.067561,2,1
4,10,1,MOHALI,48429.49,259.0,43,0.005348,2,0


In [None]:
# Drop every leftover index column that pandas auto-named "Unnamed: ..." when the
# CSV was written with its index (the preview shows both "Unnamed: 0" and
# "Unnamed: 0.1"). Dropping an empty list is a no-op, so the cell is safe to re-run.
unnamed_columns = [col for col in data.columns if str(col).startswith('Unnamed:')]
data = data.drop(columns=unnamed_columns)
data.columns

Index(['CustGender', 'CustLocation', 'CustAccountBalance', 'TransactionAmount',
       'Age', 'BalTransRatio', 'TransactionMonth', 'Cluster'],
      dtype='object')

### Customer Lifetime Value (CLV) Analysis Report

In [None]:
#Calculate comprehensive CLV metrics by customer segment

class CLVAnalyzer:
    """Estimate customer lifetime value (CLV) per cluster and describe each segment.

    Every row of ``data`` is treated as one customer: segment sizes are row counts.
    """

    # Assumed relationship length, in years, for each cluster id.
    DEFAULT_LIFESPAN_YEARS = {0: 3.5, 1: 5.2, 2: 4.1}

    def __init__(self, data):
        """Keep a reference to the customer table.

        Args:
            data: DataFrame with ``Cluster``, ``TransactionAmount``,
                ``CustAccountBalance``, ``Age`` and ``BalTransRatio`` columns.
                ``segment_business_profiles`` additionally reads ``CustGender``
                and ``CustLocation``.
        """
        self.data = data

    def calculate_clv_metrics(self, transaction_fee_rate=0.015, balance_interest_margin=0.02,
                              discount_rate=0.10, lifespan_years=None):
        """Aggregate per-cluster statistics and derive revenue and CLV columns.

        Args:
            transaction_fee_rate: Fee earned on transaction volume (default 1.5%).
            balance_interest_margin: Net interest margin on the mean balance (default 2%).
            discount_rate: Flat haircut applied to lifetime revenue (default 10%).
            lifespan_years: Mapping of cluster id -> expected lifespan in years.
                Defaults to ``DEFAULT_LIFESPAN_YEARS``.

        Returns:
            DataFrame indexed by ``Cluster`` with the flattened aggregate columns
            (e.g. ``TransactionAmount_mean``) plus the derived business columns.

        Raises:
            KeyError: If a cluster present in the data has no lifespan assumption.
        """
        if lifespan_years is None:
            lifespan_years = self.DEFAULT_LIFESPAN_YEARS

        # Basic segment analysis
        segment_metrics = self.data.groupby('Cluster').agg({
            'TransactionAmount': ['mean', 'count', 'std', 'sum'],
            'CustAccountBalance': ['mean', 'std'],
            'Age': ['mean', 'std'],
            'BalTransRatio': ['mean', 'std']
        }).round(2)

        # Flatten the (column, statistic) MultiIndex into "column_statistic" names
        segment_metrics.columns = ['_'.join(col).strip() for col in segment_metrics.columns]

        customers_in_segment = segment_metrics['TransactionAmount_count']

        # NOTE(review): count / 12 treats the data as covering a year while sum * 12
        # treats it as covering a month -- confirm the observation window.
        segment_metrics['avg_monthly_transactions'] = customers_in_segment / 12
        segment_metrics['annual_transaction_volume'] = segment_metrics['TransactionAmount_sum'] * 12

        # The volume above is a segment-wide total. Convert it to a per-customer
        # figure before combining it with the per-customer balance revenue;
        # otherwise "revenue per customer" is inflated by the segment size.
        segment_metrics['annual_transaction_volume_per_customer'] = (
            segment_metrics['annual_transaction_volume'] / customers_in_segment
        )

        # Revenue streams, both expressed per customer per year
        segment_metrics['annual_transaction_revenue'] = (
            segment_metrics['annual_transaction_volume_per_customer'] * transaction_fee_rate
        )
        segment_metrics['annual_balance_revenue'] = (
            segment_metrics['CustAccountBalance_mean'] * balance_interest_margin
        )
        segment_metrics['total_annual_revenue_per_customer'] = (
            segment_metrics['annual_transaction_revenue'] +
            segment_metrics['annual_balance_revenue']
        )

        # Customer lifecycle assumptions
        missing = [segment for segment in segment_metrics.index if segment not in lifespan_years]
        if missing:
            raise KeyError(f"No lifespan assumption for segment(s): {missing}")
        segment_metrics['estimated_lifespan_years'] = [
            lifespan_years[segment] for segment in segment_metrics.index
        ]

        # CLV = annual revenue x lifespan, reduced by a flat (1 - discount_rate)
        # factor. This is a single haircut, not a year-by-year present-value discount.
        segment_metrics['customer_lifetime_value'] = (
            segment_metrics['total_annual_revenue_per_customer'] *
            segment_metrics['estimated_lifespan_years'] *
            (1 - discount_rate)
        )

        return segment_metrics

    def segment_business_profiles(self):
        """Create detailed business profiles for each segment.

        Returns:
            Dict keyed ``'Segment_<cluster>'`` (clusters in sorted order). Each value
            holds the row count, share of the base, mean balance / transaction
            amount / age, gender shares, age range and the three most common locations.
        """
        profiles = {}

        for cluster in sorted(self.data['Cluster'].unique()):
            cluster_data = self.data[self.data['Cluster'] == cluster]

            profiles[f'Segment_{cluster}'] = {
                'customer_count': len(cluster_data),
                'percentage_of_base': (len(cluster_data) / len(self.data)) * 100,
                'avg_account_balance': cluster_data['CustAccountBalance'].mean(),
                'avg_transaction_amount': cluster_data['TransactionAmount'].mean(),
                # Number of non-null transaction amounts in the segment
                'transaction_frequency': cluster_data['TransactionAmount'].count(),
                'gender_split': cluster_data['CustGender'].value_counts(normalize=True).to_dict(),
                'age_range': f"{cluster_data['Age'].min()}-{cluster_data['Age'].max()}",
                'avg_age': cluster_data['Age'].mean(),
                'top_locations': cluster_data['CustLocation'].value_counts().head(3).to_dict()
            }

        return profiles

# Build the analyzer once; derive the CLV table and the segment profiles from it.
clv_analyzer = CLVAnalyzer(data)
clv_metrics = clv_analyzer.calculate_clv_metrics()
business_profiles = clv_analyzer.segment_business_profiles()

# Headline columns only, rounded for display.
clv_headline_columns = [
    'total_annual_revenue_per_customer',
    'customer_lifetime_value',
    'estimated_lifespan_years',
]
print("\n=== CUSTOMER LIFETIME VALUE BY SEGMENT ===", clv_metrics[clv_headline_columns].round(2), sep="\n")



=== CUSTOMER LIFETIME VALUE BY SEGMENT ===
         total_annual_revenue_per_customer  customer_lifetime_value  \
Cluster                                                               
0                               2177052.89               6857716.61   
1                               7144191.04              33434814.07   
2                               4006146.07              14782678.99   

         estimated_lifespan_years  
Cluster                            
0                             3.5  
1                             5.2  
2                             4.1  


### Churn Risk Prediction Model

In [None]:
#create_churn_risk_features: Engineer features indicative of churn risk
#train_churn_model: Train churn prediction model
#segment_churn_analysis: Analyze churn risk by customer segment


class ChurnRiskPredictor:
    """Rule-based churn-risk labelling plus a random forest fitted on those labels.

    The target is synthesised from quantile/age rules on ``CustAccountBalance``,
    ``BalTransRatio``, ``Age`` and ``TransactionAmount``. Those same columns are
    model inputs, so the held-out scores show how well the forest recovers the
    rule rather than how well it predicts observed churn.
    """

    def __init__(self, data):
        """Store the customer table and prepare an unfitted scaler."""
        self.data = data
        self.model = None
        self.scaler = StandardScaler()

    def create_churn_risk_features(self):
        """Return a copy of the data with risk flags, a weighted score and a binary label.

        Added columns, in order: ``low_balance_risk``, ``high_transaction_ratio_risk``,
        ``age_risk``, ``infrequent_user_risk``, ``churn_risk_score``, ``high_churn_risk``.
        """
        frame = self.data.copy()

        balance = frame['CustAccountBalance']
        ratio = frame['BalTransRatio']
        age = frame['Age']
        amount = frame['TransactionAmount']

        # Binary risk indicators: bottom/top quartile membership and age outside 25-65
        frame['low_balance_risk'] = balance.lt(balance.quantile(0.25)).astype(int)
        frame['high_transaction_ratio_risk'] = ratio.gt(ratio.quantile(0.75)).astype(int)
        frame['age_risk'] = (age.lt(25) | age.gt(65)).astype(int)
        frame['infrequent_user_risk'] = amount.lt(amount.quantile(0.25)).astype(int)

        # Synthetic score (demonstration only): weighted sum of the flags,
        # accumulated in a fixed order so the floating-point result is stable.
        flag_weights = (
            ('low_balance_risk', 0.3),
            ('high_transaction_ratio_risk', 0.25),
            ('age_risk', 0.2),
            ('infrequent_user_risk', 0.25),
        )
        score = None
        for flag, weight in flag_weights:
            contribution = frame[flag] * weight
            score = contribution if score is None else score + contribution
        frame['churn_risk_score'] = score

        # High risk = 1 when the score is strictly above 0.5, otherwise 0
        frame['high_churn_risk'] = frame['churn_risk_score'].gt(0.5).astype(int)

        return frame

    def train_churn_model(self):
        """Fit the scaler and random forest, print evaluation output, return the labelled frame.

        Side effects: sets ``self.model``, fits ``self.scaler``, prints ROC-AUC, the
        classification report and the five most important features.
        """
        labelled = self.create_churn_risk_features()

        # Model inputs, in the order the scaler and forest will see them
        predictor_columns = ['CustAccountBalance', 'TransactionAmount', 'Age', 'BalTransRatio',
                             'CustGender', 'TransactionMonth', 'Cluster']
        inputs = labelled[predictor_columns]
        target = labelled['high_churn_risk']

        # 70/30 stratified split with a fixed seed
        X_train, X_test, y_train, y_test = train_test_split(
            inputs, target, test_size=0.3, random_state=42, stratify=target
        )

        # Scaling statistics come from the training portion only
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
        self.model.fit(X_train_scaled, y_train)

        # Hard predictions and positive-class probabilities on the held-out part
        y_pred = self.model.predict(X_test_scaled)
        y_prob = self.model.predict_proba(X_test_scaled)[:, 1]

        print("\n=== CHURN RISK MODEL PERFORMANCE ===")
        print(f"ROC-AUC Score: {roc_auc_score(y_test, y_prob):.3f}")
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred))

        # Rank the inputs by impurity-based importance
        importance_table = pd.DataFrame({
            'feature': predictor_columns,
            'importance': self.model.feature_importances_
        })
        feature_importance = importance_table.sort_values(by='importance', ascending=False)

        print("\nTop Features for Churn Prediction:")
        print(feature_importance.head())

        return labelled

    def segment_churn_analysis(self, df):
        """Summarise the risk label and score per cluster.

        Args:
            df: Frame returned by ``create_churn_risk_features`` / ``train_churn_model``.

        Returns:
            DataFrame indexed by ``Cluster`` with ``total_customers``,
            ``high_risk_customers``, ``churn_rate`` (share labelled high risk) and
            ``avg_risk_score``, rounded to three decimals.
        """
        per_cluster = df.groupby('Cluster').agg(
            total_customers=('high_churn_risk', 'count'),
            high_risk_customers=('high_churn_risk', 'sum'),
            churn_rate=('high_churn_risk', 'mean'),
            avg_risk_score=('churn_risk_score', 'mean'),
        )
        return per_cluster.round(3)

# Fit the churn model on the loaded customers, then summarise the risk labels per cluster.
churn_predictor = ChurnRiskPredictor(data)
enriched_data = churn_predictor.train_churn_model()  # prints the evaluation report as it runs
churn_analysis = churn_predictor.segment_churn_analysis(enriched_data)

print("\n=== CHURN RISK BY SEGMENT ===", churn_analysis, sep="\n")


=== CHURN RISK MODEL PERFORMANCE ===
ROC-AUC Score: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     44958
           1       1.00      1.00      1.00     13821

    accuracy                           1.00     58779
   macro avg       1.00      1.00      1.00     58779
weighted avg       1.00      1.00      1.00     58779


Top Features for Churn Prediction:
              feature  importance
0  CustAccountBalance    0.590881
3       BalTransRatio    0.287361
6             Cluster    0.083741
1   TransactionAmount    0.036086
2                 Age    0.001239

=== CHURN RISK BY SEGMENT ===
         total_customers  high_risk_customers  churn_rate  avg_risk_score
Cluster                                                                  
0                  29990                    0       0.000           0.051
1                 108755                46068       0.424           0.317
2                  5718

### Product Recommendation Engine

In [None]:
#define_product_portfolio: Define bank's product portfolio with segment affinity
#calculate_product_recommendations: Calculate next-best-product for each segment

class ProductRecommendationEngine:
    """Rank a fixed product portfolio for each customer cluster."""

    def __init__(self, data):
        """Store the customer table; only its ``Cluster`` column is read."""
        self.data = data

    def define_product_portfolio(self):
        """Return the product catalogue.

        Each product maps to ``segment_affinity`` (cluster id -> expected adoption
        share), ``revenue_per_customer`` and ``acquisition_cost`` per adopter.
        """
        products = {
            'Credit_Card': {
                'segment_affinity': {0: 0.3, 1: 0.8, 2: 0.6},
                'revenue_per_customer': 1200,
                'acquisition_cost': 150
            },
            'Personal_Loan': {
                'segment_affinity': {0: 0.2, 1: 0.6, 2: 0.7},
                'revenue_per_customer': 2400,
                'acquisition_cost': 300
            },
            'Investment_Portfolio': {
                'segment_affinity': {0: 0.1, 1: 0.9, 2: 0.4},
                'revenue_per_customer': 3600,
                'acquisition_cost': 500
            },
            'Premium_Banking': {
                'segment_affinity': {0: 0.1, 1: 0.8, 2: 0.3},
                'revenue_per_customer': 4800,
                'acquisition_cost': 800
            },
            'Mobile_Banking_Plus': {
                'segment_affinity': {0: 0.7, 1: 0.5, 2: 0.8},
                'revenue_per_customer': 360,
                'acquisition_cost': 50
            }
        }

        return products

    def calculate_product_recommendations(self, rank_by='roi'):
        """Score every product for every cluster and return them best-first.

        Args:
            rank_by: Ordering criterion.
                ``'roi'`` (default) sorts by ``(revenue - cost) / cost``. Revenue and
                cost are both scaled by the same adopter count, so this ratio does
                not depend on the segment and every segment gets the same ranking.
                ``'net_value'`` sorts by ``projected_revenue - acquisition_cost`` and
                ``'projected_revenue'`` by revenue alone; both reflect segment
                size and affinity.

        Returns:
            Dict keyed ``'Segment_<cluster>'`` whose values are lists of dicts with
            ``product``, ``affinity_score``, ``expected_adopters``,
            ``projected_revenue``, ``acquisition_cost`` and ``roi``.

        Raises:
            ValueError: If ``rank_by`` is not one of the supported criteria.
        """
        ranking_keys = {
            'roi': lambda rec: rec['roi'],
            'net_value': lambda rec: rec['projected_revenue'] - rec['acquisition_cost'],
            'projected_revenue': lambda rec: rec['projected_revenue'],
        }
        if rank_by not in ranking_keys:
            raise ValueError(
                f"rank_by must be one of {sorted(ranking_keys)}, got {rank_by!r}"
            )

        products = self.define_product_portfolio()
        segment_counts = self.data['Cluster'].value_counts()

        recommendations = {}

        for segment in sorted(self.data['Cluster'].unique()):
            segment_size = segment_counts[segment]
            segment_recs = []

            for product, details in products.items():
                affinity = details['segment_affinity'][segment]
                expected_customers = segment_size * affinity

                revenue = expected_customers * details['revenue_per_customer']
                cost = expected_customers * details['acquisition_cost']
                # Zero expected adopters means zero cost; report ROI as 0 instead of dividing by zero
                roi = (revenue - cost) / cost if cost > 0 else 0

                segment_recs.append({
                    'product': product,
                    'affinity_score': affinity,
                    'expected_adopters': int(expected_customers),
                    'projected_revenue': revenue,
                    'acquisition_cost': cost,
                    'roi': roi
                })

            # Best first; the sort is stable, so ties keep catalogue order
            segment_recs.sort(key=ranking_keys[rank_by], reverse=True)
            recommendations[f'Segment_{segment}'] = segment_recs

        return recommendations

# Score the portfolio for every cluster and show the three leading products per segment.
product_engine = ProductRecommendationEngine(data)
product_recommendations = product_engine.calculate_product_recommendations()

print("\n=== NEXT-BEST-PRODUCT RECOMMENDATIONS ===")
for segment in product_recommendations:
    recs = product_recommendations[segment]
    print(f"\n{segment} Top 3 Recommendations:")
    # Pairing with range(1, 4) both numbers the entries and caps them at three
    for i, rec in zip(range(1, 4), recs):
        print(f"{i}. {rec['product']}: ROI {rec['roi']:.1%}, Revenue ₹{rec['projected_revenue']:,.0f}")



=== NEXT-BEST-PRODUCT RECOMMENDATIONS ===

Segment_0 Top 3 Recommendations:
1. Credit_Card: ROI 700.0%, Revenue ₹10,796,400
2. Personal_Loan: ROI 700.0%, Revenue ₹14,395,200
3. Investment_Portfolio: ROI 620.0%, Revenue ₹10,796,400

Segment_1 Top 3 Recommendations:
1. Credit_Card: ROI 700.0%, Revenue ₹104,404,800
2. Personal_Loan: ROI 700.0%, Revenue ₹156,607,200
3. Investment_Portfolio: ROI 620.0%, Revenue ₹352,366,200

Segment_2 Top 3 Recommendations:
1. Credit_Card: ROI 700.0%, Revenue ₹41,171,760
2. Personal_Loan: ROI 700.0%, Revenue ₹96,067,440
3. Investment_Portfolio: ROI 620.0%, Revenue ₹82,343,520


### A/B Testing Framework

In [None]:
#design_segment_experiments: Design A/B tests for each customer segment
#calculate_test_requirements: Split each segment into control/test groups and estimate the revenue impact (simplified; no power analysis is performed)

class ABTestingFramework:
    """Describe one experiment per customer segment and size its control/test groups."""

    def __init__(self, data):
        """Store the customer table; only its ``Cluster`` column is read."""
        self.data = data

    def design_segment_experiments(self):
        """Return the experiment catalogue keyed by ``'Segment_<cluster>'``.

        Every experiment uses a 0.5 control share and a 0.95 confidence level;
        ``revenue_uplift`` is expressed per customer per year.
        """
        def experiment(name, hypothesis, duration_days, metric, criteria, uplift):
            # All experiments share the same layout; only the arguments vary.
            return {
                'experiment_name': name,
                'hypothesis': hypothesis,
                'control_group_size': 0.5,
                'test_duration_days': duration_days,
                'primary_metric': metric,
                'success_criteria': criteria,
                'estimated_impact': {
                    'revenue_uplift': uplift,
                    'confidence_level': 0.95
                }
            }

        return {
            'Segment_0': experiment(
                'Mobile Banking Adoption',
                'Personalized mobile app tutorials increase transaction frequency',
                60, 'transaction_frequency', 'increase_by_15_percent', 180,
            ),
            'Segment_1': experiment(
                'Premium Product Cross-sell',
                'Targeted investment product offers increase portfolio adoption',
                90, 'product_adoption_rate', 'increase_by_25_percent', 1200,
            ),
            'Segment_2': experiment(
                'Credit Product Optimization',
                'Simplified credit application process increases approval rates',
                45, 'credit_application_rate', 'increase_by_20_percent', 800,
            ),
        }

    def calculate_test_requirements(self, experiments):
        """Split each segment into control and test groups and project the uplift.

        Args:
            experiments: Mapping as returned by ``design_segment_experiments``; the
                cluster id is parsed from the part of each key after the first ``_``.

        Returns:
            Dict with the same keys holding group sizes, test duration, experiment
            name and ``expected_revenue_impact`` (annual uplift x test-group size).
        """
        customers_per_cluster = self.data['Cluster'].value_counts()

        plan = {}
        for label in experiments:
            spec = experiments[label]
            cluster_id = int(label.split('_')[1])
            eligible = customers_per_cluster[cluster_id]

            # Control share is truncated to whole customers; the test group takes the rest
            n_control = int(eligible * spec['control_group_size'])
            n_test = eligible - n_control
            uplift_per_customer = spec['estimated_impact']['revenue_uplift']

            plan[label] = {
                'total_eligible_customers': eligible,
                'control_group_size': n_control,
                'test_group_size': n_test,
                'test_duration': spec['test_duration_days'],
                'expected_revenue_impact': uplift_per_customer * n_test,
                'experiment_name': spec['experiment_name']
            }

        return plan

# Design the per-segment experiments and size their control/test groups.
ab_framework = ABTestingFramework(data)
experiments = ab_framework.design_segment_experiments()
test_design = ab_framework.calculate_test_requirements(experiments)

print("\n=== A/B TESTING FRAMEWORK ===")
for segment in test_design:
    design = test_design[segment]
    # One joined print per segment; the emitted lines are unchanged
    print("\n".join([
        f"\n{segment} - {design['experiment_name']}:",
        f"  Control Group: {design['control_group_size']:,} customers",
        f"  Test Group: {design['test_group_size']:,} customers",
        f"  Expected Revenue Impact: ₹{design['expected_revenue_impact']:,.0f}",
    ]))


=== A/B TESTING FRAMEWORK ===

Segment_0 - Mobile Banking Adoption:
  Control Group: 14,995 customers
  Test Group: 14,995 customers
  Expected Revenue Impact: ₹2,699,100

Segment_1 - Premium Product Cross-sell:
  Control Group: 54,377 customers
  Test Group: 54,378 customers
  Expected Revenue Impact: ₹65,253,600

Segment_2 - Credit Product Optimization:
  Control Group: 28,591 customers
  Test Group: 28,592 customers
  Expected Revenue Impact: ₹22,873,600


### Model Monitoring & Drift Detection

In [None]:
#detect_data_drift: Detect statistical drift in incoming data
#performance_monitoring: Monitor model performance degradation

class ModelMonitoringSystem:
    """Compare incoming data and model accuracy against a reference snapshot."""

    # Columns checked with the two-sample Kolmogorov-Smirnov test
    NUMERICAL_FEATURES = ('CustAccountBalance', 'TransactionAmount', 'Age', 'BalTransRatio')
    # Columns checked with the chi-square test of independence
    CATEGORICAL_FEATURES = ('CustGender', 'Cluster')

    def __init__(self, reference_data):
        """Store the reference DataFrame that new batches are compared against."""
        self.reference_data = reference_data

    @staticmethod
    def _drift_entry(test_name, statistic, p_value, threshold):
        """Build the result record shared by the KS and chi-square checks."""
        if p_value < 0.01:
            severity = 'HIGH'
        elif p_value < 0.05:
            severity = 'MEDIUM'
        else:
            severity = 'LOW'
        return {
            'test': test_name,
            'statistic': statistic,
            'p_value': p_value,
            # Plain bool so the record serialises cleanly
            'drift_detected': bool(p_value < threshold),
            'severity': severity
        }

    def detect_data_drift(self, new_data, threshold=0.05):
        """Detect statistical drift in incoming data.

        Args:
            new_data: DataFrame holding the new batch; monitored columns that are
                absent from it are skipped.
            threshold: p-value below which ``drift_detected`` is True. The
                ``severity`` bands are fixed at 0.01 / 0.05 regardless of it.

        Returns:
            Dict of feature name -> result record (``test``, ``statistic``,
            ``p_value``, ``drift_detected``, ``severity``). If the chi-square test
            cannot be computed the record holds ``test`` and ``error`` instead.
        """
        drift_results = {}

        # Numerical features: two-sample KS test on the raw values
        for feature in self.NUMERICAL_FEATURES:
            if feature in new_data.columns:
                statistic, p_value = ks_2samp(
                    self.reference_data[feature],
                    new_data[feature]
                )
                drift_results[feature] = self._drift_entry('KS_test', statistic, p_value, threshold)

        # Categorical features: chi-square test on the category COUNTS.
        # Feeding proportions would make the statistic ignore the sample size
        # and leave the p-value near 1 whatever the shift.
        for feature in self.CATEGORICAL_FEATURES:
            if feature in new_data.columns:
                ref_counts = self.reference_data[feature].value_counts()
                new_counts = new_data[feature].value_counts()

                # Align both samples on the union of categories, in a stable order
                categories = ref_counts.index.union(new_counts.index)
                contingency = [
                    ref_counts.reindex(categories, fill_value=0).to_numpy(),
                    new_counts.reindex(categories, fill_value=0).to_numpy(),
                ]

                try:
                    chi2, p_value, _, _ = chi2_contingency(contingency)
                except ValueError:
                    # Raised e.g. when a sample is empty, giving zero expected frequencies
                    drift_results[feature] = {
                        'test': 'Chi2_test',
                        'error': 'Could not compute chi-square test'
                    }
                else:
                    drift_results[feature] = self._drift_entry('Chi2_test', chi2, p_value, threshold)

        return drift_results

    def performance_monitoring(self, model, new_data, new_labels, performance_threshold=0.85):
        """Monitor model performance degradation.

        Args:
            model: Object exposing ``predict(new_data)``.
            new_data: Inputs passed to ``model.predict`` unchanged.
            new_labels: True labels, compared to the predictions by position.
            performance_threshold: Accuracy below which retraining is recommended.

        Returns:
            Dict with ``current_accuracy``, ``performance_threshold``,
            ``performance_degradation``, ``retrain_recommended`` and
            ``degradation_severity``. The severity bands are fixed at 0.7 / 0.8, so
            an accuracy between 0.8 and the threshold is flagged as degraded with
            severity ``'LOW'``.
        """
        # Compare as arrays so lists, Series and ndarrays all work positionally
        predictions = np.asarray(model.predict(new_data))
        current_accuracy = (predictions == np.asarray(new_labels)).mean()
        below_threshold = bool(current_accuracy < performance_threshold)

        monitoring_results = {
            'current_accuracy': current_accuracy,
            'performance_threshold': performance_threshold,
            'performance_degradation': below_threshold,
            'retrain_recommended': below_threshold,
            'degradation_severity': 'HIGH' if current_accuracy < 0.7 else 'MEDIUM' if current_accuracy < 0.8 else 'LOW'
        }

        return monitoring_results

# The full customer table is the reference snapshot.
monitor = ModelMonitoringSystem(data)

# Simulated incoming batch: 1000 sampled rows whose balances are scaled by noise
# centred on 1.1. Seed first, then sample, then draw the noise, so the run is reproducible.
np.random.seed(42)
new_data_simulation = data.sample(1000).copy()
balance_noise = np.random.normal(1.1, 0.1, len(new_data_simulation))
new_data_simulation['CustAccountBalance'] = new_data_simulation['CustAccountBalance'] * balance_noise  # Simulate drift

drift_results = monitor.detect_data_drift(new_data_simulation)

print("\n=== MODEL MONITORING RESULTS ===", "Data Drift Detection:", sep="\n")
for feature in drift_results:
    result = drift_results[feature]
    # Records without a verdict (failed tests) are not reported
    if 'drift_detected' not in result:
        continue
    if result['drift_detected']:
        status = "DRIFT DETECTED"
    else:
        status = "NO DRIFT"
    print(f"{feature}: {status} (p-value: {result['p_value']:.4f}, Severity: {result['severity']})")



=== MODEL MONITORING RESULTS ===
Data Drift Detection:
CustAccountBalance: DRIFT DETECTED (p-value: 0.0052, Severity: HIGH)
TransactionAmount: NO DRIFT (p-value: 0.5204, Severity: LOW)
Age: DRIFT DETECTED (p-value: 0.0296, Severity: MEDIUM)
BalTransRatio: NO DRIFT (p-value: 0.8112, Severity: LOW)
CustGender: NO DRIFT (p-value: 1.0000, Severity: LOW)
Cluster: NO DRIFT (p-value: 0.9998, Severity: LOW)


### Business Impact Summary

In [None]:
#calculate_total_business_impact: Calculate comprehensive business impact

class BusinessImpactCalculator:
    """Roll the CLV, cross-sell and A/B-test estimates up into one summary."""

    def __init__(self, clv_metrics, product_recommendations, test_design, data):
        """Store the inputs of the roll-up.

        Args:
            clv_metrics: DataFrame indexed by cluster id with a
                ``customer_lifetime_value`` column.
            product_recommendations: Mapping of segment label -> ranked list of
                dicts carrying ``projected_revenue``.
            test_design: Mapping of segment label -> dict carrying
                ``expected_revenue_impact``.
            data: Customer table; only its ``Cluster`` column is read.
        """
        self.clv_metrics = clv_metrics
        self.product_recommendations = product_recommendations
        self.test_design = test_design
        self.data = data

    def calculate_total_business_impact(self, churn_rate_reduction=0.05, clv_saved_share=0.10):
        """Combine the individual estimates into a single impact summary.

        Args:
            churn_rate_reduction: Assumed reduction in churn rate (default 5%).
            clv_saved_share: Share of CLV that reduction is assumed to save (default 10%).

        Returns:
            Dict with ``current_customer_base_value``, ``product_cross_sell_opportunity``,
            ``ab_testing_uplift_potential``, ``churn_prevention_value``,
            ``total_additional_value_potential`` and ``roi_multiple``.

        Raises:
            KeyError: If a cluster present in ``data`` is missing from ``clv_metrics``.
        """
        # Use the clusters actually present, not a fixed range(3); sorted so the
        # floating-point sum is accumulated in a stable order.
        segment_counts = self.data['Cluster'].value_counts().sort_index()

        # Current business value: segment size x CLV per customer
        current_value = 0
        for segment, segment_size in segment_counts.items():
            clv_per_customer = self.clv_metrics.loc[segment, 'customer_lifetime_value']
            current_value += segment_size * clv_per_customer

        # Cross-sell potential: first-ranked product of each segment; empty lists are skipped
        product_value = sum(
            recs[0]['projected_revenue']
            for recs in self.product_recommendations.values()
            if recs
        )

        # A/B testing potential uplift
        ab_testing_value = sum(design['expected_revenue_impact'] for design in self.test_design.values())

        # Churn prevention value under the stated assumptions
        churn_prevention_value = current_value * churn_rate_reduction * clv_saved_share

        additional_value = product_value + ab_testing_value + churn_prevention_value

        impact_summary = {
            'current_customer_base_value': current_value,
            'product_cross_sell_opportunity': product_value,
            'ab_testing_uplift_potential': ab_testing_value,
            'churn_prevention_value': churn_prevention_value,
            'total_additional_value_potential': additional_value,
            # An empty or zero-value base has no meaningful multiple; report 0.0
            'roi_multiple': additional_value / current_value if current_value else 0.0
        }

        return impact_summary

# Combine the CLV table, cross-sell ranking and A/B design into one impact summary.
impact_calculator = BusinessImpactCalculator(
    clv_metrics=clv_metrics,
    product_recommendations=product_recommendations,
    test_design=test_design,
    data=data,
)
business_impact = impact_calculator.calculate_total_business_impact()

# Report lines in display order; printed one per line.
impact_report = [
    "COMPREHENSIVE BUSINESS IMPACT ANALYSIS",
    f"\nCurrent Customer Base Value: ₹{business_impact['current_customer_base_value']:,.0f}",
    f"Product Cross-sell Opportunity: ₹{business_impact['product_cross_sell_opportunity']:,.0f}",
    f"A/B Testing Uplift Potential: ₹{business_impact['ab_testing_uplift_potential']:,.0f}",
    f"Churn Prevention Value: ₹{business_impact['churn_prevention_value']:,.0f}",
    f"\nTotal Additional Value Potential: ₹{business_impact['total_additional_value_potential']:,.0f}",
    f"ROI Multiple: {business_impact['roi_multiple']:.1f}x",
]
print(*impact_report, sep="\n")

COMPREHENSIVE BUSINESS IMPACT ANALYSIS

Current Customer Base Value: ₹4,687,184,058,019
Product Cross-sell Opportunity: ₹156,372,960
A/B Testing Uplift Potential: ₹90,826,300
Churn Prevention Value: ₹23,435,920,290

Total Additional Value Potential: ₹23,683,119,550
ROI Multiple: 0.0x


### CRM Strategy Recommendations

IMMEDIATE ACTIONS (0-3 months):
1. Deploy churn prediction model for high-risk customer identification
2. Launch mobile banking optimization campaign for Segment 0
3. Implement product recommendation engine for cross-selling

MEDIUM-TERM INITIATIVES (3-12 months):
4. Execute A/B testing framework across all segments
5. Deploy model monitoring system for production stability
6. Develop automated customer lifecycle management workflows

LONG-TERM STRATEGIC GOALS (12+ months):
7. Achieve 15% increase in customer lifetime value
8. Reduce churn rate by 25% through predictive interventions
9. Increase product penetration by 30% via targeted recommendations

### Save Models

In [None]:
# Persist the fitted classifier together with the scaler it was trained with.
if churn_predictor.model is not None:
    churn_artifacts = (
        (churn_predictor.model, 'churn_prediction_model.pkl'),
        (churn_predictor.scaler, 'churn_model_scaler.pkl'),
    )
    for artifact, filename in churn_artifacts:
        joblib.dump(artifact, filename)
    print("✓ Churn prediction model saved")

✓ Churn prediction model saved


In [None]:
#Save Business metrics
business_results = {
    'clv_metrics': clv_metrics.to_dict(),
    'business_profiles': business_profiles,
    'product_recommendations': product_recommendations,
    'ab_testing_design': test_design,
    'business_impact_summary': business_impact
}


def _to_jsonable(value):
    """Fallback encoder for json.dump.

    NumPy scalars (e.g. int64 counts) are converted to native Python numbers so
    they are written as JSON numbers rather than quoted strings; anything else
    the encoder cannot handle is stringified.
    """
    if isinstance(value, np.generic):
        return value.item()
    return str(value)


with open('crm_business_analysis.json', 'w', encoding='utf-8') as f:
    json.dump(business_results, f, indent=2, default=_to_jsonable)
print("✓ Business analysis results saved")

✓ Business analysis results saved


In [None]:
# Write the customer table with its engineered risk columns to CSV, without the index.
enhanced_csv_path = 'enhanced_customer_data_with_predictions.csv'
enriched_data.to_csv(enhanced_csv_path, index=False)
print("✓ Enhanced dataset with predictions saved")

✓ Enhanced dataset with predictions saved
