# Dohtem E-commerce Personalization Strategy #

Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime
import pickle
import json
import os

Configure Plotting

In [None]:
# Apply Matplotlib's 'seaborn-v0_8' style sheet to every figure created later.
plt.style.use('seaborn-v0_8')
# Make seaborn's 'husl' palette the default colour cycle.
# NOTE(review): presumably set after the style sheet so the style does not
# override the palette — confirm before reordering.
sns.set_palette('husl')
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides warnings raised by the modelling steps below;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

Set random seed for reproducibility

In [None]:
# Seed NumPy's global random generator so that code drawing from it produces
# the same numbers on every run of the notebook.
np.random.seed(42)

# Plain statements: the original trailing comma after print(...) turned the
# call into a throwaway one-element tuple expression.
print("Libraries imported successfully!")
print(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

Custom Modules

In [None]:
from data_processor import DohtemDataProcessor
from customer_segmentation import CustomerSegmentation
from churn_prediction import ChurnPredictor
from personalization_engine import PersonalizationEngine
from business_impact import BusinessImpactCalculator
from visualization_utils import create_comprehensive_visualizations

Step 1: Data Loading and Exploration

In [None]:
# Point the project's data processor at the raw customer CSV (the path is
# relative to the notebook's working directory) and load it.
# raw_data is used by later cells as a pandas-style frame (.shape, .head()).
data_processor = DohtemDataProcessor('dohtem_ecommerce_customers.csv')
raw_data = data_processor.load_data()

Display Basic Information

In [None]:
# Summarise the raw dataset: dimensions, churn class balance, and a preview.
print(f"Dataset Shape: {raw_data.shape}")
print(" Target Variable Distribution:")
# normalize=True reports each Churn class as a share of all rows.
print(raw_data['Churn'].value_counts(normalize=True).round(3))

# display() renders the frame itself and returns None, so interpolating it
# into an f-string printed "First few rows: None". Print the label first,
# then render the preview.
print("First few rows:")
display(raw_data.head())

Comprehensive data exploration

In [None]:
# Run the processor's exploration step; the result is indexed by feature and
# carries 'Missing_Count' and 'Missing_Percent' columns (both are read below).
missing_analysis = data_processor.explore_data()

# Keep only features that actually have gaps, sorted so the horizontal bars
# are drawn in order of missing percentage.
missing_data = missing_analysis[missing_analysis['Missing_Count'] > 0].sort_values('Missing_Percent')

if missing_data.empty:
    print("No missing data")
else:
    # Create the figure only when there is something to plot; creating it
    # before the check left an empty figure behind when nothing was missing.
    plt.figure(figsize=(12, 6))
    plt.barh(missing_data.index, missing_data['Missing_Percent'])
    plt.xlabel('Missing Data Percentage')
    plt.title('Missing Data Analysis by Feature')
    plt.tight_layout()
    plt.show()

Step 2: Data Preprocessing

In [None]:
# Run the processor's preprocessing step and report how many columns it added
# relative to the raw frame.
processed_data = data_processor.preprocess_data()
print("Data preprocessing completed!")
print(f"New features created: {processed_data.shape[1] - raw_data.shape[1]}")

# Engineered features this notebook expects the preprocessing step to create.
new_features = ['CustomerValueScore', 'EngagementScore', 'HighRiskCategory', 'HighRiskPayment', 'LowSatisfaction']

# Restrict to the features that are actually present. The summary loop already
# tolerated missing columns, but the preview below indexed all of them and
# would raise KeyError if any was absent.
available_features = [name for name in new_features if name in processed_data.columns]

print("New Engineer Features:")
for feature in available_features:
    print(f"{feature}: Mean = {processed_data[feature].mean():.2f}, Std = {processed_data[feature].std():.2f}")

# Display processed data samples
display(processed_data[['CustomerID', 'Churn'] + available_features].head())

Step 3: Customer Segmentation - Use K-Means clustering to identify distinct customer segments

In [None]:
# Initialize the segmentation model with five clusters
segmentation = CustomerSegmentation(n_clusters=5)
# Fit on a copy of the processed frame; the returned frame (segmented_data)
# is the one every later step consumes.
segmented_data = segmentation.fit_segments(processed_data.copy())

# Create models directory if it doesn't exist
os.makedirs('models', exist_ok=True)

# Save segmentation model
segmentation.save_model('models/customer_segmentation_model.pkl')
print(f"{segmentation.n_clusters} customer segments identified")
print("Customer Segmentation model saved to: models/customer_segmentation_model.pkl")

Visualize customer segments

In [None]:
# Draw the segmentation plots, then title and lay out the current figure
# before rendering it.
fig = segmentation.plot_segments(figsize=(15, 10))
plt.suptitle('Customer Segmentation Analysis', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

# Tabulate the per-segment summary underneath the plots.
segment_summary = segmentation.get_segment_summary()
print("Customer Segmentation Summary:")
display(segment_summary)

Step 4: Churn Prediction Model

In [None]:
# Initialize churn predictor and train it on the segmented customer frame.
churn_predictor = ChurnPredictor()
print("Training Churn Prediction Model...")
# The returned mapping is read below (and in the final report) via 'test_auc'.
churn_model_results = churn_predictor.train_model(segmented_data)

# Make sure the output directory exists before saving, as every other cell
# that writes a file does, so this cell does not depend on an earlier cell
# having created 'models/'.
os.makedirs('models', exist_ok=True)

# Save the trained model
churn_predictor.save_model('models/churn_prediction_model.pkl')
print("Churn Prediction Model training completed!")
print(f"Model AUC Score: {churn_model_results['test_auc']:.4f}")
print("Churn Prediction Model saved to: models/churn_prediction_model.pkl")

Visualize model performance

In [None]:
# Draw the churn model's performance plots, then title and lay out the
# current figure before rendering it.
fig = churn_predictor.plot_model_performance(figsize=(15, 5))
plt.suptitle('Churn Prediction Model Performance', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

# Show the first ten rows of the feature-importance table.
# NOTE(review): "top 10" assumes get_feature_importance() returns rows already
# sorted by descending importance — confirm in churn_prediction.
print("Top 10 Most Important Features for Churn Prediction:")
feature_importance = churn_predictor.get_feature_importance()
display(feature_importance.head(10))

Step 5: Personalization Strategy Development

In [None]:
# Initialize personalization engine
personalization_engine = PersonalizationEngine()

# Create personalization strategies from the fitted segmentation model and the
# trained churn predictor. The result supports len() and is reported below as
# one strategy per segment.
strategies = personalization_engine.create_personalization_strategies(segmentation, churn_predictor)

# Create strategies directory
os.makedirs('strategies', exist_ok=True)

# Save strategies (no argument other than the path is passed, so the engine
# presumably serialises the strategies it holds internally — confirm).
personalization_engine.save_strategies('strategies/personalization_strategies.json')

print("Personalization strategies developed!")
print(f"{len(strategies)} segment-specific strategies created")
print("Strategies saved to: strategies/personalization_strategies.json")

Display strategy summary

In [None]:
# Tabulate the strategies held by the personalization engine.
strategy_summary = personalization_engine.get_strategy_summary()
print("PERSONALIZATION STRATEGY SUMMARY:")
display(strategy_summary)

# Visualize strategies: draw the engine's overview plot, then title and lay
# out the current figure before rendering it.
fig = personalization_engine.plot_strategy_overview(figsize=(12, 8))
plt.suptitle('Personalization Strategies by Segment', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

Step 6: Business Impact Analysis

In [None]:
# Initialize business impact calculator
# NOTE(review): both figures are hard-coded assumptions; they are presumably in
# the same currency as the "$" revenue printed below — confirm their source.
impact_calculator = BusinessImpactCalculator(avg_customer_value=1200, implementation_cost=225000)

# Calculate business impact from the segmented customers and the strategies.
# The result is read like a mapping ('total_revenue_impact', 'roi_percentage').
business_impact = impact_calculator.calculate_comprehensive_impact(segmented_data, strategies)

# Create reports directory
os.makedirs('reports', exist_ok=True)

# Save business impact analysis
impact_calculator.save_analysis('reports/business_impact_analysis.json')

print("Business impact analysis completed!")
print(f"Total Revenue Impact: ${business_impact['total_revenue_impact']:,.0f}")
print(f"ROI: {business_impact['roi_percentage']:.0f}%")
print("Analysis saved to: reports/business_impact_analysis.json")

Display detailed business impact metrics

In [None]:
# Tabulate the impact metrics held by the calculator.
impact_summary = impact_calculator.get_impact_summary()
print("BUSINESS IMPACT SUMMARY:")
display(impact_summary)

# Visualize business impact: draw the calculator's plots, then title and lay
# out the current figure before rendering it.
fig = impact_calculator.plot_impact_analysis(figsize=(15, 10))
plt.suptitle('Business Impact Analysis', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

Step 7: Sample Customer Recommendations

In [None]:
# Generate sample recommendations for each segment (two customers per segment
# are requested). The result is sliced and iterated as a sequence of dicts in
# the next cell.
sample_recommendations = personalization_engine.generate_sample_recommendations(segmented_data, churn_predictor, n_samples_per_segment=2)

# Create recommendations directory
os.makedirs('recommendations', exist_ok=True)

# Save recommendations
# default=str makes json fall back to str() for any value it cannot serialise
# natively, so such values are written as strings rather than raising.
with open('recommendations/sample_customer_recommendations.json', 'w') as f:
    json.dump(sample_recommendations, f, indent=2, default=str)

print("Sample recommendations generated!")
print(f"{len(sample_recommendations)} customer recommendations created")
print("Saved to: recommendations/sample_customer_recommendations.json")

Display sample recommendations

In [None]:
print("SAMPLE CUSTOMER RECOMMENDATIONS:")
# Preview at most the first five recommendations. Each one is a dict with the
# keys read below. The outer loop no longer uses enumerate() because its index
# was never read.
for rec in sample_recommendations[:5]:
    print(f"Customer {rec['customer_id']} (Segment {rec['segment']}):")
    # churn_probability is formatted as a percentage with two decimals.
    print(f"Risk Level: {rec['risk_level']} ({rec['churn_probability']:.2%})")
    print(f"Strategy: {rec['strategy_type']}")
    print("Top Actions:")
    # List at most three actions, numbered from 1.
    for rank, action in enumerate(rec['personalization_actions'][:3], 1):
        print(f"     {rank}: {action}")

Step 8: Comprehensive Visualizations

In [None]:
# Create comprehensive visualization suite
viz_results = create_comprehensive_visualizations(
    raw_data=raw_data,
    processed_data=segmented_data,
    segmentation_model=segmentation,
    churn_model=churn_predictor,
    business_impact=business_impact,
    strategies=strategies)

print("Comprehensive visualizations created!")
print(f"{len(viz_results)} visualization files saved in 'visualizations/' directory")

Step 9: Save Final Results

In [None]:
# Create data/processed directory
os.makedirs('data/processed', exist_ok=True)

# Save processed dataset (row index omitted from the CSV)
segmented_data.to_csv('data/processed/dohtem_processed_with_segments.csv', index=False)
print("Processed dataset saved: data/processed/dohtem_processed_with_segments.csv")

# Create comprehensive summary report
summary_report = {
    'analysis_date': datetime.now().isoformat(),
    'dataset_info': {
        'total_customers': len(segmented_data),
        # Mean of the Churn column; equals the churn rate if Churn is 0/1.
        'churn_rate': segmented_data['Churn'].mean(),
        'features_count': len(segmented_data.columns),
    },
    'model_performance': {
        'churn_model_auc': churn_model_results['test_auc'],
        'segments_identified': segmentation.n_clusters,
    },
    'business_impact': business_impact,
    # NOTE(review): the first three insights are hard-coded text, not computed
    # from the data in this run — re-verify them whenever the dataset changes.
    'key_insights': [
        "Counter-intuitive finding: Higher satisfaction scores correlate with higher churn rates",
        "Mobile/Mobile Phone categories show highest churn risk (27%+)",
        "Grocery customers demonstrate excellent retention (4.9% churn)",
        # NOTE(review): this reports 'total_customers_retained' but labels it as
        # customers "identified as priority segment" — confirm the right metric.
        f"{business_impact.get('total_customers_retained', 0):,} high-value, high-risk customers identified as priority segment",
        f"Expected ROI of {business_impact.get('roi_percentage', 0):.0f}% in Year 1 with ${business_impact.get('total_revenue_impact', 0):,.0f} revenue impact",
    ],
}

# Make sure the reports directory exists before writing, as every other cell
# that writes a file does, so this cell does not depend on an earlier cell
# having created 'reports/'.
os.makedirs('reports', exist_ok=True)

# Save summary report
# default=str makes json fall back to str() for any value it cannot serialise
# natively, so such values are written as strings rather than raising.
with open('reports/dohtem_analysis_summary.json', 'w') as f:
    json.dump(summary_report, f, indent=2, default=str)
print("Summary report saved: reports/dohtem_analysis_summary.json")