<a href="https://colab.research.google.com/github/shavindukesara/Telco-Churn-Predictor/blob/main/Telco%20Churn%20Predictor/source%20code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================================================================
# TELCO CUSTOMER CHURN PREDICTION SYSTEM
# ================================================================================

# Install required computational packages for data analysis
!pip install -q scikit-learn tensorflow pandas numpy matplotlib seaborn

# Import core data processing and visualization modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Import machine learning components from scikit-learn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import (classification_report, confusion_matrix,
                            accuracy_score, precision_score, recall_score,
                            f1_score, roc_auc_score, roc_curve, auc)
from sklearn.utils.class_weight import compute_class_weight

# Import deep learning framework components
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Seed NumPy and TensorFlow with the same fixed value so that repeated runs
# start from the same pseudo-random state.
for seed_function in (np.random.seed, tf.random.set_seed):
    seed_function(42)

print("All computational libraries imported and configured successfully!")

# ================================================================================
# LOAD TELCO CUSTOMER DATASET FOR ANALYSIS
# ================================================================================

# Download the Telco churn CSV from IBM's public GitHub repository and load it
# straight into a DataFrame.
dataset_url = 'https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv'
customer_records = pd.read_csv(dataset_url)

# Quick sanity check: (rows, columns) followed by the first few records.
loaded_dimensions = customer_records.shape
print(f"Customer dataset loaded with dimensions: {loaded_dimensions}")
print(customer_records.head())

# ================================================================================
# EXPLORATORY DATA ANALYSIS: CUSTOMER BEHAVIOR PATTERNS
# ================================================================================

# Structural overview: column dtypes, overall shape and per-column null counts.
print("\n--- Dataset Structural Overview ---")
customer_records.info()
null_counts_by_column = customer_records.isnull().sum()
print(f"\nDataset dimensions: {customer_records.shape}")
print(f"\nMissing data points across columns:\n{null_counts_by_column}")

# Target distribution: absolute counts first, then the share of 'Yes' rows.
print("\n--- Customer Churn Distribution Analysis ---")
churn_column = customer_records['Churn']
churn_distribution = churn_column.value_counts()
churn_share = churn_column.value_counts(normalize=True)
print(churn_distribution)
print(f"\nOverall churn percentage: {churn_share['Yes']*100:.2f}%")

# The same counts drawn twice: bar chart on the left, pie chart on the right.
churn_palette = ['#2ecc71', '#e74c3c']
fig, chart_panels = plt.subplots(1, 2, figsize=(14, 5))
bar_panel, pie_panel = chart_panels

bar_panel.bar(churn_distribution.index, churn_distribution.values, color=churn_palette)
bar_panel.set_title('Customer Churn Distribution', fontsize=14, fontweight='bold')
bar_panel.set_ylabel('Customer Count')
# Write each count just above its bar (offset of 50 in data units).
for bar_slot, customer_count in enumerate(churn_distribution.values):
    bar_panel.text(bar_slot, customer_count + 50, str(customer_count), ha='center', fontweight='bold')

pie_panel.pie(churn_distribution.values, labels=churn_distribution.index, autopct='%1.1f%%',
              startangle=90, colors=churn_palette)
pie_panel.set_title('Churn Percentage Breakdown', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

print("\nObservation: Dataset exhibits class imbalance - 73% retained customers vs 27% churned customers")

# ================================================================================
# NUMERICAL ATTRIBUTE ANALYSIS
# ================================================================================

# The three numeric attributes examined in this section (shared by both plots).
numerical_features = ['tenure', 'MonthlyCharges', 'TotalCharges']

# Coerce TotalCharges to a numeric dtype; values that cannot be parsed become
# NaN, which is why the missing count is reported before and after.
print(f"\nMissing entries in TotalCharges before conversion: {customer_records['TotalCharges'].isnull().sum()}")
customer_records['TotalCharges'] = pd.to_numeric(customer_records['TotalCharges'], errors='coerce')
print(f"Missing entries in TotalCharges after conversion: {customer_records['TotalCharges'].isnull().sum()}")

# Prepare clean dataset for visualization purposes (rows without TotalCharges removed)
clean_dataset = customer_records.dropna(subset=['TotalCharges'])

# Visualize distribution of numerical attributes: one histogram per feature
fig, attribute_plots = plt.subplots(1, 3, figsize=(18, 5))
for histogram_axis, numerical_feature in zip(attribute_plots, numerical_features):
    histogram_axis.hist(clean_dataset[numerical_feature], bins=30, edgecolor='black', alpha=0.7, color='skyblue')
    histogram_axis.set_title(f'{numerical_feature} Distribution Pattern', fontsize=12, fontweight='bold')
    histogram_axis.set_xlabel(numerical_feature)
    histogram_axis.set_ylabel('Frequency Count')
plt.tight_layout()
plt.show()

# Comparative analysis of numerical features by churn status: one box plot per
# feature, grouped by the Churn column.
fig, comparative_plots = plt.subplots(1, 3, figsize=(18, 5))
for boxplot_axis, numerical_attribute in zip(comparative_plots, numerical_features):
    customer_records.boxplot(column=numerical_attribute, by='Churn', ax=boxplot_axis, patch_artist=True,
               boxprops=dict(facecolor='lightblue'),
               medianprops=dict(color='red', linewidth=2))
    # Set the title once; the original set it twice and the first text was
    # immediately overwritten.
    boxplot_axis.set_title(f'{numerical_attribute} Distribution by Churn Status', fontsize=12, fontweight='bold')
    boxplot_axis.set_xlabel('Churn Category')
    boxplot_axis.set_ylabel(numerical_attribute)

# Blank out the figure-level title on the box-plot figure.
fig.suptitle('')
plt.tight_layout()
plt.show()

print("\nKey numerical insights:")
print("- Customers who churn typically have shorter relationship duration")
print("- Churned customers often have higher monthly service fees")
print("- Total charges are generally lower for customers who eventually churn")

# ================================================================================
# CATEGORICAL ATTRIBUTE ANALYSIS
# ================================================================================

# Categorical attributes inspected here, one bar chart each on a 2x2 grid.
significant_categories = ['Contract', 'InternetService', 'PaymentMethod', 'OnlineSecurity']
fig, category_panels = plt.subplots(2, 2, figsize=(16, 10))
category_panels = category_panels.ravel()

for category_panel, categorical_feature in zip(category_panels, significant_categories):
    # Row-normalised crosstab: within each category value, the percentage of
    # customers per Churn value.
    churn_by_category = pd.crosstab(
        customer_records[categorical_feature],
        customer_records['Churn'],
        normalize='index',
    ).mul(100)
    churn_by_category.plot(kind='bar', ax=category_panel, color=['#2ecc71', '#e74c3c'])
    category_panel.set_title(f'Churn Rate Analysis: {categorical_feature}', fontsize=11, fontweight='bold')
    category_panel.set_ylabel('Percentage (%)')
    category_panel.tick_params(axis='x', rotation=45)
    category_panel.legend(['No Churn', 'Churn'])
plt.tight_layout()
plt.show()

# Narrative summary printed underneath the charts.
categorical_findings = (
    "\nCritical categorical findings:",
    "- Month-to-month contracts show highest churn rate at ~42%",
    "- Fiber optic internet subscribers have elevated churn likelihood",
    "- Electronic check payment method correlates with higher churn",
    "- Customers without online security features churn more frequently",
)
for finding_line in categorical_findings:
    print(finding_line)

# ================================================================================
# ATTRIBUTE CORRELATION ANALYSIS
# ================================================================================

print("\n--- Feature Correlation Examination ---")

# Prepare categorical features for correlation analysis
from sklearn.preprocessing import LabelEncoder

# Number of features (excluding the target itself) shown in the heatmap and
# listed in the printout.
top_feature_count = 15

# Work on a separate frame without the identifier column. TenureSegment is
# dropped as well if it already exists, since it is a categorical column that
# would not be label-encoded below.
encoded_dataset = customer_records.drop(columns=['customerID', 'TenureSegment'], errors='ignore')

# Make TotalCharges numeric BEFORE collecting the object columns; otherwise a
# still-textual TotalCharges would be label-encoded as if it were a category.
encoded_dataset['TotalCharges'] = pd.to_numeric(encoded_dataset['TotalCharges'], errors='coerce')
encoded_dataset['TotalCharges'] = encoded_dataset['TotalCharges'].fillna(encoded_dataset['TotalCharges'].median())

# Integer-encode every remaining text column so that corr() can include it.
category_encoder = LabelEncoder()
categorical_features = encoded_dataset.select_dtypes(include=['object']).columns.tolist()
for feature_column in categorical_features:
    encoded_dataset[feature_column] = category_encoder.fit_transform(encoded_dataset[feature_column].astype(str))

correlation_structure = encoded_dataset.corr()

# Identify features most correlated with churn (by absolute correlation).
# Position 0 is Churn itself (|r| = 1), so take one extra entry to end up with
# Churn plus the top `top_feature_count` features.
churn_correlations = correlation_structure['Churn'].abs().sort_values(ascending=False)
top_correlated_features = churn_correlations.index[:top_feature_count + 1].tolist()
top_correlation_view = correlation_structure.loc[top_correlated_features, top_correlated_features]

# Visualize correlation patterns
plt.figure(figsize=(14, 10))
sns.heatmap(top_correlation_view,
            annot=True,
            fmt='.2f',
            cmap='RdYlBu_r',
            center=0,
            square=True,
            linewidths=0.5,
            linecolor='white',
            cbar_kws={'shrink': 0.8, 'label': 'Correlation Coefficient'},
            annot_kws={'size': 9, 'weight': 'bold'})

plt.title('Top 15 Feature Correlations with Churn', fontsize=16, fontweight='bold')
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.yticks(rotation=0, fontsize=10)
plt.tight_layout()
plt.show()

print("\nTop 15 attributes correlated with churn:")
# Skip position 0 (Churn itself); explicit positional slicing via iloc.
print(churn_correlations.iloc[1:top_feature_count + 1])

# ================================================================================
# TENURE SEGMENTATION ANALYSIS
# ================================================================================

# Create tenure segments for detailed analysis.
# include_lowest=True closes the first interval on the left so that customers
# with tenure == 0 land in the '0-12' segment; with the default right-closed
# bins they would fall outside every bin and receive a NaN segment.
tenure_intervals = [0, 12, 24, 36, 48, 60, 72]
tenure_segment_labels = ['0-12', '13-24', '25-36', '37-48', '49-60', '61-72']
customer_records['TenureSegment'] = pd.cut(
    customer_records['tenure'],
    bins=tenure_intervals,
    labels=tenure_segment_labels,
    include_lowest=True,
)
# Row-normalised crosstab: percentage of each Churn value within a segment.
churn_by_tenure = pd.crosstab(customer_records['TenureSegment'], customer_records['Churn'], normalize='index') * 100

# Create the axes explicitly and hand them to DataFrame.plot; calling
# plt.figure() first and then plotting without `ax` leaves an empty extra figure.
tenure_figure, tenure_axis = plt.subplots(figsize=(10, 6))
churn_by_tenure.plot(kind='bar', color=['#2ecc71', '#e74c3c'], ax=tenure_axis)
tenure_axis.set_title('Churn Analysis by Tenure Duration', fontsize=14, fontweight='bold')
tenure_axis.set_xlabel('Customer Tenure (months)')
tenure_axis.set_ylabel('Percentage Distribution (%)')
tenure_axis.legend(['No Churn', 'Churn'])
tenure_axis.tick_params(axis='x', rotation=0)
plt.tight_layout()
plt.show()

print("\nInsight: Churn probability decreases significantly as customer tenure increases")

print("""
COMPREHENSIVE ANALYSIS FINDINGS:
1. Dataset exhibits significant class imbalance favoring retained customers
2. Contract type emerges as the most influential churn predictor
3. Customer tenure shows strong inverse relationship with churn likelihood
4. Monthly charges demonstrate positive correlation with churn probability
5. Service type and payment method substantially impact retention rates
6. Additional services play protective role against customer attrition
""")

# =============================================================================
# DATA PREPARATION PIPELINE
# =============================================================================

# Start from an independent copy so the exploratory frame is left untouched.
prepared_data = customer_records.copy()

# TotalCharges: coerce to numeric, report how many values are missing, then
# fill the gaps with the column median.
total_charges_numeric = pd.to_numeric(prepared_data['TotalCharges'], errors='coerce')
print(f"Missing TotalCharges entries: {total_charges_numeric.isnull().sum()}")
prepared_data['TotalCharges'] = total_charges_numeric.fillna(total_charges_numeric.median())

# Drop the identifier and the exploratory tenure segment (if present).
prepared_data = prepared_data.drop(columns=['customerID', 'TenureSegment'], errors='ignore')

# Two-valued text columns become 0/1 via a single shared lookup table.
binary_features = ['gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']
binary_value_codes = {'Yes': 1, 'No': 0, 'Male': 1, 'Female': 0}
prepared_data = prepared_data.assign(**{
    binary_column: prepared_data[binary_column].map(binary_value_codes)
    for binary_column in binary_features
})

# Multi-valued text columns are one-hot encoded; the first level of each is
# dropped and acts as the implicit baseline.
multi_category_features = [
    'MultipleLines', 'InternetService', 'OnlineSecurity',
    'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'Contract', 'PaymentMethod',
]
prepared_data = pd.get_dummies(prepared_data, columns=multi_category_features, drop_first=True, dtype=int)

# Target: 'Yes' -> 1, 'No' -> 0.
prepared_data['Churn'] = prepared_data['Churn'].map({'Yes': 1, 'No': 0})

# Split the frame into the target column and everything else.
target_variable = prepared_data['Churn']
predictor_variables = prepared_data.drop(columns='Churn')

print(f"Predictor feature dimensions: {predictor_variables.shape}")
print(f"Target variable distribution:\n{target_variable.value_counts()}")

# Stratified 80/20 split with a fixed random state.
X_train_set, X_test_set, y_train_set, y_test_set = train_test_split(
    predictor_variables,
    target_variable,
    test_size=0.2,
    stratify=target_variable,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both subsets.
standard_scaler = StandardScaler()
standard_scaler.fit(X_train_set)
X_train_normalized = standard_scaler.transform(X_train_set)
X_test_normalized = standard_scaler.transform(X_test_set)

print(f"Training subset dimensions: {X_train_set.shape}")
print(f"Testing subset dimensions: {X_test_set.shape}")
print("Data preparation pipeline completed successfully")

# =============================================================================
# DECISION TREE CLASSIFICATION MODEL
# =============================================================================

# Hyperparameter search space for the decision tree.
parameter_grid = dict(
    max_depth=[4, 6, 8, 10, 12],                    # maximum tree depth
    min_samples_split=[10, 20, 30, 40],             # samples required to split a node
    min_samples_leaf=[5, 10, 15, 20],               # samples required in a leaf
    criterion=['gini', 'entropy'],                  # impurity measure
    max_features=['sqrt', 'log2', 0.5, 0.7, None],  # features considered per split
)

# Exhaustive search over the grid: 5-fold cross-validation scored by ROC-AUC,
# run on all available cores with progress output enabled.
base_decision_tree = DecisionTreeClassifier(random_state=42)
grid_optimization_process = GridSearchCV(
    estimator=base_decision_tree,
    param_grid=parameter_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# The tree is fitted on the unscaled training features.
grid_optimization_process.fit(X_train_set, y_train_set)

best_grid_parameters = grid_optimization_process.best_params_
best_grid_score = grid_optimization_process.best_score_
print(f"\nOptimal parameter configuration: {best_grid_parameters}")
print(f"Best cross-validation ROC-AUC score: {best_grid_score:.4f}")

# Take the best estimator found by the grid search and score the test subset:
# hard class predictions plus the predicted probability of the positive class.
optimal_tree_model = grid_optimization_process.best_estimator_
tree_predictions = optimal_tree_model.predict(X_test_set)
tree_probability_predictions = optimal_tree_model.predict_proba(X_test_set)[:, 1]

churn_class_names = ['No Churn', 'Churn']

print("\n--- DECISION TREE MODEL PERFORMANCE ---")
print(classification_report(y_test_set, tree_predictions, target_names=churn_class_names))

tree_confusion_results = confusion_matrix(y_test_set, tree_predictions)
print(f"Confusion matrix results:\n{tree_confusion_results}")

# Confusion matrix as an annotated heatmap (rows: actual, columns: predicted).
plt.figure(figsize=(8, 6))
sns.heatmap(
    tree_confusion_results,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=churn_class_names,
    yticklabels=churn_class_names,
)
plt.ylabel('Actual Classification', fontweight='bold')
plt.xlabel('Predicted Classification', fontweight='bold')
plt.title('Decision Tree Confusion Matrix Visualization', fontweight='bold')
plt.tight_layout()
plt.show()

# Metrics computed from hard labels, followed by ROC-AUC, which needs the
# predicted probabilities instead.
label_based_scorers = {
    'Accuracy Score': accuracy_score,
    'Precision Metric': precision_score,
    'Recall Metric': recall_score,
    'F1 Score': f1_score,
}
tree_performance_metrics = {
    scorer_label: scorer(y_test_set, tree_predictions)
    for scorer_label, scorer in label_based_scorers.items()
}
tree_performance_metrics['ROC-AUC Score'] = roc_auc_score(y_test_set, tree_probability_predictions)

print("\nModel performance metrics:")
for metric_name, metric_value in tree_performance_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

# Pair every training column with the importance the fitted tree assigned to
# it, ordered from most to least important.
feature_importance_analysis = (
    pd.DataFrame({
        'Predictor Feature': X_train_set.columns.tolist(),
        'Importance Value': optimal_tree_model.feature_importances_,
    })
    .sort_values('Importance Value', ascending=False)
)

all_importance_values = feature_importance_analysis['Importance Value']
print(f"\nFeatures with predictive power: {(all_importance_values > 0).sum()}/{len(feature_importance_analysis)}")
print(f"Total importance distribution: {all_importance_values.sum():.6f}")

print("\nTop 10 influential features:")
print(feature_importance_analysis.head(10).to_string(index=False))

# Report the two contract dummy columns individually when they are present.
print("\nContract type impact interpretation:")
importance_by_feature = feature_importance_analysis.set_index('Predictor Feature')['Importance Value']
for contract_type in ('Contract_Two year', 'Contract_One year'):
    if contract_type not in importance_by_feature.index:
        continue
    importance_value = importance_by_feature[contract_type]
    print(f"  {contract_type}: {importance_value:.4f} (compared to month-to-month baseline)")


def _importance_bar_color(feature_label):
    """Return the bar colour for a feature: contract, tenure, charge or other."""
    if 'Contract' in feature_label:
        return '#e74c3c'
    lowered_label = feature_label.lower()
    if 'tenure' in lowered_label:
        return '#3498db'
    if 'charge' in lowered_label:
        return '#2ecc71'
    return '#95a5a6'


# Horizontal bar chart of the (at most) 15 most important features.
plt.figure(figsize=(14, 8))
display_count = min(15, len(feature_importance_analysis))
top_important_features = feature_importance_analysis.head(display_count)
color_scheme = [_importance_bar_color(label) for label in top_important_features['Predictor Feature']]

bar_slots = range(display_count)
importance_bars = plt.barh(bar_slots, top_important_features['Importance Value'], color=color_scheme, edgecolor='black')

# Print the numeric importance just to the right of each bar.
for bar_position, importance_val in enumerate(top_important_features['Importance Value']):
    plt.text(importance_val + 0.001, bar_position, f'{importance_val:.4f}', va='center', fontsize=9)

plt.yticks(bar_slots, top_important_features['Predictor Feature'])
plt.xlabel('Feature Importance Score', fontweight='bold')
plt.title(f'Top {display_count} Feature Importance Rankings', fontweight='bold')
plt.gca().invert_yaxis()  # most important feature at the top
plt.grid(axis='x', alpha=0.3, linestyle='--')
plt.tight_layout()
plt.show()

# Visualize decision tree structure (drawing is truncated at max_depth=3)
plt.figure(figsize=(20, 10))
plot_tree(optimal_tree_model, max_depth=3, feature_names=X_train_set.columns.tolist(),
          class_names=['No Churn', 'Churn'], filled=True, rounded=True, fontsize=9)
plt.title('Decision Tree Structure Visualization (Top 3 Levels)', fontweight='bold')
plt.tight_layout()
plt.show()

# Decision tree structural analysis
print("\n" + "="*80)
print("DECISION TREE STRUCTURAL ANALYSIS")
print("="*80)

print(f"Tree depth configuration: {optimal_tree_model.get_depth()}")
print(f"Number of terminal nodes: {optimal_tree_model.get_n_leaves()}")

# Examine tree splitting structure through the low-level tree_ object.
# Nodes whose feature index is -2 are skipped as non-splitting nodes.
# NOTE(review): -2 appears to be scikit-learn's internal "undefined" marker for
# leaves; the documented test is children_left == children_right - confirm.
tree_architecture = optimal_tree_model.tree_
feature_split_indices = tree_architecture.feature
print("\nInitial splitting decisions in tree:")
for node_index in range(min(5, tree_architecture.node_count)):
    if feature_split_indices[node_index] != -2:
        split_feature = X_train_set.columns[feature_split_indices[node_index]]
        split_threshold = tree_architecture.threshold[node_index]
        print(f"  Node {node_index}: Split on {split_feature} <= {split_threshold:.4f}")

# Identify features used in tree construction: unique split features in order
# of first appearance (dict.fromkeys de-duplicates while keeping order).
print("\nFeatures utilized in tree construction:")
utilized_features = list(dict.fromkeys(
    X_train_set.columns[feature_index]
    for feature_index in feature_split_indices
    if feature_index != -2
))

print(f"Unique features in decision tree: {len(utilized_features)}")
print("Primary splitting features:")
for feature_item in utilized_features[:5]:
    # The original literal was a mis-decoded bullet; written as an escape so
    # the character survives regardless of the file's encoding.
    print(f"  \u2022 {feature_item}")

#Scaled vs Unscaled Analysis
# =============================================================================
# DECISION TREE ON NORMALIZED DATA (Comparative Analysis)
# =============================================================================

print("\n" + "="*80)
print("DECISION TREE PERFORMANCE ON NORMALIZED DATA")
print("="*80)

# Fit a second tree with the hyperparameters chosen by the grid search, this
# time on the standardized training features.
normalized_tree_model = DecisionTreeClassifier(random_state=42)
normalized_tree_model.set_params(**grid_optimization_process.best_params_)
normalized_tree_model.fit(X_train_normalized, y_train_set)

# Hard labels and positive-class probabilities for the standardized test set.
normalized_tree_predictions = normalized_tree_model.predict(X_test_normalized)
normalized_tree_probabilities = normalized_tree_model.predict_proba(X_test_normalized)[:, 1]

normalized_class_names = ['No Churn', 'Churn']

print("\n--- NORMALIZED DATA DECISION TREE RESULTS ---")
print(classification_report(y_test_set, normalized_tree_predictions, target_names=normalized_class_names))

normalized_confusion_matrix = confusion_matrix(y_test_set, normalized_tree_predictions)
print(f"Confusion matrix results:\n{normalized_confusion_matrix}")

# Same five metrics as for the original tree, under the same keys.
normalized_tree_metrics = {
    metric_label: metric_function(y_test_set, normalized_tree_predictions)
    for metric_label, metric_function in (
        ('Accuracy Score', accuracy_score),
        ('Precision Metric', precision_score),
        ('Recall Metric', recall_score),
        ('F1 Score', f1_score),
    )
}
normalized_tree_metrics['ROC-AUC Score'] = roc_auc_score(y_test_set, normalized_tree_probabilities)

print("\nNormalized tree performance metrics:")
for metric_name, metric_value in normalized_tree_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

# Confusion matrix heatmap (rows: actual, columns: predicted).
plt.figure(figsize=(8, 6))
sns.heatmap(
    normalized_confusion_matrix,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=normalized_class_names,
    yticklabels=normalized_class_names,
)
plt.ylabel('Actual Classification', fontweight='bold')
plt.xlabel('Predicted Classification', fontweight='bold')
plt.title('Normalized Data Decision Tree Confusion Matrix', fontweight='bold')
plt.tight_layout()
plt.show()

# Importance table for the tree trained on standardized features.
normalized_feature_importance = (
    pd.DataFrame({
        'Predictor Feature': X_train_set.columns.tolist(),
        'Importance Value': normalized_tree_model.feature_importances_,
    })
    .sort_values('Importance Value', ascending=False)
)

normalized_informative_count = (normalized_feature_importance['Importance Value'] > 0).sum()
print(f"\nFeatures with predictive power in normalized tree: {normalized_informative_count}/{len(normalized_feature_importance)}")

print("\nTop 10 influential features in normalized tree:")
print(normalized_feature_importance.head(10).to_string(index=False))

# =============================================================================
# ROC ANALYSIS FOR BOTH DECISION TREE MODELS
# =============================================================================

# ROC curve points for both trees; roc_curve is also imported together with
# the other metrics at the top of the file.
from sklearn.metrics import roc_curve

# roc_curve returns (fpr, tpr, thresholds); only the first two are kept.
# Tree trained on the original (unscaled) features:
fpr_original_tree, tpr_original_tree = roc_curve(y_test_set, tree_probability_predictions)[:2]

# Tree trained on the standardized features:
fpr_normalized_tree, tpr_normalized_tree = roc_curve(y_test_set, normalized_tree_probabilities)[:2]

# =============================================================================
# COMPARATIVE ANALYSIS: ORIGINAL VS NORMALIZED DATA
# =============================================================================

# Side-by-side table of the five test-set metrics for both decision trees.
compared_metric_names = ['Accuracy Score', 'Precision Metric', 'Recall Metric', 'F1 Score', 'ROC-AUC Score']
tree_comparison_results = pd.DataFrame({
    'Performance Metric': compared_metric_names,
    'Original Data Tree': [tree_performance_metrics[name] for name in compared_metric_names],
    'Normalized Data Tree': [normalized_tree_metrics[name] for name in compared_metric_names],
})

print("\n" + tree_comparison_results.to_string(index=False))

# Visual comparison of both decision tree models
fig, comparison_plots = plt.subplots(1, 2, figsize=(16, 6))
metrics_axis, importance_axis = comparison_plots

# Performance metrics bar chart comparison (grouped bars, one pair per metric)
metric_positions = np.arange(len(tree_comparison_results['Performance Metric']))
bar_width = 0.35

original_bars = metrics_axis.bar(metric_positions - bar_width/2, tree_comparison_results['Original Data Tree'], bar_width,
                    label='Original Data Model', color='steelblue')
normalized_bars = metrics_axis.bar(metric_positions + bar_width/2, tree_comparison_results['Normalized Data Tree'], bar_width,
                    label='Normalized Data Model', color='lightgreen')

metrics_axis.set_xlabel('Performance Metric', fontweight='bold')
metrics_axis.set_ylabel('Metric Value', fontweight='bold')
metrics_axis.set_title('Decision Tree: Original vs Normalized Data Performance', fontweight='bold')
metrics_axis.set_xticks(metric_positions)
metrics_axis.set_xticklabels(tree_comparison_results['Performance Metric'])
metrics_axis.legend()
metrics_axis.set_ylim(0, 1)

# Annotate every bar with its value.
for bar_collection in [original_bars, normalized_bars]:
    for individual_bar in bar_collection:
        bar_height = individual_bar.get_height()
        metrics_axis.text(individual_bar.get_x() + individual_bar.get_width()/2., bar_height,
                    f'{bar_height:.3f}', ha='center', va='bottom', fontsize=9)

# Feature importance comparison visualization
top_features_count = 10
top_original_features = feature_importance_analysis.head(top_features_count)
top_normalized_features = normalized_feature_importance.head(top_features_count)

# The features to compare are the union of both top-N lists, but each
# importance is looked up in the FULL table of its model. Looking it up in the
# top-N slice only would report 0 for a feature that is merely outside that
# model's top N, although its real importance may be non-zero.
original_importance_lookup = feature_importance_analysis.set_index('Predictor Feature')['Importance Value']
normalized_importance_lookup = normalized_feature_importance.set_index('Predictor Feature')['Importance Value']

common_feature_set = set(top_original_features['Predictor Feature']).union(top_normalized_features['Predictor Feature'])
# Iterate in sorted order so the result does not depend on set iteration order.
importance_comparison = {
    feature_name: {
        'Original': original_importance_lookup.get(feature_name, 0),
        'Normalized': normalized_importance_lookup.get(feature_name, 0),
    }
    for feature_name in sorted(common_feature_set)
}

importance_comparison_df = pd.DataFrame(importance_comparison).T.sort_values('Original', ascending=False, kind='stable')

feature_positions = np.arange(len(importance_comparison_df))
importance_axis.bar(feature_positions - 0.2, importance_comparison_df['Original'], 0.4, label='Original Data', color='steelblue', alpha=0.8)
importance_axis.bar(feature_positions + 0.2, importance_comparison_df['Normalized'], 0.4, label='Normalized Data', color='lightgreen', alpha=0.8)

importance_axis.set_xlabel('Predictor Features', fontweight='bold')
importance_axis.set_ylabel('Importance Score', fontweight='bold')
importance_axis.set_title('Feature Importance Comparison: Original vs Normalized', fontweight='bold')
importance_axis.set_xticks(feature_positions)
importance_axis.set_xticklabels(importance_comparison_df.index, rotation=45, ha='right')
importance_axis.legend()
importance_axis.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

# ROC curve comparison visualization (dashed diagonal = random classifier)
plt.figure(figsize=(10, 8))
plt.plot(fpr_original_tree, tpr_original_tree, linewidth=3, label=f'Original Data Tree (AUC={tree_performance_metrics["ROC-AUC Score"]:.3f})')
plt.plot(fpr_normalized_tree, tpr_normalized_tree, linewidth=3, label=f'Normalized Data Tree (AUC={normalized_tree_metrics["ROC-AUC Score"]:.3f})')
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier Baseline')
plt.xlabel('False Positive Rate', fontweight='bold')
plt.ylabel('True Positive Rate', fontweight='bold')
plt.title('ROC Curve Comparison: Original vs Normalized Decision Trees', fontweight='bold')
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()


# =============================================================================
# NEURAL NETWORK CLASSIFICATION MODEL
# =============================================================================

# Calculate class weights to address imbalance.
# The weights come back in the same order as `classes`, so each weight is
# keyed by its actual class label rather than by its position in the array
# (the two only coincide when the labels happen to be 0..n-1).
class_labels = np.unique(y_train_set)
calculated_class_weights = compute_class_weight('balanced', classes=class_labels, y=y_train_set)
class_weight_dict = {
    int(class_label): float(class_weight)
    for class_label, class_weight in zip(class_labels, calculated_class_weights)
}
print(f"Calculated class weights: {class_weight_dict}")

# Define neural network architecture
def construct_neural_network(input_dimension, learning_rate=0.001):
    """Build and compile a feed-forward binary classifier.

    The network stacks three ReLU hidden layers (64, 32 and 16 units). The
    first two are each followed by batch normalization and 50% dropout, the
    third by 30% dropout. A single sigmoid unit produces the output.

    Args:
        input_dimension: Number of input features per sample.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss and
        reporting ``accuracy`` and an AUC metric named ``auc``.
    """
    network_model = Sequential([
        # Declare the input shape with an explicit Input object; passing
        # `input_dim` to the first Dense layer is the legacy form and
        # triggers a warning in Keras 3.
        tf.keras.Input(shape=(input_dimension,)),

        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),

        Dense(32, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),

        Dense(16, activation='relu'),
        Dropout(0.3),

        Dense(1, activation='sigmoid')
    ])

    network_model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        # Metric order matters to callers that unpack evaluate(): loss, accuracy, auc
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return network_model

# Perform hyperparameter tuning
print("\nNeural Network Hyperparameter Optimization Process...")
parameter_configurations = [
    {'learning_rate': 0.001, 'batch_size': 32},
    {'learning_rate': 0.0005, 'batch_size': 64},
]

# Model selection must not look at the test split, otherwise the test metrics
# reported later are biased. Carve a stratified hold-out from the training data
# and use it both for early stopping and for scoring each configuration.
X_tuning_train, X_tuning_validation, y_tuning_train, y_tuning_validation = train_test_split(
    X_train_normalized, y_train_set,
    test_size=0.2,
    stratify=y_train_set,
    random_state=42
)

# Start below any attainable AUC so the first valid score is always accepted
best_validation_score = float('-inf')
optimal_configuration = None

for config_setting in parameter_configurations:
    print(f"Testing configuration - Learning Rate: {config_setting['learning_rate']}, Batch Size: {config_setting['batch_size']}")

    test_network = construct_neural_network(X_train_normalized.shape[1], config_setting['learning_rate'])

    training_history = test_network.fit(
        X_tuning_train, y_tuning_train,
        epochs=30,
        batch_size=config_setting['batch_size'],
        validation_data=(X_tuning_validation, y_tuning_validation),
        class_weight=class_weight_dict,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=0),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=0)
        ],
        verbose=0
    )

    # evaluate() returns [loss, accuracy, auc] in the order the model was compiled with
    _, _, validation_auc_score = test_network.evaluate(X_tuning_validation, y_tuning_validation, verbose=0)
    print(f"  Validation AUC performance: {validation_auc_score:.4f}")

    if validation_auc_score > best_validation_score:
        best_validation_score = validation_auc_score
        optimal_configuration = config_setting
        best_network_model = test_network

# Fail with a clear message instead of a TypeError on None below
if optimal_configuration is None:
    raise RuntimeError("No hyperparameter configuration produced a valid validation AUC")

print(f"\nOptimal configuration identified: Learning Rate={optimal_configuration['learning_rate']}, Batch Size={optimal_configuration['batch_size']}")

# Train final neural network model using the learning rate and batch size
# selected during hyperparameter tuning.
print("\nTraining final neural network model...")
final_neural_network = construct_neural_network(
    X_train_normalized.shape[1],
    optimal_configuration['learning_rate'],
)

# Stop once val_loss has not improved by at least 0.001 for 15 epochs and
# roll back to the best weights seen.
final_early_stopping = EarlyStopping(
    monitor='val_loss', patience=15, min_delta=0.001, restore_best_weights=True, verbose=1
)
# Halve the learning rate after 8 epochs without val_loss improvement
final_lr_reduction = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=8, verbose=1)

final_fit_options = dict(
    epochs=80,
    batch_size=optimal_configuration['batch_size'],
    validation_split=0.2,
    class_weight=class_weight_dict,
    callbacks=[final_early_stopping, final_lr_reduction],
    verbose=1,
)
complete_training_history = final_neural_network.fit(
    X_train_normalized, y_train_set, **final_fit_options
)

# Evaluate neural network performance
neural_network_probabilities = final_neural_network.predict(X_test_normalized).flatten()

# Choose the ROC operating point that maximizes (TPR - FPR).
# NOTE(review): the threshold is selected on the same test split that the
# metrics below are computed on, so the threshold-dependent metrics are likely
# optimistic — consider selecting it on a validation split instead.
fpr_neural, tpr_neural, classification_thresholds = roc_curve(y_test_set, neural_network_probabilities)
optimal_threshold_index = np.argmax(tpr_neural - fpr_neural)
optimal_classification_threshold = classification_thresholds[optimal_threshold_index]
print(f"\nOptimal classification threshold: {optimal_classification_threshold:.3f}")

# roc_curve counts a sample as positive when its score is >= the threshold.
# Use the same inclusive comparison so the predictions match the selected
# (FPR, TPR) point; a strict '>' drops the samples sitting exactly on the threshold.
neural_network_predictions = (neural_network_probabilities >= optimal_classification_threshold).astype(int)

print("\n--- NEURAL NETWORK MODEL PERFORMANCE ---")
print(classification_report(y_test_set, neural_network_predictions, target_names=['No Churn', 'Churn']))

neural_network_confusion = confusion_matrix(y_test_set, neural_network_predictions)
print(f"Confusion matrix results:\n{neural_network_confusion}")

# ROC-AUC is computed from the raw probabilities; the other metrics use the
# thresholded class predictions.
neural_network_performance = {
    'Accuracy Score': accuracy_score(y_test_set, neural_network_predictions),
    'Precision Metric': precision_score(y_test_set, neural_network_predictions),
    'Recall Metric': recall_score(y_test_set, neural_network_predictions),
    'F1 Score': f1_score(y_test_set, neural_network_predictions),
    'ROC-AUC Score': roc_auc_score(y_test_set, neural_network_probabilities)
}

print("\nNeural network performance metrics:")
for metric_name, metric_value in neural_network_performance.items():
    print(f"  {metric_name}: {metric_value:.4f}")

# Training progress visualization: one panel per tracked quantity, each
# showing the training curve next to its validation counterpart.
fig, training_progress_plots = plt.subplots(1, 3, figsize=(18, 5))

recorded_history = complete_training_history.history
progress_panels = (
    ('loss', 'Loss', 'Loss Function Progression'),
    ('accuracy', 'Accuracy', 'Accuracy Progression'),
    ('auc', 'AUC', 'AUC Metric Progression'),
)

for panel_axis, (history_key, series_name, panel_title) in zip(training_progress_plots, progress_panels):
    # Keras stores the validation series under the same key prefixed with 'val_'
    panel_axis.plot(recorded_history[history_key], label=f'Training {series_name}')
    panel_axis.plot(recorded_history[f'val_{history_key}'], label=f'Validation {series_name}')
    panel_axis.set_title(panel_title, fontweight='bold')
    panel_axis.set_xlabel('Training Epoch')
    panel_axis.legend()

plt.tight_layout()
plt.show()

# Neural network ROC curve visualization, with the selected operating point
# highlighted on the curve.
plt.figure(figsize=(8, 6))
neural_roc_axis = plt.gca()

neural_auc_value = neural_network_performance["ROC-AUC Score"]
neural_roc_axis.plot(
    fpr_neural, tpr_neural,
    color='darkorange', lw=2,
    label=f'Neural Network ROC (AUC = {neural_auc_value:.4f})',
)
neural_roc_axis.plot(
    [0, 1], [0, 1],
    color='navy', lw=2, linestyle='--',
    label='Random Classifier Baseline',
)

# Red marker = the (FPR, TPR) pair at the chosen classification threshold
neural_roc_axis.scatter(
    fpr_neural[optimal_threshold_index],
    tpr_neural[optimal_threshold_index],
    color='red', s=100,
    label=f'Optimal Threshold: {optimal_classification_threshold:.3f}',
)

neural_roc_axis.set_xlim([0.0, 1.0])
neural_roc_axis.set_ylim([0.0, 1.05])
neural_roc_axis.set_xlabel('False Positive Rate', fontweight='bold')
neural_roc_axis.set_ylabel('True Positive Rate', fontweight='bold')
neural_roc_axis.set_title('Neural Network ROC Curve Analysis', fontweight='bold')
neural_roc_axis.legend(loc="lower right")
neural_roc_axis.grid(alpha=0.3)
plt.tight_layout()
plt.show()

# =============================================================================
# COMPREHENSIVE MODEL COMPARISON
# =============================================================================

section_rule = "="*80
print("\n" + section_rule)
print("COMPREHENSIVE MODEL PERFORMANCE COMPARISON")
print(section_rule)

# Performance comparison table: one row per metric, one column per model.
# Both metric dictionaries are read with the same keys, in the same order.
compared_metric_names = ['Accuracy Score', 'Precision Metric', 'Recall Metric', 'F1 Score', 'ROC-AUC Score']
model_comparison_table = pd.DataFrame({
    'Performance Metric': compared_metric_names,
    'Decision Tree Model': [tree_performance_metrics[name] for name in compared_metric_names],
    'Neural Network Model': [neural_network_performance[name] for name in compared_metric_names],
})

print("\n" + model_comparison_table.to_string(index=False))

# Visual model comparison: grouped bars, one pair (tree vs. network) per metric.
# Only one panel is drawn, so create a single axis rather than a 1x2 grid whose
# second panel would stay empty. The axis is wrapped in a list so that
# `model_comparison_plots[0]` keeps resolving to it.
fig, metric_comparison_axis = plt.subplots(figsize=(10, 6))
model_comparison_plots = [metric_comparison_axis]

metric_locations = np.arange(len(model_comparison_table['Performance Metric']))
comparison_bar_width = 0.35

# Shift each model's bars half a bar-width left/right of the tick position
tree_comparison_bars = metric_comparison_axis.bar(metric_locations - comparison_bar_width/2, model_comparison_table['Decision Tree Model'], comparison_bar_width,
                    label='Decision Tree', color='steelblue')
network_comparison_bars = metric_comparison_axis.bar(metric_locations + comparison_bar_width/2, model_comparison_table['Neural Network Model'], comparison_bar_width,
                    label='Neural Network', color='coral')

metric_comparison_axis.set_xlabel('Performance Metric', fontweight='bold')
metric_comparison_axis.set_ylabel('Metric Value', fontweight='bold')
metric_comparison_axis.set_title('Model Performance Comparative Analysis', fontweight='bold')
metric_comparison_axis.set_xticks(metric_locations)
metric_comparison_axis.set_xticklabels(model_comparison_table['Performance Metric'])
metric_comparison_axis.legend()
metric_comparison_axis.set_ylim(0, 1)

# Print each bar's value just above its top edge
for bar_set in [tree_comparison_bars, network_comparison_bars]:
    for individual_bar in bar_set:
        bar_value = individual_bar.get_height()
        metric_comparison_axis.text(individual_bar.get_x() + individual_bar.get_width()/2., bar_value,
                    f'{bar_value:.3f}', ha='center', va='bottom', fontsize=9)

# Finalize and render this figure before the next one is created
plt.tight_layout()
plt.show()

# Confusion matrix comparison visualization: decision tree on the first
# panel, neural network on the second, with identical axis labelling.
fig, confusion_comparison = plt.subplots(1, 2, figsize=(16, 6))

churn_class_labels = ['No Churn', 'Churn']
confusion_panels = (
    (tree_confusion_results, 'Blues', 'Decision Tree Confusion Matrix'),
    (neural_network_confusion, 'Reds', 'Neural Network Confusion Matrix'),
)

for heatmap_axis, (matrix_counts, colour_map, heatmap_title) in zip(confusion_comparison, confusion_panels):
    # fmt='d' renders the cell annotations as integer counts
    sns.heatmap(
        matrix_counts,
        annot=True,
        fmt='d',
        cmap=colour_map,
        ax=heatmap_axis,
        xticklabels=churn_class_labels,
        yticklabels=churn_class_labels,
    )
    heatmap_axis.set_title(heatmap_title, fontweight='bold')
    heatmap_axis.set_ylabel('Actual Classification')
    heatmap_axis.set_xlabel('Predicted Classification')

plt.tight_layout()
plt.show()

# ROC curve comparative analysis: recompute both models' ROC points from their
# predicted probabilities and draw them on a shared axis.
fpr_decision_tree, tpr_decision_tree, _ = roc_curve(y_test_set, tree_probability_predictions)
fpr_neural_network, tpr_neural_network, _ = roc_curve(y_test_set, neural_network_probabilities)

plt.figure(figsize=(10, 8))
model_roc_axis = plt.gca()

model_roc_curves = (
    (fpr_decision_tree, tpr_decision_tree, 'Decision Tree', tree_performance_metrics),
    (fpr_neural_network, tpr_neural_network, 'Neural Network', neural_network_performance),
)
for false_positive_rates, true_positive_rates, model_label, model_metrics in model_roc_curves:
    model_roc_axis.plot(
        false_positive_rates,
        true_positive_rates,
        linewidth=3,
        label=f'{model_label} (AUC={model_metrics["ROC-AUC Score"]:.3f})',
    )

# Chance-level reference line
model_roc_axis.plot([0, 1], [0, 1], 'k--', label='Random Classification Baseline')
model_roc_axis.set_xlabel('False Positive Rate', fontweight='bold')
model_roc_axis.set_ylabel('True Positive Rate', fontweight='bold')
model_roc_axis.set_title('ROC Curve Comparative Analysis', fontweight='bold')
model_roc_axis.legend()
model_roc_axis.grid(alpha=0.3)
plt.tight_layout()
plt.show()

# Final analysis summary: name the model with the higher test ROC-AUC and
# print both scores.
summary_rule = "="*80
print("\n" + summary_rule)
print("FINAL ANALYTICAL SUMMARY")
print(summary_rule)

decision_tree_auc = tree_performance_metrics['ROC-AUC Score']
neural_network_auc = neural_network_performance['ROC-AUC Score']

# A tie goes to the decision tree: the network wins only on a strictly higher AUC
if neural_network_auc > decision_tree_auc:
    superior_model = 'Neural Network'
else:
    superior_model = 'Decision Tree'

print(f"\nOptimal performing model: {superior_model}")
print(f"\nDecision Tree Model - ROC-AUC: {decision_tree_auc:.4f}")
print(f"Neural Network Model - ROC-AUC: {neural_network_auc:.4f}")
