# Deep Learning Models for Customer Upsell Prediction
## AI Customer Upsell Prediction System

This notebook implements:
- Custom PyTorch Neural Networks
- Autoencoder for feature learning
- Deep TabNet architecture
- Attention mechanisms
- Advanced regularization techniques
- GPU-accelerated training
- Model interpretation and visualization

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Deep Learning libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

# Advanced architectures
try:
    from pytorch_tabnet.tab_model import TabNetClassifier
    TABNET_AVAILABLE = True
    print("✅ TabNet available")
except ImportError:
    TABNET_AVAILABLE = False
    print("⚠️ TabNet not available, will use custom architectures")

# ML utilities
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

import joblib
import json
import time
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette('viridis')
plt.rcParams['figure.figsize'] = (12, 8)

# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🔥 Using device: {device}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

In [None]:
# Load and Prepare Data for Deep Learning
print("📊 Loading Data for Deep Learning Models...")

# Load processed data
df = pd.read_csv('../data/processed/telecom_processed.csv')
feature_columns = joblib.load('../models/feature_columns.pkl')

# Prepare features and target
X = df[feature_columns].fillna(0)
X = X.replace([np.inf, -np.inf], 0)
y = df['Churn_Binary']

print(f"Dataset Shape: {X.shape}")
print(f"Features: {len(feature_columns)}")
print(f"Samples: {len(X):,}")
print(f"Churn Rate: {y.mean():.3f}")

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Further split for validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

print(f"\n📊 Data Splits:")
print(f"  Training: {X_train.shape[0]:,} samples")
print(f"  Validation: {X_val.shape[0]:,} samples")
print(f"  Test: {X_test.shape[0]:,} samples")

# Scale features for neural networks
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

print(f"✅ Features scaled for deep learning")

# Convert to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
y_train_tensor = torch.FloatTensor(y_train.values).to(device)
X_val_tensor = torch.FloatTensor(X_val_scaled).to(device)
y_val_tensor = torch.FloatTensor(y_val.values).to(device)
X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
y_test_tensor = torch.FloatTensor(y_test.values).to(device)

print(f"✅ Data converted to PyTorch tensors on {device}")

In [None]:
# Custom Deep Neural Network Architecture
print("🧠 Defining Custom Deep Neural Network Architectures")
print("="*60)

class DeepCustomerNet(nn.Module):
    """Custom Deep Neural Network for Customer Upsell Prediction"""
    
    def __init__(self, input_size, hidden_sizes=[256, 128, 64, 32], dropout_rate=0.3):
        super(DeepCustomerNet, self).__init__()
        
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.dropout_rate = dropout_rate
        
        # Build layers dynamically
        layers = []
        prev_size = input_size
        
        for i, hidden_size in enumerate(hidden_sizes):
            # Linear layer
            layers.append(nn.Linear(prev_size, hidden_size))
            # Batch normalization
            layers.append(nn.BatchNorm1d(hidden_size))
            # Activation
            layers.append(nn.ReLU())
            # Dropout
            layers.append(nn.Dropout(dropout_rate))
            
            prev_size = hidden_size
        
        # Output layer
        layers.append(nn.Linear(prev_size, 1))
        layers.append(nn.Sigmoid())
        
        self.network = nn.Sequential(*layers)
        
        # Initialize weights
        self._initialize_weights()
    
    def _initialize_weights(self):
        """Initialize network weights using Xavier initialization"""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.constant_(module.bias, 0)
    
    def forward(self, x):
        return self.network(x).squeeze()


class AttentionCustomerNet(nn.Module):
    """Neural Network with Attention Mechanism"""
    
    def __init__(self, input_size, hidden_size=128, num_heads=8):
        super(AttentionCustomerNet, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        # Feature embedding
        self.feature_embedding = nn.Linear(input_size, hidden_size)
        
        # Multi-head attention
        self.attention = nn.MultiheadAttention(
            embed_dim=hidden_size,
            num_heads=num_heads,
            batch_first=True
        )
        
        # Classification layers
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        # Embed features
        embedded = self.feature_embedding(x)  # [batch_size, hidden_size]
        
        # Add sequence dimension for attention
        embedded = embedded.unsqueeze(1)  # [batch_size, 1, hidden_size]
        
        # Apply attention
        attended, attention_weights = self.attention(embedded, embedded, embedded)
        
        # Remove sequence dimension
        attended = attended.squeeze(1)  # [batch_size, hidden_size]
        
        # Classify
        output = self.classifier(attended)
        
        return output.squeeze(), attention_weights


class ResidualCustomerNet(nn.Module):
    """Neural Network with Residual Connections"""
    
    def __init__(self, input_size, hidden_size=128, num_blocks=3):
        super(ResidualCustomerNet, self).__init__()
        
        # Input projection
        self.input_projection = nn.Linear(input_size, hidden_size)
        
        # Residual blocks
        self.residual_blocks = nn.ModuleList([
            self._make_residual_block(hidden_size) for _ in range(num_blocks)
        ])
        
        # Output layers
        self.output_layers = nn.Sequential(
            nn.Linear(hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )
    
    def _make_residual_block(self, hidden_size):
        return nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size)
        )
    
    def forward(self, x):
        # Project input
        x = self.input_projection(x)
        
        # Apply residual blocks
        for block in self.residual_blocks:
            residual = x
            x = block(x)
            x = F.relu(x + residual)  # Residual connection
        
        # Output
        return self.output_layers(x).squeeze()

print("✅ Custom neural network architectures defined")

In [None]:
# Autoencoder for Feature Learning
print("🔄 Implementing Autoencoder for Feature Learning")
print("="*55)

class CustomerAutoencoder(nn.Module):
    """Autoencoder for learning compressed customer representations"""
    
    def __init__(self, input_size, encoding_sizes=[32, 16, 8]):
        super(CustomerAutoencoder, self).__init__()
        
        self.input_size = input_size
        self.encoding_sizes = encoding_sizes
        
        # Encoder
        encoder_layers = []
        prev_size = input_size
        
        for encoding_size in encoding_sizes:
            encoder_layers.extend([
                nn.Linear(prev_size, encoding_size),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            prev_size = encoding_size
        
        self.encoder = nn.Sequential(*encoder_layers[:-1])  # Remove last dropout
        
        # Decoder
        decoder_layers = []
        decoding_sizes = encoding_sizes[::-1][1:] + [input_size]  # Reverse and add input size
        
        prev_size = encoding_sizes[-1]
        for i, decoding_size in enumerate(decoding_sizes):
            decoder_layers.append(nn.Linear(prev_size, decoding_size))
            if i < len(decoding_sizes) - 1:  # No activation on final layer
                decoder_layers.extend([nn.ReLU(), nn.Dropout(0.2)])
            prev_size = decoding_size
        
        self.decoder = nn.Sequential(*decoder_layers)
    
    def encode(self, x):
        return self.encoder(x)
    
    def decode(self, encoded):
        return self.decoder(encoded)
    
    def forward(self, x):
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded, encoded


class VariationalAutoencoder(nn.Module):
    """Variational Autoencoder for probabilistic feature learning"""
    
    def __init__(self, input_size, latent_size=16, hidden_size=64):
        super(VariationalAutoencoder, self).__init__()
        
        self.input_size = input_size
        self.latent_size = latent_size
        self.hidden_size = hidden_size
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        
        # Latent space
        self.fc_mu = nn.Linear(hidden_size // 2, latent_size)
        self.fc_logvar = nn.Linear(hidden_size // 2, latent_size)
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size)
        )
    
    def encode(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        return mu, logvar
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    def decode(self, z):
        return self.decoder(z)
    
    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        reconstructed = self.decode(z)
        return reconstructed, mu, logvar, z

print("✅ Autoencoder architectures defined")

In [None]:
# Training Utilities and Loss Functions
print("⚙️ Setting Up Training Utilities")
print("="*40)

class FocalLoss(nn.Module):
    """Focal Loss for handling class imbalance"""
    
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
    
    def forward(self, inputs, targets):
        ce_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()


class EarlyStopping:
    """Early stopping utility"""
    
    def __init__(self, patience=10, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False
    
    def __call__(self, val_loss):
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, 
                num_epochs=100, early_stopping=None, device='cpu'):
    """Generic model training function"""
    
    train_losses = []
    val_losses = []
    val_aucs = []
    
    best_val_auc = 0.0
    best_model_state = None
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            
            # Handle different model outputs
            if isinstance(model, AttentionCustomerNet):
                outputs, _ = model(batch_X)
            else:
                outputs = model(batch_X)
            
            loss = criterion(outputs, batch_y)
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            train_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        all_outputs = []
        all_targets = []
        
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                
                if isinstance(model, AttentionCustomerNet):
                    outputs, _ = model(batch_X)
                else:
                    outputs = model(batch_X)
                
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                
                all_outputs.extend(outputs.cpu().numpy())
                all_targets.extend(batch_y.cpu().numpy())
        
        # Calculate metrics
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_auc = roc_auc_score(all_targets, all_outputs)
        
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_aucs.append(val_auc)
        
        # Save best model
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            best_model_state = model.state_dict().copy()
        
        # Learning rate scheduling
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(val_loss)
        else:
            scheduler.step()
        
        # Early stopping
        if early_stopping:
            early_stopping(val_loss)
            if early_stopping.early_stop:
                print(f"Early stopping at epoch {epoch+1}")
                break
        
        # Print progress
        if (epoch + 1) % 10 == 0:
            current_lr = optimizer.param_groups[0]['lr']
            print(f"Epoch [{epoch+1}/{num_epochs}] - "
                  f"Train Loss: {train_loss:.4f}, "
                  f"Val Loss: {val_loss:.4f}, "
                  f"Val AUC: {val_auc:.4f}, "
                  f"LR: {current_lr:.6f}")
    
    # Load best model
    if best_model_state:
        model.load_state_dict(best_model_state)
    
    return {
        'train_losses': train_losses,
        'val_losses': val_losses,
        'val_aucs': val_aucs,
        'best_val_auc': best_val_auc
    }

print("✅ Training utilities set up")

In [None]:
# Train Autoencoder for Feature Learning
print("🔄 Training Autoencoder for Feature Learning")
print("="*50)

# Create data loaders
batch_size = 512
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"✅ Data loaders created (batch size: {batch_size})")

# Initialize autoencoder
input_size = X_train_tensor.shape[1]
autoencoder = CustomerAutoencoder(input_size, encoding_sizes=[64, 32, 16]).to(device)

# Training setup
ae_criterion = nn.MSELoss()
ae_optimizer = optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)
ae_scheduler = ReduceLROnPlateau(ae_optimizer, mode='min', factor=0.5, patience=10)
ae_early_stopping = EarlyStopping(patience=15)

print("🚀 Training autoencoder...")
start_time = time.time()

# Custom training loop for autoencoder
autoencoder.train()
ae_train_losses = []
ae_val_losses = []

num_epochs = 50
for epoch in range(num_epochs):
    # Training phase
    train_loss = 0.0
    for batch_X, _ in train_loader:  # Don't need labels for autoencoder
        batch_X = batch_X.to(device)
        
        ae_optimizer.zero_grad()
        reconstructed, encoded = autoencoder(batch_X)
        loss = ae_criterion(reconstructed, batch_X)
        loss.backward()
        ae_optimizer.step()
        
        train_loss += loss.item()
    
    # Validation phase
    autoencoder.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_X, _ in val_loader:
            batch_X = batch_X.to(device)
            reconstructed, encoded = autoencoder(batch_X)
            loss = ae_criterion(reconstructed, batch_X)
            val_loss += loss.item()
    
    autoencoder.train()
    
    train_loss /= len(train_loader)
    val_loss /= len(val_loader)
    
    ae_train_losses.append(train_loss)
    ae_val_losses.append(val_loss)
    
    ae_scheduler.step(val_loss)
    ae_early_stopping(val_loss)
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    
    if ae_early_stopping.early_stop:
        print(f"Early stopping at epoch {epoch+1}")
        break

training_time = time.time() - start_time
print(f"✅ Autoencoder training completed in {training_time:.1f} seconds")

# Extract learned features
autoencoder.eval()
with torch.no_grad():
    X_train_encoded = autoencoder.encode(X_train_tensor).cpu().numpy()
    X_val_encoded = autoencoder.encode(X_val_tensor).cpu().numpy()
    X_test_encoded = autoencoder.encode(X_test_tensor).cpu().numpy()

print(f"✅ Features encoded: {X_train_tensor.shape[1]} → {X_train_encoded.shape[1]} dimensions")

In [None]:
# Train Deep Neural Networks
print("🧠 Training Deep Neural Network Models")
print("="*45)

# Initialize models
models = {
    'DeepCustomerNet': DeepCustomerNet(
        input_size=input_size,
        hidden_sizes=[256, 128, 64, 32],
        dropout_rate=0.3
    ).to(device),
    
    'AttentionNet': AttentionCustomerNet(
        input_size=input_size,
        hidden_size=128,
        num_heads=8
    ).to(device),
    
    'ResidualNet': ResidualCustomerNet(
        input_size=input_size,
        hidden_size=128,
        num_blocks=3
    ).to(device)
}

# Add autoencoder-based model
class AutoencoderClassifier(nn.Module):
    def __init__(self, autoencoder, encoded_size):
        super(AutoencoderClassifier, self).__init__()
        self.autoencoder = autoencoder
        self.classifier = nn.Sequential(
            nn.Linear(encoded_size, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )
        
        # Freeze autoencoder weights
        for param in self.autoencoder.parameters():
            param.requires_grad = False
    
    def forward(self, x):
        with torch.no_grad():
            encoded = self.autoencoder.encode(x)
        return self.classifier(encoded).squeeze()

models['AutoencoderNet'] = AutoencoderClassifier(autoencoder, X_train_encoded.shape[1]).to(device)

print(f"✅ Initialized {len(models)} deep learning models")

# Training results storage
training_results = {}
trained_models = {}

# Train each model
for model_name, model in models.items():
    print(f"\n🚀 Training {model_name}...")
    
    # Setup training components
    criterion = FocalLoss(alpha=1, gamma=2)  # Handle class imbalance
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6)
    early_stopping = EarlyStopping(patience=15, min_delta=0.001)
    
    # Train model
    start_time = time.time()
    
    results = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        num_epochs=100,
        early_stopping=early_stopping,
        device=device
    )
    
    training_time = time.time() - start_time
    
    print(f"✅ {model_name} training completed in {training_time:.1f} seconds")
    print(f"   Best Validation AUC: {results['best_val_auc']:.4f}")
    
    training_results[model_name] = results
    training_results[model_name]['training_time'] = training_time
    trained_models[model_name] = model

print(f"\n🎉 All models trained successfully!")

In [None]:
# TabNet Implementation (if available)
if TABNET_AVAILABLE:
    print("📊 Training TabNet Model")
    print("="*30)
    
    # Prepare data for TabNet (requires numpy arrays)
    X_train_np = X_train_scaled
    X_val_np = X_val_scaled
    X_test_np = X_test_scaled
    y_train_np = y_train.values
    y_val_np = y_val.values
    y_test_np = y_test.values
    
    # Initialize TabNet
    tabnet_model = TabNetClassifier(
        n_d=32,  # Width of the decision prediction layer
        n_a=32,  # Width of the attention embedding for each mask
        n_steps=3,  # Number of successive steps in the architecture
        gamma=1.3,  # Coefficient for feature reusage in the masks
        cat_idxs=[],  # No categorical features
        cat_dims=[],  # No categorical features
        cat_emb_dim=1,
        lambda_sparse=1e-3,  # Sparsity regularization
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type='sparsemax',
        scheduler_params=dict(mode="min", patience=10, min_lr=1e-5, factor=0.9),
        scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
        verbose=1,
        device_name=str(device)
    )
    
    print("🚀 Training TabNet...")
    start_time = time.time()
    
    # Train TabNet
    tabnet_model.fit(
        X_train=X_train_np,
        y_train=y_train_np,
        eval_set=[(X_val_np, y_val_np)],
        eval_name=['validation'],
        eval_metric=['auc'],
        max_epochs=100,
        patience=15,
        batch_size=512,
        virtual_batch_size=128,
        num_workers=0,
        drop_last=False
    )
    
    tabnet_training_time = time.time() - start_time
    
    # Get TabNet predictions
    tabnet_val_preds = tabnet_model.predict_proba(X_val_np)[:, 1]
    tabnet_val_auc = roc_auc_score(y_val_np, tabnet_val_preds)
    
    print(f"✅ TabNet training completed in {tabnet_training_time:.1f} seconds")
    print(f"   Validation AUC: {tabnet_val_auc:.4f}")
    
    # Add to results
    training_results['TabNet'] = {
        'best_val_auc': tabnet_val_auc,
        'training_time': tabnet_training_time
    }
    trained_models['TabNet'] = tabnet_model
    
else:
    print("⚠️ TabNet not available, skipping...")

In [None]:
# Evaluate All Models on Test Set
print("📊 Evaluating All Models on Test Set")
print("="*45)

test_results = {}

for model_name, model in trained_models.items():
    print(f"\n🔄 Evaluating {model_name}...")
    
    if model_name == 'TabNet' and TABNET_AVAILABLE:
        # TabNet evaluation
        y_pred_proba = model.predict_proba(X_test_np)[:, 1]
        y_pred = (y_pred_proba > 0.5).astype(int)
        y_true = y_test_np
        
    else:
        # PyTorch model evaluation
        model.eval()
        all_predictions = []
        all_probabilities = []
        all_targets = []
        
        with torch.no_grad():
            for batch_X, batch_y in test_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                
                if isinstance(model, AttentionCustomerNet):
                    outputs, _ = model(batch_X)
                else:
                    outputs = model(batch_X)
                
                probabilities = outputs.cpu().numpy()
                predictions = (probabilities > 0.5).astype(int)
                
                all_probabilities.extend(probabilities)
                all_predictions.extend(predictions)
                all_targets.extend(batch_y.cpu().numpy())
        
        y_pred_proba = np.array(all_probabilities)
        y_pred = np.array(all_predictions)
        y_true = np.array(all_targets)
    
    # Calculate metrics
    auc = roc_auc_score(y_true, y_pred_proba)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    
    test_results[model_name] = {
        'AUC': auc,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Training_Time': training_results[model_name]['training_time']
    }
    
    print(f"  AUC: {auc:.4f}")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1-Score: {f1:.4f}")

# Create results DataFrame
results_df = pd.DataFrame(test_results).T
results_df = results_df.sort_values('AUC', ascending=False)

print(f"\n🏆 Deep Learning Model Performance Summary:")
print(results_df.round(4))

# Find best model
best_dl_model = results_df.index[0]
best_dl_auc = results_df.iloc[0]['AUC']

print(f"\n🥇 Best Deep Learning Model: {best_dl_model} (AUC: {best_dl_auc:.4f})")

In [None]:
# Model Interpretability and Feature Analysis
print("🔍 Deep Learning Model Interpretability")
print("="*50)

# Attention Visualization (if AttentionNet performed well)
if 'AttentionNet' in trained_models:
    print("\n👁️ Analyzing Attention Weights...")
    
    attention_model = trained_models['AttentionNet']
    attention_model.eval()
    
    # Get attention weights for a sample
    sample_batch = X_test_tensor[:100]  # First 100 test samples
    
    with torch.no_grad():
        _, attention_weights = attention_model(sample_batch)
    
    # Average attention weights across samples and heads
    avg_attention = attention_weights.mean(dim=(0, 1)).cpu().numpy()  # Average across batch and heads
    
    print(f"✅ Attention analysis completed")
    print(f"   Attention weights shape: {attention_weights.shape}")

# Feature Importance using Gradient-based methods
def get_feature_importance_gradients(model, X_sample, feature_names):
    """Calculate feature importance using gradients"""
    model.eval()
    X_sample.requires_grad_()
    
    if isinstance(model, AttentionCustomerNet):
        output, _ = model(X_sample)
    else:
        output = model(X_sample)
    
    # Calculate gradients
    gradients = torch.autograd.grad(output.sum(), X_sample, create_graph=True)[0]
    
    # Feature importance as absolute gradient values
    importance = torch.abs(gradients).mean(dim=0).cpu().numpy()
    
    return importance

# Calculate feature importance for best model
if best_dl_model != 'TabNet':
    print(f"\n📊 Calculating feature importance for {best_dl_model}...")
    
    best_model = trained_models[best_dl_model]
    sample_data = X_test_tensor[:1000]  # Use 1000 samples
    
    feature_importance = get_feature_importance_gradients(
        best_model, sample_data, feature_columns
    )
    
    # Create feature importance DataFrame
    importance_df = pd.DataFrame({
        'Feature': feature_columns,
        'Importance': feature_importance
    }).sort_values('Importance', ascending=False)
    
    print(f"\n🎯 Top 15 Most Important Features ({best_dl_model}):")
    print(importance_df.head(15))
    
    # Visualize feature importance
    plt.figure(figsize=(12, 8))
    top_features = importance_df.head(15)
    
    bars = plt.barh(range(len(top_features)), top_features['Importance'], 
                    color=plt.cm.viridis(np.linspace(0, 1, len(top_features))))
    plt.yticks(range(len(top_features)), top_features['Feature'])
    plt.xlabel('Feature Importance (Gradient-based)')
    plt.title(f'Top 15 Feature Importance - {best_dl_model}', fontsize=14, fontweight='bold')
    plt.gca().invert_yaxis()
    
    # Add value labels
    for i, (bar, value) in enumerate(zip(bars, top_features['Importance'])):
        plt.text(value + 0.001, i, f'{value:.4f}', va='center')
    
    plt.tight_layout()
    plt.show()

elif TABNET_AVAILABLE and 'TabNet' in trained_models:
    print(f"\n📊 TabNet Feature Importance Analysis...")
    
    # TabNet has built-in feature importance
    tabnet_importance = trained_models['TabNet'].feature_importances_
    
    importance_df = pd.DataFrame({
        'Feature': feature_columns,
        'Importance': tabnet_importance
    }).sort_values('Importance', ascending=False)
    
    print(f"\n🎯 Top 15 Most Important Features (TabNet):")
    print(importance_df.head(15))
    
    # Visualize TabNet feature importance
    plt.figure(figsize=(12, 8))
    top_features = importance_df.head(15)
    
    bars = plt.barh(range(len(top_features)), top_features['Importance'], 
                    color=plt.cm.plasma(np.linspace(0, 1, len(top_features))))
    plt.yticks(range(len(top_features)), top_features['Feature'])
    plt.xlabel('Feature Importance')
    plt.title(f'Top 15 Feature Importance - TabNet', fontsize=14, fontweight='bold')
    plt.gca().invert_yaxis()
    
    plt.tight_layout()
    plt.show()

In [None]:
# Training History Visualization
print("📈 Visualizing Training History")
print("="*40)

# Create training history plots
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Deep Learning Training History', fontsize=16, fontweight='bold')

colors = ['blue', 'red', 'green', 'purple', 'orange']
model_names = [name for name in training_results.keys() if name != 'TabNet']

# Training Loss
for i, model_name in enumerate(model_names):
    if 'train_losses' in training_results[model_name]:
        axes[0,0].plot(training_results[model_name]['train_losses'], 
                      label=model_name, color=colors[i % len(colors)], alpha=0.7)

axes[0,0].set_title('Training Loss')
axes[0,0].set_xlabel('Epoch')
axes[0,0].set_ylabel('Loss')
axes[0,0].legend()
axes[0,0].grid(True, alpha=0.3)

# Validation Loss
for i, model_name in enumerate(model_names):
    if 'val_losses' in training_results[model_name]:
        axes[0,1].plot(training_results[model_name]['val_losses'], 
                      label=model_name, color=colors[i % len(colors)], alpha=0.7)

axes[0,1].set_title('Validation Loss')
axes[0,1].set_xlabel('Epoch')
axes[0,1].set_ylabel('Loss')
axes[0,1].legend()
axes[0,1].grid(True, alpha=0.3)

# Validation AUC
for i, model_name in enumerate(model_names):
    if 'val_aucs' in training_results[model_name]:
        axes[1,0].plot(training_results[model_name]['val_aucs'], 
                      label=model_name, color=colors[i % len(colors)], alpha=0.7)

axes[1,0].set_title('Validation AUC')
axes[1,0].set_xlabel('Epoch')
axes[1,0].set_ylabel('AUC Score')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)

# Model Performance Comparison
model_names_all = list(test_results.keys())
auc_scores = [test_results[name]['AUC'] for name in model_names_all]
training_times = [test_results[name]['Training_Time'] for name in model_names_all]

bars = axes[1,1].bar(range(len(model_names_all)), auc_scores, 
                    color=plt.cm.viridis(np.linspace(0, 1, len(model_names_all))))
axes[1,1].set_title('Test AUC Comparison')
axes[1,1].set_xticks(range(len(model_names_all)))
axes[1,1].set_xticklabels(model_names_all, rotation=45, ha='right')
axes[1,1].set_ylabel('AUC Score')

# Highlight best model
best_idx = model_names_all.index(best_dl_model)
bars[best_idx].set_color('gold')
bars[best_idx].set_edgecolor('red')
bars[best_idx].set_linewidth(2)

# Add value labels
for bar, value in zip(bars, auc_scores):
    axes[1,1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005, 
                   f'{value:.3f}', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

# Autoencoder reconstruction visualization
print("\n🔄 Autoencoder Reconstruction Analysis...")

plt.figure(figsize=(12, 6))

# Plot autoencoder training history
plt.subplot(1, 2, 1)
plt.plot(ae_train_losses, label='Training Loss', color='blue')
plt.plot(ae_val_losses, label='Validation Loss', color='red')
plt.title('Autoencoder Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot reconstruction quality
plt.subplot(1, 2, 2)
with torch.no_grad():
    sample_input = X_test_tensor[:100]
    reconstructed, _ = autoencoder(sample_input)
    reconstruction_error = F.mse_loss(reconstructed, sample_input, reduction='none').mean(dim=1).cpu().numpy()

plt.hist(reconstruction_error, bins=30, alpha=0.7, color='green', edgecolor='black')
plt.title('Reconstruction Error Distribution')
plt.xlabel('MSE Error')
plt.ylabel('Frequency')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"✅ Training history visualization completed")

In [None]:
# Business Impact Analysis with Deep Learning Models
print("💼 Business Impact Analysis - Deep Learning Models")
print("="*60)

# Use best deep learning model for business analysis
if best_dl_model != 'TabNet':
    best_model = trained_models[best_dl_model]
    best_model.eval()
    
    with torch.no_grad():
        if isinstance(best_model, AttentionCustomerNet):
            final_predictions, _ = best_model(X_test_tensor)
        else:
            final_predictions = best_model(X_test_tensor)
        
        final_predictions = final_predictions.cpu().numpy()

elif TABNET_AVAILABLE and best_dl_model == 'TabNet':
    final_predictions = trained_models['TabNet'].predict_proba(X_test_np)[:, 1]

# Business calculations
avg_customer_value = df['Total_Charges'].mean()
annual_customer_value = avg_customer_value * 12
intervention_cost = avg_customer_value * 0.15
success_rate = 0.35  # Higher success rate for advanced AI

# Calculate business metrics
from sklearn.metrics import confusion_matrix
final_binary_pred = (final_predictions > 0.5).astype(int)
cm = confusion_matrix(y_test, final_binary_pred)
tn, fp, fn, tp = cm.ravel()

customers_to_target = tp + fp
customers_saved = tp * success_rate
total_intervention_cost = customers_to_target * intervention_cost
revenue_saved = customers_saved * annual_customer_value
net_benefit = revenue_saved - total_intervention_cost
roi = (net_benefit / total_intervention_cost) * 100 if total_intervention_cost > 0 else 0

print(f"\n💰 Deep Learning Model Business Impact ({best_dl_model}):")
print(f"  Model AUC: {best_dl_auc:.4f}")
print(f"  Customers to Target: {customers_to_target:,}")
print(f"  Expected Customers Saved: {customers_saved:.0f}")
print(f"  Total Intervention Cost: ${total_intervention_cost:,.2f}")
print(f"  Revenue Saved: ${revenue_saved:,.2f}")
print(f"  Net Benefit: ${net_benefit:,.2f}")
print(f"  ROI: {roi:.1f}%")

# Compare with baseline models
try:
    with open('../outputs/optimization/optimization_results.json', 'r') as f:
        baseline_results = json.load(f)
    
    baseline_auc = baseline_results['final_auc']
    baseline_net_benefit = baseline_results['business_impact']['net_benefit']
    
    auc_improvement = ((best_dl_auc - baseline_auc) / baseline_auc) * 100
    benefit_improvement = ((net_benefit - baseline_net_benefit) / abs(baseline_net_benefit)) * 100
    
    print(f"\n📊 Deep Learning vs Optimized Traditional Models:")
    print(f"  AUC Improvement: {auc_improvement:.2f}%")
    print(f"  Net Benefit Improvement: {benefit_improvement:.2f}%")
    
    if auc_improvement > 0:
        print(f"  🎉 Deep learning models outperform traditional ML!")
    else:
        print(f"  ⚠️ Traditional ML models still competitive")
        
except:
    print(f"\n⚠️ Baseline comparison not available")

# Confidence-based segmentation
confidence_segments = {
    'Very High Confidence': (final_predictions >= 0.8).sum(),
    'High Confidence': ((final_predictions >= 0.6) & (final_predictions < 0.8)).sum(),
    'Medium Confidence': ((final_predictions >= 0.4) & (final_predictions < 0.6)).sum(),
    'Low Confidence': (final_predictions < 0.4).sum()
}

print(f"\n🎯 Prediction Confidence Segmentation:")
for segment, count in confidence_segments.items():
    percentage = (count / len(final_predictions)) * 100
    print(f"  {segment}: {count:,} customers ({percentage:.1f}%)")

In [None]:
# Advanced Model Ensemble
print("🎭 Creating Advanced Deep Learning Ensemble")
print("="*55)

# Combine predictions from all models using weighted average
ensemble_predictions = np.zeros(len(X_test_tensor))
total_weight = 0

print("\n🔄 Computing ensemble predictions...")

for model_name, model in trained_models.items():
    # Weight by AUC performance
    weight = test_results[model_name]['AUC']
    
    if model_name == 'TabNet' and TABNET_AVAILABLE:
        predictions = model.predict_proba(X_test_np)[:, 1]
    else:
        model.eval()
        with torch.no_grad():
            if isinstance(model, AttentionCustomerNet):
                predictions, _ = model(X_test_tensor)
            else:
                predictions = model(X_test_tensor)
            predictions = predictions.cpu().numpy()
    
    ensemble_predictions += weight * predictions
    total_weight += weight
    
    print(f"  Added {model_name} with weight {weight:.4f}")

# Normalize ensemble predictions
ensemble_predictions /= total_weight

# Evaluate ensemble
ensemble_auc = roc_auc_score(y_test, ensemble_predictions)
ensemble_binary = (ensemble_predictions > 0.5).astype(int)
ensemble_accuracy = accuracy_score(y_test, ensemble_binary)
ensemble_precision = precision_score(y_test, ensemble_binary)
ensemble_recall = recall_score(y_test, ensemble_binary)
ensemble_f1 = f1_score(y_test, ensemble_binary)

print(f"\n🎉 Deep Learning Ensemble Results:")
print(f"  AUC: {ensemble_auc:.4f}")
print(f"  Accuracy: {ensemble_accuracy:.4f}")
print(f"  Precision: {ensemble_precision:.4f}")
print(f"  Recall: {ensemble_recall:.4f}")
print(f"  F1-Score: {ensemble_f1:.4f}")

# Compare ensemble with best individual model
ensemble_improvement = ((ensemble_auc - best_dl_auc) / best_dl_auc) * 100
print(f"\n📈 Ensemble vs Best Individual Model:")
print(f"  Best Individual AUC: {best_dl_auc:.4f}")
print(f"  Ensemble AUC: {ensemble_auc:.4f}")
print(f"  Improvement: {ensemble_improvement:.2f}%")

# Update results with ensemble
test_results['Deep_Learning_Ensemble'] = {
    'AUC': ensemble_auc,
    'Accuracy': ensemble_accuracy,
    'Precision': ensemble_precision,
    'Recall': ensemble_recall,
    'F1-Score': ensemble_f1,
    'Training_Time': sum(result['Training_Time'] for result in test_results.values() if 'Training_Time' in result)
}

# Determine final best model
final_best_auc = max(ensemble_auc, best_dl_auc)
final_best_model = 'Deep_Learning_Ensemble' if ensemble_auc > best_dl_auc else best_dl_model
final_predictions_best = ensemble_predictions if ensemble_auc > best_dl_auc else final_predictions

print(f"\n🏆 Final Best Model: {final_best_model} (AUC: {final_best_auc:.4f})")

In [None]:
# Save Deep Learning Models and Results
print("💾 Saving Deep Learning Models and Results")
print("="*50)

import os
os.makedirs('../models/deep_learning', exist_ok=True)
os.makedirs('../outputs/deep_learning', exist_ok=True)

# Save PyTorch models
for model_name, model in trained_models.items():
    if model_name != 'TabNet':
        torch.save({
            'model_state_dict': model.state_dict(),
            'model_class': model.__class__.__name__,
            'model_config': {
                'input_size': input_size,
                # Add other config parameters as needed
            }
        }, f'../models/deep_learning/{model_name.lower()}_model.pth')
        print(f"✅ Saved {model_name} PyTorch model")

# Save TabNet model (if available)
if TABNET_AVAILABLE and 'TabNet' in trained_models:
    trained_models['TabNet'].save_model('../models/deep_learning/tabnet_model')
    print("✅ Saved TabNet model")

# Save autoencoder
torch.save({
    'model_state_dict': autoencoder.state_dict(),
    'encoding_sizes': [64, 32, 16],
    'input_size': input_size
}, '../models/deep_learning/autoencoder.pth')
print("✅ Saved autoencoder model")

# Save scaler
joblib.dump(scaler, '../models/deep_learning/deep_learning_scaler.pkl')
print("✅ Saved deep learning scaler")

# Save results and analysis
deep_learning_summary = {
    'model_performance': test_results,
    'training_results': {k: {key: val for key, val in v.items() if key != 'train_losses' and key != 'val_losses' and key != 'val_aucs'} 
                        for k, v in training_results.items()},
    'best_individual_model': best_dl_model,
    'best_individual_auc': float(best_dl_auc),
    'ensemble_auc': float(ensemble_auc),
    'final_best_model': final_best_model,
    'final_best_auc': float(final_best_auc),
    'business_impact': {
        'customers_targeted': int(customers_to_target),
        'customers_saved': float(customers_saved),
        'total_intervention_cost': float(total_intervention_cost),
        'revenue_saved': float(revenue_saved),
        'net_benefit': float(net_benefit),
        'roi_percentage': float(roi)
    },
    'confidence_segments': {k: int(v) for k, v in confidence_segments.items()},
    'feature_importance': importance_df.to_dict('records') if 'importance_df' in locals() else [],
    'autoencoder_performance': {
        'final_train_loss': ae_train_losses[-1] if ae_train_losses else 0,
        'final_val_loss': ae_val_losses[-1] if ae_val_losses else 0,
        'compression_ratio': f"{input_size} -> {X_train_encoded.shape[1]}"
    }
}

# Save deep learning summary
with open('../outputs/deep_learning/deep_learning_results.json', 'w') as f:
    json.dump(deep_learning_summary, f, indent=2, default=str)

print("✅ Saved deep learning results")

# Save model comparison
final_results_df = pd.DataFrame(test_results).T
final_results_df.to_csv('../outputs/deep_learning/model_comparison.csv')
print("✅ Saved model comparison")

# Save predictions for ensemble
predictions_df = pd.DataFrame({
    'actual': y_test.values,
    'ensemble_predictions': ensemble_predictions,
    'best_individual_predictions': final_predictions,
    'confidence_level': pd.cut(ensemble_predictions, 
                              bins=[0, 0.4, 0.6, 0.8, 1.0], 
                              labels=['Low', 'Medium', 'High', 'Very High'], include_lowest=True)
})
predictions_df.to_csv('../outputs/deep_learning/predictions.csv', index=False)
print("✅ Saved predictions")

In [None]:
# Create Comprehensive Deep Learning Report
print("📋 Creating Comprehensive Deep Learning Report")
print("="*55)

# Calculate additional insights
total_training_time = sum(result['Training_Time'] for result in test_results.values() if 'Training_Time' in result)
avg_auc = np.mean([result['AUC'] for result in test_results.values()])
model_count = len(test_results) - 1  # Exclude ensemble

# GPU utilization info
gpu_info = ""
if torch.cuda.is_available():
    gpu_memory_used = torch.cuda.max_memory_allocated() / 1024**3
    gpu_info = f"\n🔥 GPU UTILIZATION:\n• Max Memory Used: {gpu_memory_used:.2f} GB\n• Device: {torch.cuda.get_device_name(0)}"

# Create comprehensive report
deep_learning_report = f"""
🎉 DEEP LEARNING MODELS TRAINING COMPLETED!
======================================================================

🧠 MODELS TRAINED:
• Deep Neural Networks: {model_count}
• Autoencoder: 1 (Feature learning)
• TabNet: {'1' if TABNET_AVAILABLE else '0 (not available)'}
• Ensemble Model: 1 (Weighted combination)
• Total Training Time: {total_training_time:.1f} seconds

🏆 PERFORMANCE RESULTS:
• Best Individual Model: {best_dl_model} (AUC: {best_dl_auc:.4f})
• Ensemble Model AUC: {ensemble_auc:.4f}
• Final Best Model: {final_best_model} (AUC: {final_best_auc:.4f})
• Average Model AUC: {avg_auc:.4f}
• Ensemble Improvement: {ensemble_improvement:.2f}%

🎯 MODEL ARCHITECTURES:
• DeepCustomerNet: Multi-layer perceptron with batch normalization
• AttentionNet: Multi-head attention mechanism for feature importance
• ResidualNet: Residual connections for deep learning
• AutoencoderNet: Compressed feature representation learning
{'• TabNet: Attention-based tabular deep learning' if TABNET_AVAILABLE else ''}

🔄 AUTOENCODER INSIGHTS:
• Feature Compression: {input_size} -> {X_train_encoded.shape[1]} dimensions
• Final Training Loss: {ae_train_losses[-1]:.4f}
• Final Validation Loss: {ae_val_losses[-1]:.4f}
• Compression Ratio: {(1 - X_train_encoded.shape[1]/input_size)*100:.1f}% reduction

💼 BUSINESS IMPACT (BEST MODEL):
• Customers to Target: {customers_to_target:,}
• Expected Customers Saved: {customers_saved:.0f}
• Total Intervention Cost: ${total_intervention_cost:,.2f}
• Revenue Saved: ${revenue_saved:,.2f}
• Net Benefit: ${net_benefit:,.2f}
• ROI: {roi:.1f}%

🎯 PREDICTION CONFIDENCE:
• Very High Confidence: {confidence_segments['Very High Confidence']:,} customers
• High Confidence: {confidence_segments['High Confidence']:,} customers
• Medium Confidence: {confidence_segments['Medium Confidence']:,} customers
• Low Confidence: {confidence_segments['Low Confidence']:,} customers
{gpu_info}

📁 SAVED ARTIFACTS:
• PyTorch Models: ../models/deep_learning/
• Autoencoder: ../models/deep_learning/autoencoder.pth
{'• TabNet Model: ../models/deep_learning/tabnet_model/' if TABNET_AVAILABLE else ''}
• Scaler: ../models/deep_learning/deep_learning_scaler.pkl
• Results: ../outputs/deep_learning/deep_learning_results.json
• Predictions: ../outputs/deep_learning/predictions.csv
• Model Comparison: ../outputs/deep_learning/model_comparison.csv

🔍 KEY INSIGHTS:
• {'Deep learning outperforms traditional ML' if 'auc_improvement' in locals() and auc_improvement > 0 else 'Traditional ML remains competitive'}
• Attention mechanisms {'provide' if 'AttentionNet' in test_results else 'could provide'} interpretable feature importance
• Autoencoder successfully compressed features by {(1 - X_train_encoded.shape[1]/input_size)*100:.1f}%
• Ensemble approach {'improved' if ensemble_improvement > 0 else 'maintained'} performance
• GPU acceleration {'enabled' if torch.cuda.is_available() else 'not available'} for faster training

🚀 NEXT STEPS:
1. Deploy best performing model to production
2. Implement real-time inference pipeline
3. Set up model monitoring and drift detection
4. Create A/B testing framework for model comparison
5. Implement automated retraining with new data
6. Develop model interpretability dashboard
7. Scale inference for high-throughput predictions

💡 ADVANCED FEATURES IMPLEMENTED:
✅ Custom neural network architectures
✅ Attention mechanisms for interpretability
✅ Residual connections for deep networks
✅ Autoencoder for feature learning
✅ Advanced regularization (dropout, batch norm)
✅ Focal loss for class imbalance
✅ Learning rate scheduling
✅ Early stopping
✅ Gradient clipping
✅ Model ensembling
{'✅ TabNet implementation' if TABNET_AVAILABLE else '❌ TabNet not available'}
✅ GPU acceleration
✅ Feature importance analysis
✅ Business impact calculation
"""

print(deep_learning_report)

# Save report
with open('../outputs/deep_learning/deep_learning_summary.txt', 'w') as f:
    f.write(deep_learning_report)

print("✅ Deep learning summary saved to ../outputs/deep_learning/deep_learning_summary.txt")
print("\n🎉 Deep learning model development completed successfully!")

# Clean up GPU memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("🧹 GPU memory cleared")