In [None]:
# 🚀 Setup for Google Colab
import sys
if 'google.colab' in sys.modules:
    print("🔧 Setting up for Google Colab...")
    
    # Install required dependencies
    !pip install -q matplotlib seaborn scikit-learn numpy pandas
    
    # Note: SSL framework code will be included in subsequent cells for Colab compatibility
    print("✅ Dependencies installed! SSL framework will be defined in the next cells.")
else:
    print("📝 Running locally - using installed SSL framework")

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yourusername/pyssl/blob/main/notebooks/06_production_patterns.ipynb)

## 4. Production Best Practices & Summary

### 🏗️ Key Production Patterns:

1. **Configuration Management**: Use structured configs for all parameters
2. **Model Versioning**: Track versions, metadata, and lineage  
3. **Performance Monitoring**: Monitor latency, accuracy, and resource usage
4. **Error Handling**: Robust exception handling and graceful degradation
5. **Model Registry**: Centralized storage with metadata tracking
6. **Validation Gates**: Ensure models meet requirements before deployment

### 📋 Production Deployment Checklist:

**Pre-deployment:**
- [ ] Model meets minimum performance thresholds
- [ ] Latency requirements satisfied
- [ ] Error handling tested
- [ ] Configuration validated
- [ ] Model registry updated

**Post-deployment:**
- [ ] Monitoring dashboard configured
- [ ] Alerting rules established  
- [ ] Rollback procedures tested
- [ ] Performance baselines recorded
- [ ] Continuous learning pipeline configured

This production framework provides enterprise-grade SSL deployment capabilities with proper monitoring, versioning, and lifecycle management.

In [None]:
# Settings for this demo deployment; anything not listed keeps the
# ProductionConfig default.
prod_config = ProductionConfig(
    model_name="customer_sentiment_ssl",
    model_version="2.1.0",
    strategy_type="ConfidenceThreshold",
    strategy_params=dict(threshold=0.93),
    max_iterations=15,
    min_accuracy=0.85,
    max_prediction_latency_ms=50.0,
)

# Service wrapper that trains, saves and serves the model
ssl_service = SSLProductionService(prod_config)

# Synthetic 3-class dataset with only 100 labeled rows out of 5000 samples
print("📊 Generating production dataset...")
(
    X_labeled, y_labeled,
    X_unlabeled,
    X_val, y_val,
    X_test, y_test,
    y_unlabeled_true,
) = generate_ssl_dataset(
    dataset_type="classification",
    n_samples=5000,
    n_labeled=100,  # Small labeled set - realistic for production
    test_size=0.15,
    val_size=0.10,
    random_state=42,
    n_features=15,
    n_classes=3,
    class_sep=0.75,
)

print(
    f"   📋 Dataset: {len(X_labeled)} labeled, {len(X_unlabeled)} unlabeled",
    f"   🎯 Classes: {len(np.unique(y_labeled))}",
    sep="\n",
)

# Fit the self-training model; train_model reports failure by returning False
success = ssl_service.train_model(X_labeled, y_labeled, X_unlabeled, X_val, y_val)

if not success:
    print("❌ Training failed - cannot proceed to production")
else:
    # Persist the fitted model to the registry
    model_path = ssl_service.save_model()

    # Smoke-test on the first 100 test rows, collecting confidence and latency
    print("\n🔮 Testing production predictions...")
    predictions, confidences, latency = ssl_service.predict(
        X_test[:100], return_confidence=True
    )

    print(
        f"   ⚡ Prediction latency: {latency:.2f}ms per sample",
        f"   🎯 Average confidence: {np.mean(confidences):.3f}",
        f"   📊 Accuracy on test batch: {np.mean(predictions == y_test[:100]):.3f}",
        sep="\n",
    )

    # Score the complete test split
    from sklearn.metrics import accuracy_score, f1_score
    all_predictions = ssl_service.predict(X_test)
    test_accuracy = accuracy_score(y_test, all_predictions)
    test_f1 = f1_score(y_test, all_predictions, average='macro')

    print(
        "\n📈 Full Test Performance:",
        f"   Accuracy: {test_accuracy:.3f} (min required: {prod_config.min_accuracy})",
        f"   F1-Macro: {test_f1:.3f} (min required: {prod_config.min_f1_score})",
        sep="\n",
    )

    # Deployment gate: accuracy, macro-F1 and the latency measured on the
    # 100-row batch above must all satisfy the configured thresholds.
    meets_requirements = (
        test_accuracy >= prod_config.min_accuracy
        and test_f1 >= prod_config.min_f1_score
        and latency <= prod_config.max_prediction_latency_ms
    )

    if not meets_requirements:
        print("⚠️ Model does not meet production requirements")
    else:
        print("✅ Model meets all production requirements!")

## 3. Production Deployment Example

Let's demonstrate the production service with a realistic example:

In [None]:
class SSLProductionService:
    """Production-ready SSL service with monitoring and lifecycle management.

    Wraps a ``SelfTrainingClassifier`` with training, prediction (including
    per-sample latency measurement), and pickle-based save/load. Status and
    error messages are reported with ``print``.

    Attributes:
        config: The configuration object passed to the constructor.
        model: The fitted classifier, or ``None`` until training or loading
            succeeds.
        model_metadata: Dict describing the most recent successful training
            run (or the metadata read by ``load_model``).
        performance_history: Initialised to an empty list; not written by
            any method in this class.
        prediction_cache: Initialised to an empty dict; not written by any
            method in this class.
    """
    
    def __init__(self, config: "ProductionConfig"):
        """Store the configuration and make sure the storage directories exist.

        Args:
            config: Configuration object. The methods of this class read
                ``model_name``, ``model_version``, ``strategy_type``,
                ``strategy_params``, ``max_iterations``,
                ``convergence_threshold``, ``max_prediction_latency_ms``,
                ``model_registry_path`` and ``metrics_storage_path`` from it,
                and call ``to_dict()`` when saving.
        """
        self.config = config
        self.model = None
        self.model_metadata = {}
        self.performance_history = []
        self.prediction_cache = {}
        
        # Initialize directories. parents=True lets nested paths such as
        # "artifacts/registry" be created even when "artifacts" is missing.
        Path(config.model_registry_path).mkdir(parents=True, exist_ok=True)
        Path(config.metrics_storage_path).mkdir(parents=True, exist_ok=True)
        
        print(f"🚀 SSL Production Service initialized: {config.model_name}")
    
    def train_model(self, X_labeled: np.ndarray, y_labeled: np.ndarray, 
                   X_unlabeled: np.ndarray, X_val: Optional[np.ndarray] = None,
                   y_val: Optional[np.ndarray] = None):
        """Train SSL model with production monitoring.

        Builds the selection strategy named by ``config.strategy_type``, fits
        a ``SelfTrainingClassifier`` around a ``LogisticRegression`` base
        model, and records metadata about the run.

        Args:
            X_labeled: Features of the labeled samples.
            y_labeled: Labels for ``X_labeled``.
            X_unlabeled: Features of the unlabeled samples.
            X_val: Optional validation features, forwarded to ``fit``.
            y_val: Optional validation labels, forwarded to ``fit``.

        Returns:
            ``True`` when training succeeds. ``False`` when any exception is
            raised (including an unknown strategy type); the error is
            printed, and ``self.model`` / ``self.model_metadata`` keep their
            previous values.
        """
        
        print(f"🔄 Training {self.config.model_name} v{self.config.model_version}...")
        start_time = time.time()
        
        try:
            # Create strategy
            if self.config.strategy_type == "ConfidenceThreshold":
                strategy = ConfidenceThreshold(**self.config.strategy_params)
            elif self.config.strategy_type == "TopKFixedCount":
                strategy = TopKFixedCount(**self.config.strategy_params)
            else:
                raise ValueError(f"Unknown strategy: {self.config.strategy_type}")
            
            # Create and train model
            from sklearn.linear_model import LogisticRegression
            base_model = LogisticRegression(random_state=42, max_iter=1000)
            
            # Build into a local first: self.model is only replaced after a
            # successful fit, so a failed run can never leave an unfitted
            # classifier behind that predict()/save_model() would accept.
            model = SelfTrainingClassifier(
                base_model=base_model,
                selection_strategy=strategy,
                integration_strategy=AppendAndGrow(),
                max_iter=self.config.max_iterations,
                labeling_convergence_threshold=self.config.convergence_threshold
            )
            
            # Train with monitoring
            model.fit(X_labeled, y_labeled, X_unlabeled, X_val, y_val)
            
            training_time = time.time() - start_time
            
            # Collect metadata from the fitted model before publishing it
            metadata = {
                'model_name': self.config.model_name,
                'version': self.config.model_version,
                'training_time_seconds': training_time,
                'training_timestamp': datetime.now().isoformat(),
                'initial_labeled_count': len(X_labeled),
                'unlabeled_count': len(X_unlabeled),
                'final_labeled_count': len(model.X_labeled_),
                'strategy': self.config.strategy_type,
                'strategy_params': self.config.strategy_params,
                'iterations_completed': len(model.history_),
                'stopping_reason': model.stopping_reason_
            }
            
            self.model = model
            self.model_metadata = metadata
            
            print(f"✅ Training completed in {training_time:.2f}s")
            print(f"   Final labeled samples: {self.model_metadata['final_labeled_count']}")
            print(f"   Iterations: {self.model_metadata['iterations_completed']}")
            print(f"   Stopping reason: {self.model_metadata['stopping_reason']}")
            
            return True
            
        except Exception as e:
            # Deliberate best-effort: callers branch on the boolean result.
            print(f"❌ Training failed: {str(e)}")
            return False
    
    def predict(self, X: np.ndarray, return_confidence: bool = False):
        """Make predictions with monitoring.

        Args:
            X: Samples to classify.
            return_confidence: When true, also compute the per-sample maximum
                of ``predict_proba`` and return the measured latency.

        Returns:
            The predictions alone when ``return_confidence`` is false;
            otherwise the tuple ``(predictions, confidences, latency_ms)``
            where ``latency_ms`` is the wall-clock time per sample in
            milliseconds (``0.0`` for an empty batch).

        Raises:
            ValueError: If no model has been trained or loaded.
            Exception: Any error raised by the underlying model is printed
                and re-raised unchanged.
        """
        if self.model is None:
            raise ValueError("Model not trained. Call train_model() first.")
        
        start_time = time.time()
        
        try:
            predictions = self.model.predict(X)
            
            confidences = None
            if return_confidence:
                probas = self.model.predict_proba(X)
                confidences = np.max(probas, axis=1)
            
            prediction_time = time.time() - start_time
            n_samples = len(X)
            # Guard the per-sample division so an empty batch reports zero
            # latency instead of raising ZeroDivisionError.
            latency_ms = (prediction_time / n_samples) * 1000 if n_samples else 0.0
            
            # Log performance warning if latency is high (in both return modes)
            if latency_ms > self.config.max_prediction_latency_ms:
                print(f"⚠️ High prediction latency: {latency_ms:.2f}ms per sample")
            
            if return_confidence:
                return predictions, confidences, latency_ms
            return predictions
                
        except Exception as e:
            print(f"❌ Prediction failed: {str(e)}")
            raise
    
    def save_model(self, filepath: Optional[str] = None):
        """Save model and metadata to disk.

        Args:
            filepath: Destination file. When omitted, a timestamped name is
                generated inside ``config.model_registry_path``.

        Returns:
            The path written, or ``None`` when there is no trained model or
            writing fails (the reason is printed).
        """
        # Refuse to write a package whose 'model' entry would be None.
        if self.model is None:
            print("❌ Save failed: no trained model to save")
            return None
        
        if filepath is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = f"{self.config.model_registry_path}/{self.config.model_name}_v{self.config.model_version}_{timestamp}.pkl"
        
        try:
            model_package = {
                'model': self.model,
                'metadata': self.model_metadata,
                'config': self.config.to_dict()
            }
            
            with open(filepath, 'wb') as f:
                pickle.dump(model_package, f)
            
            print(f"💾 Model saved: {filepath}")
            return filepath
            
        except Exception as e:
            print(f"❌ Save failed: {str(e)}")
            return None
    
    def load_model(self, filepath: str):
        """Load model and metadata from disk.

        Only the ``'model'`` and ``'metadata'`` entries of the saved package
        are restored; ``self.config`` is left untouched.

        Args:
            filepath: Path of a file written by ``save_model``.

        Returns:
            ``True`` on success, ``False`` when reading or unpacking fails
            (the error is printed).
        """
        try:
            # SECURITY NOTE: pickle.load can execute arbitrary code embedded
            # in the file. Only load model files from a trusted source.
            with open(filepath, 'rb') as f:
                model_package = pickle.load(f)
            
            self.model = model_package['model'] 
            self.model_metadata = model_package['metadata']
            
            print(f"📂 Model loaded: {self.model_metadata['model_name']} v{self.model_metadata['version']}")
            return True
            
        except Exception as e:
            print(f"❌ Load failed: {str(e)}")
            return False

print("✅ SSL Production Service ready!")

## 2. Production SSL Service

Now let's create a production-ready SSL service with proper error handling, monitoring, and lifecycle management:

In [None]:
@dataclass
class ProductionConfig:
    """Structured settings for a production SSL deployment.

    Every field has a default, so the class can be instantiated with only
    the values that differ. Instances convert to and from plain dicts and
    can be written to / read from JSON files.
    """

    # --- Model ---
    model_name: str = "ssl_classifier_v1"
    model_version: str = "1.0.0"
    strategy_type: str = "ConfidenceThreshold"
    strategy_params: Dict = None  # filled in by __post_init__ for known strategy types
    max_iterations: int = 10
    convergence_threshold: int = 5

    # --- Data ---
    batch_size: int = 1000
    retrain_threshold: float = 0.05  # Performance drop threshold
    min_confidence_for_auto_label: float = 0.95
    max_unlabeled_ratio: float = 0.8

    # --- Infrastructure ---
    model_registry_path: str = "./model_registry"
    metrics_storage_path: str = "./metrics"
    log_level: str = "INFO"
    enable_monitoring: bool = True

    # --- Performance thresholds ---
    min_accuracy: float = 0.8
    min_f1_score: float = 0.75
    max_prediction_latency_ms: float = 100.0
    max_training_time_hours: float = 2.0

    def __post_init__(self):
        """Supply default ``strategy_params`` when none were given.

        Only the two known strategy types receive defaults; for any other
        ``strategy_type`` the parameters stay ``None``.
        """
        if self.strategy_params is not None:
            return

        # A fresh table (and fresh dicts) per call, so instances never share
        # a default parameter dict.
        known_defaults = (
            ("ConfidenceThreshold", {"threshold": 0.95}),
            ("TopKFixedCount", {"k": 10}),
        )
        for strategy_name, default_params in known_defaults:
            if self.strategy_type == strategy_name:
                self.strategy_params = default_params
                break

    def to_dict(self) -> Dict:
        """Return all fields as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, config_dict: Dict):
        """Build a config from a dict whose keys are field names."""
        return cls(**config_dict)

    def save(self, filepath: str):
        """Save configuration to JSON file."""
        with open(filepath, 'w') as handle:
            settings = self.to_dict()
            json.dump(settings, handle, indent=2)

    @classmethod
    def load(cls, filepath: str):
        """Load configuration from JSON file."""
        with open(filepath, 'r') as handle:
            loaded_settings = json.load(handle)
        return cls.from_dict(loaded_settings)

# Example usage: override a handful of defaults, then echo the full
# configuration as pretty-printed JSON.
config = ProductionConfig(
    min_accuracy=0.85,
    strategy_params=dict(threshold=0.92),
    strategy_type="ConfidenceThreshold",
    model_name="customer_intent_ssl",
)

print(
    "🔧 Production Configuration:",
    json.dumps(config.to_dict(), indent=2),
    sep="\n",
)

## 1. Production Configuration System

First, let's create a robust configuration management system for production SSL deployments:

# 🚀 SSL Production Patterns - Enterprise Deployment

This notebook demonstrates production-ready patterns for deploying semi-supervised learning (SSL) systems at scale. We'll cover monitoring, versioning, continuous learning, and deployment best practices.

**What you'll learn:**
- Production SSL model lifecycle management
- Monitoring and alerting for SSL systems
- Continuous learning and model updating
- Performance tracking and drift detection
- Deployment patterns and infrastructure considerations
- Error handling and reliability patterns

**Focus:** Enterprise-grade SSL deployment patterns

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import pickle
import json
import warnings
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, asdict
from pathlib import Path
import time
warnings.filterwarnings('ignore')

# Import our SSL framework
import sys
sys.path.append('../')
from ssl_framework.main import SelfTrainingClassifier
from ssl_framework.strategies import ConfidenceThreshold, TopKFixedCount, AppendAndGrow

# Import utilities
from utils.data_generation import generate_ssl_dataset

# Set style
plt.style.use('default')
sns.set_palette("viridis")

print("✅ All imports successful!")