In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import numpy as np
from torch.amp import autocast, GradScaler

class ContrastiveAugmentation:
    """Applies two random augmentations to create positive pairs"""
    def __init__(self, img_size=32):
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(img_size, scale=(0.2, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    
    def __call__(self, x):
        return self.transform(x), self.transform(x)

class ProjectionHead(nn.Module):
    """MLP projection head with batch normalization for contrastive learning"""
    def __init__(self, in_dim=512, hidden_dim=2048, out_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden_dim, bias=False),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, out_dim, bias=False),
            nn.BatchNorm1d(out_dim)
        )
    
    def forward(self, x):
        return self.net(x)

class SimCLR(nn.Module):
    """SimCLR self-supervised learning model"""
    def __init__(self, encoder_dim=512, projection_dim=128, hidden_dim=2048):
        super().__init__()
        # ResNet18 encoder
        resnet = torchvision.models.resnet18(weights=None)
        self.encoder = nn.Sequential(*list(resnet.children())[:-1])
        self.projection_head = ProjectionHead(encoder_dim, hidden_dim, projection_dim)
    
    def forward(self, x):
        h = self.encoder(x)
        h = h.view(h.size(0), -1)
        z = self.projection_head(h)
        return h, z

class NTXentLoss(nn.Module):
    """Normalized Temperature-scaled Cross Entropy Loss"""
    def __init__(self, temperature=0.5):
        super().__init__()
        self.temperature = temperature
    
    def forward(self, z_i, z_j):
        batch_size = z_i.size(0)
        z = torch.cat([z_i, z_j], dim=0)  # 2N x D
        
        # Normalize embeddings
        z = F.normalize(z, dim=1)
        
        # Compute similarity matrix
        sim = torch.mm(z, z.t()) / self.temperature
        
        # Create mask to remove self-similarity
        # Use -65500 instead of -9e15 for FP16 compatibility
        # FP16 range is approximately ±65,504
        mask = torch.eye(2 * batch_size, dtype=torch.bool, device=z.device)
        sim = sim.masked_fill(mask, -65500.0)
        
        # Positive pairs are at indices (i, i+N) and (i+N, i)
        pos_sim = torch.cat([
            torch.diag(sim, batch_size),
            torch.diag(sim, -batch_size)
        ])
        
        # Compute loss
        loss = -pos_sim + torch.logsumexp(sim, dim=1)
        return loss.mean()

class LARSOptimizer(torch.optim.Optimizer):
    """LARS optimizer for large batch training"""
    def __init__(self, params, lr=0.1, momentum=0.9, weight_decay=1e-6, trust_coef=0.001):
        defaults = dict(lr=lr, momentum=momentum, weight_decay=weight_decay, trust_coef=trust_coef)
        super().__init__(params, defaults)
    
    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()
        
        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            trust_coef = group['trust_coef']
            lr = group['lr']
            
            for p in group['params']:
                if p.grad is None:
                    continue
                
                param_norm = torch.norm(p.data)
                grad_norm = torch.norm(p.grad.data)
                
                if param_norm != 0 and grad_norm != 0:
                    adaptive_lr = trust_coef * param_norm / (grad_norm + weight_decay * param_norm)
                    adaptive_lr = min(adaptive_lr, lr)
                else:
                    adaptive_lr = lr
                
                if 'momentum_buffer' not in self.state[p]:
                    buf = self.state[p]['momentum_buffer'] = torch.zeros_like(p.data)
                else:
                    buf = self.state[p]['momentum_buffer']
                
                buf.mul_(momentum).add_(p.grad.data + weight_decay * p.data, alpha=adaptive_lr)
                p.data.add_(buf, alpha=-1)
        
        return loss

def train_simclr(model, dataloader, epochs=800, base_lr=0.3, temperature=0.5, device='cuda'):
    """Training loop for SimCLR with mixed precision and optimizations"""
    model = model.to(device)
    
    # Use LARS optimizer for large batch training
    optimizer = LARSOptimizer(
        model.parameters(),
        lr=base_lr,
        momentum=0.9,
        weight_decay=1e-6
    )
    
    # Cosine annealing scheduler
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    
    criterion = NTXentLoss(temperature=temperature)
    
    # Mixed precision training - use updated API
    scaler = GradScaler(device='cuda')
    
    # Enable cuDNN benchmarking for faster training
    torch.backends.cudnn.benchmark = True
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        
        for batch_idx, batch in enumerate(dataloader):
            # Properly unpack the batch
            (x_i, x_j), _ = batch
            x_i, x_j = x_i.to(device, non_blocking=True), x_j.to(device, non_blocking=True)
            
            # Mixed precision forward pass
            optimizer.zero_grad(set_to_none=True)  # More efficient than zero_grad()
            
            with autocast('cuda'):
                _, z_i = model(x_i)
                _, z_j = model(x_j)
                loss = criterion(z_i, z_j)
            
            # Mixed precision backward pass
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            total_loss += loss.item()
            
            if batch_idx % 10 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}/{len(dataloader)}], '
                      f'Loss: {loss.item():.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f}')
        
        # Step the scheduler
        scheduler.step()
        
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch [{epoch+1}/{epochs}] Average Loss: {avg_loss:.4f}\n')
        
        # Save checkpoint every 100 epochs
        if (epoch + 1) % 100 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': avg_loss,
            }, f'simclr_checkpoint_epoch_{epoch+1}.pth')
    
    return model

# Example usage with CIFAR-10 dataset
if __name__ == "__main__":
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    
    # Download CIFAR-10 dataset
    dataset = torchvision.datasets.CIFAR10(
        root='./data',
        train=True,
        download=True,
        transform=ContrastiveAugmentation(img_size=32)
    )
    
    # Determine optimal batch size based on available GPU memory
    # For most modern GPUs (8GB+), 512 works well
    # Adjust based on your GPU: 1024 for 16GB+, 256 for 4-6GB
    batch_size = 512
    
    # Reduce num_workers to 4 as suggested by the warning
    dataloader = DataLoader(
        dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        num_workers=4,  # Reduced from 8 to match system recommendation
        pin_memory=True,  # Faster data transfer to GPU
        persistent_workers=True,  # Keep workers alive between epochs
        prefetch_factor=2  # Prefetch batches
    )
    
    # Initialize model with larger projection head
    model = SimCLR(encoder_dim=512, projection_dim=128, hidden_dim=2048)
    
    # Calculate effective batch size (2N for contrastive pairs)
    effective_batch_size = batch_size * 2
    print(f"\nBatch size: {batch_size}")
    print(f"Effective batch size (contrastive pairs): {effective_batch_size}")
    
    # Scale learning rate with batch size (linear scaling rule)
    base_lr = 0.3 * (batch_size / 256)
    
    print("\nStarting self-supervised training...")
    print(f"Training for 800 epochs with batch size {batch_size}")
    print(f"Base learning rate: {base_lr:.4f}")
    print(f"Temperature: 0.5")
    print(f"Using mixed precision training (FP16)")
    
    trained_model = train_simclr(
        model, 
        dataloader, 
        epochs=800, 
        base_lr=base_lr,
        temperature=0.5,
        device=device
    )
    
    # Save the final encoder
    torch.save(trained_model.encoder.state_dict(), 'simclr_encoder_final.pth')
    torch.save(trained_model.state_dict(), 'simclr_model_final.pth')
    
    print("\nTraining complete!")
    print("Encoder saved to 'simclr_encoder_final.pth'")
    print("Full model saved to 'simclr_model_final.pth'")
    print("\nYou can now use this encoder for downstream tasks like classification!")

Using device: cuda
GPU: Tesla T4
Available GPU memory: 15.83 GB


100%|██████████| 170M/170M [00:15<00:00, 11.2MB/s] 



Batch size: 512
Effective batch size (contrastive pairs): 1024

Starting self-supervised training...
Training for 800 epochs with batch size 512
Base learning rate: 0.6000
Temperature: 0.5
Using mixed precision training (FP16)
Epoch [1/800], Batch [0/98], Loss: 6.8303, LR: 0.600000
Epoch [1/800], Batch [10/98], Loss: 6.5610, LR: 0.600000
Epoch [1/800], Batch [20/98], Loss: 6.2221, LR: 0.600000
Epoch [1/800], Batch [30/98], Loss: 6.1341, LR: 0.600000
Epoch [1/800], Batch [40/98], Loss: 6.0815, LR: 0.600000
Epoch [1/800], Batch [50/98], Loss: 6.0222, LR: 0.600000
Epoch [1/800], Batch [60/98], Loss: 6.0302, LR: 0.600000
Epoch [1/800], Batch [70/98], Loss: 6.0186, LR: 0.600000
Epoch [1/800], Batch [80/98], Loss: 5.9904, LR: 0.600000
Epoch [1/800], Batch [90/98], Loss: 5.9355, LR: 0.600000
Epoch [1/800] Average Loss: 6.1256

Epoch [2/800], Batch [0/98], Loss: 5.9618, LR: 0.599998
Epoch [2/800], Batch [10/98], Loss: 5.9208, LR: 0.599998
Epoch [2/800], Batch [20/98], Loss: 5.8605, LR: 0.5999

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

class ProjectionHead(nn.Module):
    """MLP projection head with batch normalization for contrastive learning"""
    def __init__(self, in_dim=512, hidden_dim=2048, out_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden_dim, bias=False),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, out_dim, bias=False),
            nn.BatchNorm1d(out_dim)
        )
    
    def forward(self, x):
        return self.net(x)

class SimCLR(nn.Module):
    """SimCLR self-supervised learning model"""
    def __init__(self, encoder_dim=512, projection_dim=128, hidden_dim=2048):
        super().__init__()
        # ResNet18 encoder
        resnet = torchvision.models.resnet18(weights=None)
        self.encoder = nn.Sequential(*list(resnet.children())[:-1])
        self.projection_head = ProjectionHead(encoder_dim, hidden_dim, projection_dim)
    
    def forward(self, x):
        h = self.encoder(x)
        h = h.view(h.size(0), -1)
        z = self.projection_head(h)
        return h, z

class LinearClassifier(nn.Module):
    """Linear classifier on top of frozen encoder"""
    def __init__(self, encoder, num_classes=10):
        super().__init__()
        self.encoder = encoder
        self.classifier = nn.Linear(512, num_classes)
        
        # Freeze encoder
        for param in self.encoder.parameters():
            param.requires_grad = False
    
    def forward(self, x):
        with torch.no_grad():
            features = self.encoder(x)
            features = features.view(features.size(0), -1)
        return self.classifier(features)

def extract_features(model, dataloader, device):
    """Extract features from the encoder"""
    model.eval()
    features_list = []
    labels_list = []
    
    print("Extracting features...")
    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            images = images.to(device)
            
            # Get features from encoder
            h, _ = model(images)
            features_list.append(h.cpu().numpy())
            labels_list.append(labels.numpy())
    
    features = np.vstack(features_list)
    labels = np.concatenate(labels_list)
    
    return features, labels

def linear_evaluation(train_features, train_labels, test_features, test_labels):
    """Evaluate with linear classifier (sklearn)"""
    print("\nTraining linear classifier on frozen features...")
    
    # Normalize features
    scaler = StandardScaler()
    train_features_scaled = scaler.fit_transform(train_features)
    test_features_scaled = scaler.transform(test_features)
    
    # Train logistic regression
    clf = LogisticRegression(max_iter=1000, random_state=42, n_jobs=-1)
    clf.fit(train_features_scaled, train_labels)
    
    # Predictions
    train_preds = clf.predict(train_features_scaled)
    test_preds = clf.predict(test_features_scaled)
    
    return train_preds, test_preds, clf

def finetune_evaluation(encoder, train_loader, test_loader, device, epochs=10):
    """Fine-tune evaluation with a linear classifier on top of frozen encoder"""
    print("\nFine-tuning linear classifier on frozen encoder...")
    
    # Create classifier
    classifier = LinearClassifier(encoder, num_classes=10).to(device)
    
    # Optimizer and loss
    optimizer = torch.optim.Adam(classifier.classifier.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    # Training
    for epoch in range(epochs):
        classifier.train()
        total_loss = 0
        correct = 0
        total = 0
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)
            
            # Forward pass
            outputs = classifier(images)
            loss = criterion(outputs, labels)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Statistics
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        train_acc = 100. * correct / total
        print(f'Epoch {epoch+1}: Loss: {total_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%')
    
    # Evaluation
    classifier.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = classifier(images)
            _, predicted = outputs.max(1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())
    
    return np.array(all_preds), np.array(all_labels)

def calculate_metrics(y_true, y_pred, class_names):
    """Calculate comprehensive metrics"""
    
    # Accuracy
    accuracy = accuracy_score(y_true, y_pred)
    
    # F1 Scores
    f1_micro = f1_score(y_true, y_pred, average='micro')
    f1_macro = f1_score(y_true, y_pred, average='macro')
    f1_weighted = f1_score(y_true, y_pred, average='weighted')
    f1_per_class = f1_score(y_true, y_pred, average=None)
    
    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred)
    
    # Per-class accuracy (IoU for classification)
    per_class_acc = cm.diagonal() / cm.sum(axis=1)
    
    # Mean IoU (Intersection over Union)
    # For classification: IoU = TP / (TP + FP + FN)
    iou_per_class = []
    for i in range(len(class_names)):
        tp = cm[i, i]
        fp = cm[:, i].sum() - tp
        fn = cm[i, :].sum() - tp
        iou = tp / (tp + fp + fn) if (tp + fp + fn) > 0 else 0
        iou_per_class.append(iou)
    
    mean_iou = np.mean(iou_per_class)
    
    return {
        'accuracy': accuracy,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'f1_per_class': f1_per_class,
        'confusion_matrix': cm,
        'per_class_accuracy': per_class_acc,
        'iou_per_class': iou_per_class,
        'mean_iou': mean_iou
    }

def plot_confusion_matrix(cm, class_names, save_path='confusion_matrix.png'):
    """Plot confusion matrix"""
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix', fontsize=16, fontweight='bold')
    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"✓ Confusion matrix saved to: {save_path}")

def plot_per_class_metrics(f1_scores, accuracies, ious, class_names, save_path='per_class_metrics.png'):
    """Plot per-class metrics"""
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
    
    # F1 Scores
    axes[0].bar(range(len(class_names)), f1_scores, color='steelblue', alpha=0.8)
    axes[0].set_xticks(range(len(class_names)))
    axes[0].set_xticklabels(class_names, rotation=45, ha='right')
    axes[0].set_ylabel('F1 Score', fontsize=12)
    axes[0].set_title('F1 Score per Class', fontsize=14, fontweight='bold')
    axes[0].grid(axis='y', alpha=0.3)
    axes[0].set_ylim([0, 1])
    
    # Accuracy
    axes[1].bar(range(len(class_names)), accuracies, color='green', alpha=0.8)
    axes[1].set_xticks(range(len(class_names)))
    axes[1].set_xticklabels(class_names, rotation=45, ha='right')
    axes[1].set_ylabel('Accuracy', fontsize=12)
    axes[1].set_title('Accuracy per Class', fontsize=14, fontweight='bold')
    axes[1].grid(axis='y', alpha=0.3)
    axes[1].set_ylim([0, 1])
    
    # IoU
    axes[2].bar(range(len(class_names)), ious, color='orange', alpha=0.8)
    axes[2].set_xticks(range(len(class_names)))
    axes[2].set_xticklabels(class_names, rotation=45, ha='right')
    axes[2].set_ylabel('IoU', fontsize=12)
    axes[2].set_title('IoU per Class', fontsize=14, fontweight='bold')
    axes[2].grid(axis='y', alpha=0.3)
    axes[2].set_ylim([0, 1])
    
    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"✓ Per-class metrics saved to: {save_path}")

def print_evaluation_report(metrics, class_names, method_name="Model"):
    """Print comprehensive evaluation report"""
    print("\n" + "="*80)
    print(f"{method_name} EVALUATION RESULTS")
    print("="*80)
    
    print(f"\n📊 Overall Metrics:")
    print(f"   Accuracy:        {metrics['accuracy']*100:.2f}%")
    print(f"   F1 Score (Micro): {metrics['f1_micro']:.4f}")
    print(f"   F1 Score (Macro): {metrics['f1_macro']:.4f}")
    print(f"   F1 Score (Weighted): {metrics['f1_weighted']:.4f}")
    print(f"   Mean IoU:        {metrics['mean_iou']:.4f}")
    
    print(f"\n📈 Per-Class Metrics:")
    print(f"{'Class':<15} {'Accuracy':<12} {'F1 Score':<12} {'IoU':<12}")
    print("-" * 80)
    for i, class_name in enumerate(class_names):
        print(f"{class_name:<15} {metrics['per_class_accuracy'][i]:>10.4f}  "
              f"{metrics['f1_per_class'][i]:>10.4f}  {metrics['iou_per_class'][i]:>10.4f}")
    
    print("\n" + "="*80 + "\n")

def evaluate_simclr(model_path, device='cuda', method='linear'):
    """
    Complete evaluation pipeline
    
    Args:
        model_path: Path to saved SimCLR model
        device: Device to use
        method: 'linear' for sklearn LogReg, 'finetune' for PyTorch linear classifier
    """
    
    # Set device
    device = torch.device(device if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # CIFAR-10 class names
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 
                   'dog', 'frog', 'horse', 'ship', 'truck']
    
    # Load model
    print(f"\nLoading model from: {model_path}")
    model = SimCLR(encoder_dim=512, projection_dim=128, hidden_dim=2048)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    print("✓ Model loaded successfully")
    
    # Prepare datasets
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    
    train_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=False, transform=transform
    )
    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=False, transform=transform
    )
    
    if method == 'linear':
        # Extract features for sklearn evaluation
        train_loader = DataLoader(train_dataset, batch_size=256, shuffle=False, 
                                 num_workers=4, pin_memory=True)
        test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False,
                                num_workers=4, pin_memory=True)
        
        # Extract features
        train_features, train_labels = extract_features(model, train_loader, device)
        test_features, test_labels = extract_features(model, test_loader, device)
        
        # Linear evaluation
        _, test_preds, _ = linear_evaluation(
            train_features, train_labels, test_features, test_labels
        )
        
        # Calculate metrics
        metrics = calculate_metrics(test_labels, test_preds, class_names)
        
    elif method == 'finetune':
        # Fine-tune evaluation with PyTorch
        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True,
                                 num_workers=4, pin_memory=True)
        test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False,
                                num_workers=4, pin_memory=True)
        
        # Get predictions
        test_preds, test_labels = finetune_evaluation(
            model.encoder, train_loader, test_loader, device, epochs=10
        )
        
        # Calculate metrics
        metrics = calculate_metrics(test_labels, test_preds, class_names)
    
    else:
        raise ValueError(f"Unknown method: {method}. Use 'linear' or 'finetune'")
    
    # Print results
    print_evaluation_report(metrics, class_names, 
                           method_name=f"SimCLR ({method.upper()} EVALUATION)")
    
    # Visualizations
    plot_confusion_matrix(metrics['confusion_matrix'], class_names,
                         save_path=f'confusion_matrix_{method}.png')
    
    plot_per_class_metrics(
        metrics['f1_per_class'],
        metrics['per_class_accuracy'],
        metrics['iou_per_class'],
        class_names,
        save_path=f'per_class_metrics_{method}.png'
    )
    
    # Detailed classification report
    print("\n📋 Detailed Classification Report:")
    print("-" * 80)
    if method == 'linear':
        from sklearn.metrics import classification_report
        print(classification_report(test_labels, test_preds, target_names=class_names))
    else:
        from sklearn.metrics import classification_report
        print(classification_report(test_labels, test_preds, target_names=class_names))
    
    return metrics

def compare_with_supervised_baseline(device='cuda'):
    """Compare SimCLR with supervised baseline"""
    print("\n" + "="*80)
    print("COMPARING WITH SUPERVISED BASELINE")
    print("="*80)
    
    # Train supervised ResNet18
    print("\nTraining supervised ResNet18 baseline...")
    
    transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    
    train_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=False, transform=transform
    )
    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=False, transform=test_transform
    )
    
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4)
    
    # Create supervised model
    model = torchvision.models.resnet18(weights=None, num_classes=10).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    # Train for 10 epochs
    for epoch in range(10):
        model.train()
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/10"):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
    # Evaluate
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())
    
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 
                   'dog', 'frog', 'horse', 'ship', 'truck']
    
    metrics = calculate_metrics(np.array(all_labels), np.array(all_preds), class_names)
    print_evaluation_report(metrics, class_names, method_name="SUPERVISED BASELINE")
    
    return metrics

if __name__ == "__main__":
    print("\n" + "="*80)
    print("SimCLR MODEL EVALUATION")
    print("="*80)
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    # Check for model file
    model_path = '/kaggle/working/simclr_model_final.pth'
    try:
        print("\n🔍 Evaluation Method:")
        print("1. Linear Evaluation (sklearn Logistic Regression)")
        print("2. Fine-tune Evaluation (PyTorch Linear Classifier)")
        print("3. Both methods")
        print("4. Compare with supervised baseline")
        
        choice = input("\nSelect method (1/2/3/4): ").strip()
        
        if choice == '1':
            metrics = evaluate_simclr(model_path, device=device, method='linear')
        
        elif choice == '2':
            metrics = evaluate_simclr(model_path, device=device, method='finetune')
        
        elif choice == '3':
            print("\n" + "="*80)
            print("METHOD 1: LINEAR EVALUATION")
            print("="*80)
            metrics_linear = evaluate_simclr(model_path, device=device, method='linear')
            
            print("\n" + "="*80)
            print("METHOD 2: FINE-TUNE EVALUATION")
            print("="*80)
            metrics_finetune = evaluate_simclr(model_path, device=device, method='finetune')
            
            # Comparison
            print("\n" + "="*80)
            print("COMPARISON SUMMARY")
            print("="*80)
            print(f"\n{'Metric':<25} {'Linear':<15} {'Fine-tune':<15}")
            print("-" * 80)
            print(f"{'Accuracy':<25} {metrics_linear['accuracy']*100:>13.2f}%  {metrics_finetune['accuracy']*100:>13.2f}%")
            print(f"{'F1 Score (Macro)':<25} {metrics_linear['f1_macro']:>13.4f}  {metrics_finetune['f1_macro']:>13.4f}")
            print(f"{'Mean IoU':<25} {metrics_linear['mean_iou']:>13.4f}  {metrics_finetune['mean_iou']:>13.4f}")
        
        elif choice == '4':
            # Evaluate SimCLR
            print("\n" + "="*80)
            print("EVALUATING SimCLR")
            print("="*80)
            metrics_simclr = evaluate_simclr(model_path, device=device, method='finetune')
            
            # Compare with supervised
            metrics_supervised = compare_with_supervised_baseline(device=device)
            
            # Final comparison
            print("\n" + "="*80)
            print("FINAL COMPARISON: SimCLR vs SUPERVISED")
            print("="*80)
            print(f"\n{'Metric':<25} {'SimCLR':<15} {'Supervised':<15}")
            print("-" * 80)
            print(f"{'Accuracy':<25} {metrics_simclr['accuracy']*100:>13.2f}%  {metrics_supervised['accuracy']*100:>13.2f}%")
            print(f"{'F1 Score (Macro)':<25} {metrics_simclr['f1_macro']:>13.4f}  {metrics_supervised['f1_macro']:>13.4f}")
            print(f"{'Mean IoU':<25} {metrics_simclr['mean_iou']:>13.4f}  {metrics_supervised['mean_iou']:>13.4f}")
        
        else:
            print("Invalid choice!")
    
    except FileNotFoundError:
        print(f"\n❌ Error: Model file '{model_path}' not found!")
        print("Please train the model first or provide the correct path.")
    
    except Exception as e:
        print(f"\n❌ Error during evaluation: {e}")
        raise e
    
    print("\n✅ Evaluation complete!")


SimCLR MODEL EVALUATION

🔍 Evaluation Method:
1. Linear Evaluation (sklearn Logistic Regression)
2. Fine-tune Evaluation (PyTorch Linear Classifier)
3. Both methods
4. Compare with supervised baseline



Select method (1/2/3/4):  3



METHOD 1: LINEAR EVALUATION
Using device: cuda

Loading model from: /kaggle/working/simclr_model_final.pth
✓ Model loaded successfully
Extracting features...


100%|██████████| 196/196 [00:06<00:00, 31.82it/s]


Extracting features...


100%|██████████| 40/40 [00:01<00:00, 30.98it/s]



Training linear classifier on frozen features...

SimCLR (LINEAR EVALUATION) EVALUATION RESULTS

📊 Overall Metrics:
   Accuracy:        79.65%
   F1 Score (Micro): 0.7965
   F1 Score (Macro): 0.7960
   F1 Score (Weighted): 0.7960
   Mean IoU:        0.6726

📈 Per-Class Metrics:
Class           Accuracy     F1 Score     IoU         
--------------------------------------------------------------------------------
airplane            0.8310      0.8343      0.7158
automobile          0.9180      0.9189      0.8500
bird                0.6650      0.6845      0.5203
cat                 0.6240      0.6094      0.4382
deer                0.7980      0.7658      0.6205
dog                 0.6450      0.6815      0.5168
frog                0.8570      0.8398      0.7238
horse               0.8150      0.8232      0.6996
ship                0.9060      0.9074      0.8304
truck               0.9060      0.8953      0.8104


✓ Confusion matrix saved to: confusion_matrix_linear.png
✓ Per-class met

Epoch 1/10: 100%|██████████| 391/391 [00:06<00:00, 63.60it/s]


Epoch 1: Loss: 0.6596, Train Acc: 77.75%


Epoch 2/10: 100%|██████████| 391/391 [00:06<00:00, 63.46it/s]


Epoch 2: Loss: 0.5707, Train Acc: 80.12%


Epoch 3/10: 100%|██████████| 391/391 [00:06<00:00, 62.73it/s]


Epoch 3: Loss: 0.5575, Train Acc: 80.46%


Epoch 4/10: 100%|██████████| 391/391 [00:06<00:00, 63.31it/s]


Epoch 4: Loss: 0.5496, Train Acc: 80.80%


Epoch 5/10: 100%|██████████| 391/391 [00:06<00:00, 63.26it/s]


Epoch 5: Loss: 0.5420, Train Acc: 81.03%


Epoch 6/10: 100%|██████████| 391/391 [00:06<00:00, 61.69it/s]


Epoch 6: Loss: 0.5379, Train Acc: 80.96%


Epoch 7/10: 100%|██████████| 391/391 [00:06<00:00, 63.17it/s]


Epoch 7: Loss: 0.5298, Train Acc: 81.33%


Epoch 8/10: 100%|██████████| 391/391 [00:06<00:00, 62.17it/s]


Epoch 8: Loss: 0.5306, Train Acc: 81.45%


Epoch 9/10: 100%|██████████| 391/391 [00:06<00:00, 63.39it/s]


Epoch 9: Loss: 0.5277, Train Acc: 81.51%


Epoch 10/10: 100%|██████████| 391/391 [00:06<00:00, 62.27it/s]

Epoch 10: Loss: 0.5261, Train Acc: 81.50%






SimCLR (FINETUNE EVALUATION) EVALUATION RESULTS

📊 Overall Metrics:
   Accuracy:        79.53%
   F1 Score (Micro): 0.7953
   F1 Score (Macro): 0.7950
   F1 Score (Weighted): 0.7950
   Mean IoU:        0.6705

📈 Per-Class Metrics:
Class           Accuracy     F1 Score     IoU         
--------------------------------------------------------------------------------
airplane            0.8460      0.8218      0.6974
automobile          0.9000      0.9174      0.8475
bird                0.6910      0.6865      0.5227
cat                 0.6370      0.6149      0.4439
deer                0.7740      0.7701      0.6262
dog                 0.6210      0.6813      0.5166
frog                0.8480      0.8289      0.7078
horse               0.8430      0.8434      0.7292
ship                0.8870      0.8969      0.8130
truck               0.9060      0.8891      0.8004


✓ Confusion matrix saved to: confusion_matrix_finetune.png
✓ Per-class metrics saved to: per_class_metrics_finetune.png
