In [47]:
# Setup and Imports

import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from tqdm import tqdm
import os
import gc

# Choose the compute device: Apple MPS first, then CUDA, otherwise CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Seed every RNG used by the notebook so runs are repeatable.
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

Using device: cuda


In [48]:
# Model Architecture - Spatial Attention Module

class SpatialAttention(nn.Module):
    """Spatial attention gate.

    Builds a one-channel sigmoid mask from the per-pixel channel mean and
    channel max of the input, and multiplies the input by that mask.

    Args:
        kernel_size: Side length of the square convolution that turns the
            two pooled maps into the mask. Padding is ``kernel_size // 2``.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv = nn.Conv2d(
            2, 1,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=False,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled by its spatial attention mask."""
        # Collapse the channel axis two ways and stack them as a 2-channel map.
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        pooled = torch.cat((channel_mean, channel_max), dim=1)

        # One-channel mask with values in (0, 1), broadcast over channels.
        mask = self.sigmoid(self.conv(pooled))
        return x * mask

In [49]:
# Model Architecture - Attention Residual Block

class AttentionResidualBlock(nn.Module):
    """Residual block with channel attention followed by spatial attention.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, then a channel gate
    and a spatial gate. The shortcut is an identity unless the stride or the
    channel count changes, in which case it is a 1x1 conv + BN projection.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        stride: Stride of the first convolution (and of the projection).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution may downsample.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Channel attention: global average and max descriptors share one MLP.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # `reduction` is the squeeze ratio (never below 4); the bottleneck
        # therefore has out_channels // reduction channels.
        reduction = max(out_channels // 16, 4)
        bottleneck = out_channels // reduction
        self.channel_attention = nn.Sequential(
            nn.Conv2d(out_channels, bottleneck, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(bottleneck, out_channels, 1, bias=False),
            nn.Sigmoid()
        )

        # Spatial attention
        self.spatial_attention = SpatialAttention(kernel_size=7)

        # Shortcut: project only when the output shape differs from the input.
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        # Channel gate: the two sigmoid outputs are summed, so it lies in (0, 2).
        gate_from_avg = self.channel_attention(self.avg_pool(features))
        gate_from_max = self.channel_attention(self.max_pool(features))
        features = features * (gate_from_avg + gate_from_max)

        # Spatial gate
        features = self.spatial_attention(features)

        # Residual connection
        features += self.shortcut(x)
        return self.relu(features)

In [50]:
# Model Architecture - Enhanced Efficient ResNet

class EnhancedEfficientResNet(nn.Module):
    """Four-stage ResNet built from ``AttentionResidualBlock``.

    Stage widths are 2x, 4x, 8x and 8x ``base_width``; stages 2 and 3 use
    stride 2. The head is global average pooling, dropout (p=0.3) and one
    linear layer.

    Args:
        num_classes: Size of the output logits vector.
        base_width: Channel count produced by the stem convolution.
    """

    def __init__(self, num_classes=10, base_width=32):
        super().__init__()
        width = base_width

        # Stem
        self.conv1 = nn.Conv2d(3, width, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages (two blocks each)
        self.layer1 = self._make_layer(width, width * 2, 2, stride=1)
        self.layer2 = self._make_layer(width * 2, width * 4, 2, stride=2)
        self.layer3 = self._make_layer(width * 4, width * 8, 2, stride=2)
        self.layer4 = self._make_layer(width * 8, width * 8, 2, stride=1)

        # Classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(width * 8, num_classes)

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        """Build one stage: a (possibly strided) block plus ``blocks - 1`` more."""
        stage = [AttentionResidualBlock(in_channels, out_channels, stride)]
        stage.extend(
            AttentionResidualBlock(out_channels, out_channels)
            for _ in range(blocks - 1)
        )
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        x = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = torch.flatten(self.avgpool(x), 1)
        return self.fc(self.dropout(pooled))

In [51]:
# Model Architecture - Original Model (for compatibility)

class ChannelAttentionBlock(nn.Module):
    """Residual block with a channel-attention gate (original architecture).

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, scaled by a channel
    gate. The shortcut is an identity unless the stride or the channel count
    changes, in which case it is a 1x1 conv + BN projection.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        stride: Stride of the first convolution (and of the projection).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution may downsample.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Global descriptors fed to the shared attention MLP.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # `reduction` is the squeeze ratio (never below 4); the bottleneck
        # therefore has out_channels // reduction channels.
        reduction = max(out_channels // 16, 4)
        bottleneck = out_channels // reduction
        self.attention = nn.Sequential(
            nn.Conv2d(out_channels, bottleneck, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(bottleneck, out_channels, 1, bias=False),
            nn.Sigmoid()
        )

        # Shortcut: project only when the output shape differs from the input.
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        # Channel gate: the two sigmoid outputs are summed, so it lies in (0, 2).
        gate_from_avg = self.attention(self.avg_pool(features))
        gate_from_max = self.attention(self.max_pool(features))
        features = features * (gate_from_avg + gate_from_max)

        # Residual connection
        features += self.shortcut(x)
        return self.relu(features)

class EfficientResNet(nn.Module):
    """Three-stage ResNet built from ``ChannelAttentionBlock``.

    Stage widths are 2x, 4x and 8x ``base_width``; stages 2 and 3 use
    stride 2. The head is global average pooling, dropout (p=0.25) and one
    linear layer.

    Args:
        num_classes: Size of the output logits vector.
        base_width: Channel count produced by the stem convolution.
    """

    def __init__(self, num_classes=10, base_width=32):
        super().__init__()
        width = base_width

        # Stem
        self.conv1 = nn.Conv2d(3, width, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages (two blocks each)
        self.layer1 = self._make_layer(width, width * 2, 2, stride=1)
        self.layer2 = self._make_layer(width * 2, width * 4, 2, stride=2)
        self.layer3 = self._make_layer(width * 4, width * 8, 2, stride=2)

        # Classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.25)
        self.fc = nn.Linear(width * 8, num_classes)

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        """Build one stage: a (possibly strided) block plus ``blocks - 1`` more."""
        stage = [ChannelAttentionBlock(in_channels, out_channels, stride)]
        stage.extend(
            ChannelAttentionBlock(out_channels, out_channels)
            for _ in range(blocks - 1)
        )
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        x = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)

        pooled = torch.flatten(self.avgpool(x), 1)
        return self.fc(self.dropout(pooled))

In [52]:
# Test Time Augmentation Setup

# Per-channel normalization applied at the end of every test-time pipeline.
test_normalization = transforms.Normalize(
    mean=(0.4914, 0.4822, 0.4465),
    std=(0.2023, 0.1994, 0.2010)
)

# Test-time augmentation pipelines. Each entry is its PIL-level augmentation
# (possibly none) followed by ToTensor and the shared normalization.
# Order matters: callers slice this list and treat index 0 as the base view.
# NOTE: the crop / rotation / colour-jitter transforms are the Random* kind,
# so their output is expected to vary from call to call.
advanced_transforms = [
    transforms.Compose([*augmentation, transforms.ToTensor(), test_normalization])
    for augmentation in (
        # 1. Original transform (base)
        [],
        # 2. Horizontal flip (p=1.0, so always applied)
        [transforms.RandomHorizontalFlip(p=1.0)],
        # 3. Small crop 1
        [transforms.RandomCrop(32, padding=4, padding_mode='reflect')],
        # 4. Small crop 2 (different padding)
        [transforms.RandomCrop(32, padding=4, padding_mode='edge')],
        # 5. Slight rotate 1
        [transforms.RandomRotation(5)],
        # 6. Slight rotate 2
        [transforms.RandomRotation(10)],
        # 7. Color jitter
        [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.02)],
        # 8. Color jitter 2
        [transforms.ColorJitter(brightness=0.05, contrast=0.15, saturation=0.05, hue=0)],
    )
]

In [53]:
# Custom CIFAR Test Dataset with Enhanced Data Handling

class EnhancedCIFARTestDataset(Dataset):
    """Test-set images read from a pickled dict.

    The pickle must contain ``b'data'`` (image array) and may contain
    ``b'ids'``; when ids are absent they default to ``0..N-1``. A 2-D data
    array is treated as flat rows and reshaped to ``(N, 32, 32, 3)``.
    """

    def __init__(self, pkl_file_path, transform=None):
        """
        Args:
            pkl_file_path (string): Path to the .pkl file containing test data
            transform (callable, optional): Transform to be applied on a sample
        """
        self.transform = transform

        try:
            with open(pkl_file_path, 'rb') as handle:
                payload = pickle.load(handle, encoding='bytes')

            self.images = payload[b'data']
            if b'ids' in payload:
                self.ids = payload[b'ids']
            else:
                self.ids = np.arange(len(self.images))

            # Flat rows are channel-major; convert to height x width x channel.
            is_flat = len(self.images.shape) == 2
            if is_flat:
                print(f"Reshaping flat images of shape {self.images.shape}")
                channel_first = self.images.reshape(-1, 3, 32, 32)
                self.images = channel_first.transpose(0, 2, 3, 1)
                print(f"Reshaped to {self.images.shape}")
        except Exception as e:
            # Report, then let the caller see the original failure.
            print(f"Error loading data: {e}")
            raise

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, id)``; on any error return an all-zero image instead."""
        try:
            sample = self.images[idx]
            if not isinstance(sample, Image.Image):
                sample = Image.fromarray(sample.astype('uint8'))

            if self.transform:
                sample = self.transform(sample)

            return sample, self.ids[idx]
        except Exception as e:
            print(f"Error processing image {idx}: {e}")
            # Best-effort fallback: a zero tensor when a transform is set,
            # otherwise a zero uint8 array.
            if self.transform:
                placeholder = torch.zeros(3, 32, 32)
            else:
                placeholder = np.zeros((32, 32, 3), dtype=np.uint8)
            return placeholder, self.ids[idx]

In [54]:
# Enhanced Test-Time Augmentation Prediction

def enhanced_tta_prediction(model, pkl_file_path, output_filename="enhanced_submission.csv", num_transforms=8,
                            batch_size=32, temperature=0.9, base_weight=2.0):
    """
    Advanced test-time augmentation with weighted averaging of predictions.

    The model is run once per augmentation in ``advanced_transforms[:num_transforms]``.
    Softmax probabilities (after temperature scaling) are combined as a
    weighted sum in which the first (un-augmented) view gets ``base_weight``
    and every other view gets 1.0, with weights normalized to sum to 1.

    Args:
        model: Network already placed on ``device``; it is switched to eval mode.
        pkl_file_path: Path to the pickled test set.
        output_filename: CSV file to write (columns ``ID`` and ``Labels``).
        num_transforms: How many entries of ``advanced_transforms`` to use.
        batch_size: DataLoader batch size.
        temperature: Logits are divided by this value before the softmax.
        base_weight: Relative weight of the first view.

    Returns:
        The submission DataFrame, sorted by ``ID``.

    Raises:
        ValueError: If the selection of transforms is empty (e.g. ``num_transforms=0``).
    """
    print("Starting enhanced TTA prediction...")
    model.eval()

    # Select requested number of transforms
    transforms_to_use = advanced_transforms[:num_transforms]
    if not transforms_to_use:
        # Without this guard the failure surfaces much later as an obscure
        # axis error from np.argmax on an empty sum.
        raise ValueError(
            f"num_transforms={num_transforms} selects no augmentations; "
            f"expected a value between 1 and {len(advanced_transforms)}"
        )

    # Weights for different transforms (higher weight for the original image),
    # normalized so they sum to 1. Kept as Python floats so the float32
    # probability arrays are not upcast when multiplied.
    raw_weights = [base_weight] + [1.0] * (len(transforms_to_use) - 1)
    weight_total = sum(raw_weights)
    weights = [w / weight_total for w in raw_weights]

    # Running weighted sum of probabilities; avoids holding one full
    # probability array per augmentation in memory.
    weighted_sum = None
    image_ids = None

    # Process each transform
    for i, transform in enumerate(tqdm(transforms_to_use, desc="Processing augmentations")):
        # Create dataset with this transform
        dataset = EnhancedCIFARTestDataset(pkl_file_path, transform=transform)
        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=2,
            pin_memory=True
        )

        # Collect batch predictions
        batch_probs = []
        batch_ids = []

        with torch.no_grad():
            for images, ids in dataloader:
                images = images.to(device)

                # Temperature scaling followed by softmax
                probs = F.softmax(model(images) / temperature, dim=1)

                batch_probs.append(probs.cpu().numpy())
                # IDs are identical for every augmentation (shuffle=False),
                # so they only need to be collected on the first pass.
                if image_ids is None:
                    batch_ids.append(ids.numpy())

                del images, probs

        # Release cached GPU blocks once per augmentation rather than once
        # per batch; per-batch calls only slow the loop down.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Apply this augmentation's weight and fold it into the running sum
        augmentation_probs = np.concatenate(batch_probs) * weights[i]
        if weighted_sum is None:
            weighted_sum = augmentation_probs
        else:
            weighted_sum += augmentation_probs

        if image_ids is None:
            image_ids = np.concatenate(batch_ids)

        # Free memory
        del batch_probs, batch_ids, augmentation_probs
        gc.collect()

    # Weighted average of softmax probabilities -> predicted class
    final_preds = np.argmax(weighted_sum, axis=1)

    # Create and save submission
    submission_df = pd.DataFrame({
        'ID': image_ids,
        'Labels': final_preds
    })
    submission_df = submission_df.sort_values('ID')
    submission_df.to_csv(output_filename, index=False)
    print(f"Enhanced TTA submission file created: {output_filename}")
    return submission_df

In [55]:
# Class-Specialized Prediction Handling

def class_specialized_prediction(model, pkl_file_path, output_filename="specialized_submission.csv"):
    """
    Creates predictions with specialized handling for different classes based on confidence thresholds.

    Pass 1 predicts every image with the base transform. A prediction is
    "low confidence" when its top softmax probability is below the threshold
    configured for the predicted class. Pass 2 re-predicts only those images:
    logits from every entry of ``advanced_transforms`` are softened with a
    temperature of 1.2, turned into probabilities and averaged.

    Args:
        model: Network already placed on ``device``; it is switched to eval mode.
        pkl_file_path: Path to the pickled test set.
        output_filename: CSV file to write (columns ``ID`` and ``Labels``).

    Returns:
        The submission DataFrame, sorted by ``ID``.
    """
    print("Starting class-specialized prediction...")
    model.eval()

    # Single source of truth for the batch size used by both passes.
    batch_size = 64

    # Initial pass with base transform
    base_transform = advanced_transforms[0]
    dataset = EnhancedCIFARTestDataset(pkl_file_path, transform=base_transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

    # First pass - get initial predictions and confidence
    initial_preds = []
    confidence_scores = []
    image_ids = []
    num_classes = None  # learned from the model output instead of hard-coding 10

    with torch.no_grad():
        for images, ids in tqdm(dataloader, desc="Initial prediction pass"):
            images = images.to(device)
            outputs = model(images)
            num_classes = outputs.shape[1]

            # Top softmax probability and its class index
            probs = F.softmax(outputs, dim=1)
            values, preds = torch.max(probs, dim=1)

            initial_preds.extend(preds.cpu().numpy())
            confidence_scores.extend(values.cpu().numpy())
            image_ids.extend(ids.numpy())

            # Free memory
            del images, outputs, probs, values, preds

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Confidence thresholds for different classes - based on common confusion patterns
    class_confidence_thresholds = {
        0: 0.85,  # airplane
        1: 0.90,  # automobile
        2: 0.75,  # bird - still relatively difficult
        3: 0.70,  # cat - difficult class
        4: 0.75,  # deer
        5: 0.70,  # dog - difficult class
        6: 0.85,  # frog
        7: 0.85,  # horse
        8: 0.90,  # ship
        9: 0.90,  # truck
    }

    # Identify low confidence predictions (positions in dataset order)
    low_conf_indices = [
        i for i, (pred, conf) in enumerate(zip(initial_preds, confidence_scores))
        if conf < class_confidence_thresholds.get(pred, 0.75)
    ]

    # Guard the percentage against an empty dataset.
    low_conf_pct = len(low_conf_indices) / len(initial_preds) * 100 if initial_preds else 0.0
    print(f"Found {len(low_conf_indices)} low confidence predictions ({low_conf_pct:.2f}%)")

    # For low confidence predictions, use enhanced TTA
    final_preds = list(initial_preds)

    if low_conf_indices:
        # Set for O(1) membership tests; the list form made the per-batch
        # check quadratic in the dataset size.
        low_conf_set = set(low_conf_indices)
        low_conf_probs = []

        for transform in tqdm(advanced_transforms, desc="Processing difficult cases"):
            dataset = EnhancedCIFARTestDataset(pkl_file_path, transform=transform)
            dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

            all_outputs = []
            batch_start = 0

            with torch.no_grad():
                for images, _ in dataloader:
                    # Track positions from the real batch length so the index
                    # arithmetic cannot drift from the DataLoader's batch size.
                    batch_len = images.shape[0]
                    batch_positions = range(batch_start, batch_start + batch_len)
                    batch_start += batch_len

                    if low_conf_set.isdisjoint(batch_positions):
                        # No low-confidence sample here: keep row alignment
                        # with zero placeholders (never selected below).
                        all_outputs.append(torch.zeros(batch_len, num_classes))
                    else:
                        images = images.to(device)
                        all_outputs.append(model(images).cpu())

            # Concatenate all outputs (row i corresponds to dataset position i)
            all_outputs = torch.cat(all_outputs)

            # Extract only the low confidence predictions
            selected_outputs = all_outputs[low_conf_indices]
            selected_probs = F.softmax(selected_outputs / 1.2, dim=1).numpy()  # Apply temperature

            low_conf_probs.append(selected_probs)

            # Free memory
            del all_outputs, selected_outputs, selected_probs
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        # Average probabilities for low confidence predictions
        avg_probs = np.mean(np.stack(low_conf_probs), axis=0)
        improved_preds = np.argmax(avg_probs, axis=1)

        # Update predictions for low confidence cases
        for i, idx in enumerate(low_conf_indices):
            final_preds[idx] = improved_preds[i]

    # Create and save submission
    submission_df = pd.DataFrame({
        'ID': image_ids,
        'Labels': final_preds
    })
    submission_df = submission_df.sort_values('ID')
    submission_df.to_csv(output_filename, index=False)
    print(f"Class-specialized submission file created: {output_filename}")
    return submission_df

In [56]:
# Adaptive Combination of Multiple Prediction Methods

def adaptive_prediction(model_path, pkl_file_path, output_filename="adaptive_submission.csv"):
    """
    Creates predictions using an adaptive approach that combines multiple techniques.

    Steps:
      1. Load the checkpoint, trying the enhanced architecture, then the
         original one, then a lenient load into the enhanced architecture.
      2. Run ``enhanced_tta_prediction`` and ``class_specialized_prediction``
         into temporary CSV files.
      3. For each image pick one of the two labels using the base-transform
         softmax probabilities as confidence scores.

    Args:
        model_path: Path to the saved checkpoint.
        pkl_file_path: Path to the pickled test set.
        output_filename: CSV file to write (columns ``ID`` and ``Labels``).

    Returns:
        The final submission DataFrame, ordered by ``ID``.

    Raises:
        KeyError: If an ID in the TTA predictions is missing from the
            specialized predictions or from the test set.
    """
    print("Starting adaptive prediction process...")

    # Read the checkpoint once and reuse it for every loading attempt.
    # NOTE(review): torch.load unpickles the file, so only load trusted
    # checkpoints; consider weights_only=True if the installed torch supports it.
    checkpoint = torch.load(model_path, map_location=device)

    # `except Exception` (not a bare `except`) so KeyboardInterrupt and
    # SystemExit are not mistaken for an architecture mismatch.
    try:
        # First try the enhanced model architecture
        model = EnhancedEfficientResNet(num_classes=10)
        model.load_state_dict(checkpoint)
        print("Successfully loaded model with enhanced architecture")
    except Exception:
        try:
            # Fall back to original architecture
            model = EfficientResNet(num_classes=10)
            model.load_state_dict(checkpoint)
            print("Successfully loaded model with original architecture")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Attempting alternative loading method...")

            # The checkpoint may wrap the weights under a 'state_dict' key;
            # otherwise load whatever keys match.
            model = EnhancedEfficientResNet(num_classes=10)
            if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
                model.load_state_dict(checkpoint['state_dict'])
            else:
                model.load_state_dict(checkpoint, strict=False)
                print("Warning: Model loaded with strict=False, some weights may not be loaded")

    model = model.to(device)
    model.eval()

    tmp_tta_path = "tmp_tta.csv"
    tmp_spec_path = "tmp_spec.csv"
    try:
        # Step 1: Run enhanced TTA
        print("\nStep 1: Running enhanced TTA prediction...")
        enhanced_tta_prediction(model, pkl_file_path, tmp_tta_path, num_transforms=8)

        # Step 2: Run class-specialized prediction
        print("\nStep 2: Running class-specialized prediction...")
        class_specialized_prediction(model, pkl_file_path, tmp_spec_path)

        # Step 3: Combine predictions based on confidence
        print("\nStep 3: Combining predictions adaptively...")

        # Load both prediction files in ID order
        tta_df = pd.read_csv(tmp_tta_path).sort_values('ID')
        spec_df = pd.read_csv(tmp_spec_path).sort_values('ID')
    finally:
        # Remove the temporary files even when one of the steps fails.
        for tmp_path in (tmp_tta_path, tmp_spec_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # Get confidence scores for all predictions
    base_transform = advanced_transforms[0]
    dataset = EnhancedCIFARTestDataset(pkl_file_path, transform=base_transform)
    dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=2, pin_memory=True)

    all_probs = []
    all_ids = []

    with torch.no_grad():
        for images, ids in tqdm(dataloader, desc="Computing confidence scores"):
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            all_probs.append(probs.cpu().numpy())
            all_ids.extend(ids.numpy().tolist())

    all_probs = np.concatenate(all_probs)
    id_to_index = {id_val: i for i, id_val in enumerate(all_ids)}

    # One-time ID -> label map for the specialized predictions. This replaces
    # a boolean-mask scan of spec_df per row (quadratic overall).
    # keep='first' matches the old `.values[0]` choice if an ID repeats.
    spec_unique = spec_df.drop_duplicates(subset='ID', keep='first')
    spec_label_by_id = dict(zip(spec_unique['ID'].tolist(), spec_unique['Labels'].tolist()))

    # Combine predictions
    final_labels = []

    # Thresholds for decision making
    HIGH_CONF = 0.95  # Increase from 0.90
    MED_CONF = 0.80   # Increase from 0.75

    # Additional confidence boosting for specific classes
    class_boost = {
        2: 0.05,  # bird
        3: 0.05,  # cat
        5: 0.05,  # dog
    }

    for img_id, tta_pred in zip(tta_df['ID'].tolist(), tta_df['Labels'].tolist()):
        spec_pred = spec_label_by_id[img_id]

        # Base-transform probabilities serve as the confidence of each label
        prob_vector = all_probs[id_to_index[img_id]]

        # Apply confidence boosting for certain classes
        tta_conf = prob_vector[tta_pred] + class_boost.get(tta_pred, 0)
        spec_conf = prob_vector[spec_pred] + class_boost.get(spec_pred, 0)

        # Decision logic
        if tta_pred == spec_pred:
            # Both methods agree
            final_labels.append(tta_pred)
        elif tta_conf > HIGH_CONF:
            # TTA prediction has very high confidence
            final_labels.append(tta_pred)
        elif spec_conf > HIGH_CONF:
            # Class specialized prediction has very high confidence
            final_labels.append(spec_pred)
        elif tta_conf > MED_CONF and tta_conf > spec_conf:
            # TTA has decent confidence and higher than class specialized
            final_labels.append(tta_pred)
        elif spec_conf > MED_CONF and spec_conf > tta_conf:
            # Class specialized has decent confidence and higher than TTA
            final_labels.append(spec_pred)
        elif tta_pred in [2, 3, 5]:
            # For difficult classes, prefer specialized prediction
            final_labels.append(spec_pred)
        else:
            # Default to enhanced TTA
            final_labels.append(tta_pred)

    # Create final submission
    submission_df = pd.DataFrame({
        'ID': tta_df['ID'],
        'Labels': final_labels
    })
    submission_df.to_csv(output_filename, index=False)

    print(f"Adaptive submission file created: {output_filename}")
    return submission_df

In [57]:
# Generate Multiple Predictions for Ensemble

def generate_multiple_predictions(model_path, pkl_file_path, base_filename="submission"):
    """
    Generates multiple prediction files using different methods for final ensemble.

    Writes three CSV files: ``{base_filename}_tta.csv``,
    ``{base_filename}_spec.csv`` and ``{base_filename}_adaptive.csv``.

    Args:
        model_path: Path to the saved checkpoint.
        pkl_file_path: Path to the pickled test set.
        base_filename: Prefix of the generated CSV files.
    """
    print("Generating multiple predictions for ensemble...")

    # Read the checkpoint once and reuse it for both architectures.
    # NOTE(review): torch.load unpickles the file, so only load trusted
    # checkpoints; consider weights_only=True if the installed torch supports it.
    checkpoint = torch.load(model_path, map_location=device)
    try:
        model = EnhancedEfficientResNet(num_classes=10)
        model.load_state_dict(checkpoint)
    except Exception:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit still propagate. Any other failure is treated as an
        # architecture mismatch and the original architecture is tried.
        model = EfficientResNet(num_classes=10)
        model.load_state_dict(checkpoint)

    model = model.to(device)
    model.eval()

    # Method 1: Enhanced TTA
    enhanced_tta_prediction(
        model,
        pkl_file_path,
        f"{base_filename}_tta.csv",
        num_transforms=6  # Using fewer transforms for diversity
    )

    # Method 2: Class specialized prediction
    class_specialized_prediction(
        model,
        pkl_file_path,
        f"{base_filename}_spec.csv"
    )

    # Method 3: Adaptive prediction (loads the checkpoint itself from model_path)
    adaptive_prediction(
        model_path,
        pkl_file_path,
        f"{base_filename}_adaptive.csv"
    )

    print(f"Generated 3 prediction files for ensemble submission")

In [58]:
# Ensemble Multiple Prediction Files

def ensemble_prediction_files(file_paths, output_filename="ensemble_submission.csv"):
    """
    Ensembles multiple prediction CSV files by per-sample majority vote.

    Every file must have ``ID`` and ``Labels`` columns and the same set of
    IDs. When several labels tie, the smallest label wins.

    Args:
        file_paths: Paths of the prediction CSV files.
        output_filename: CSV file to write.

    Returns:
        DataFrame with the voted label for each ID, ordered by ``ID``.
    """
    print(f"Creating ensemble from {len(file_paths)} prediction files...")

    # Read every file and bring it into ID order
    frames = [pd.read_csv(path).sort_values('ID') for path in file_paths]

    # All files must describe exactly the same IDs
    reference_ids = frames[0]['ID'].values
    for other in frames[1:]:
        assert np.array_equal(reference_ids, other['ID'].values), "ID mismatch between files"

    # votes[f, s] is the label that file f gives to sample s
    votes = np.array([frame['Labels'].values for frame in frames])

    def _majority(sample_votes):
        # np.unique sorts the labels and argmax takes the first maximum,
        # which is why ties resolve to the smallest label.
        labels, tallies = np.unique(sample_votes, return_counts=True)
        return labels[np.argmax(tallies)]

    final_preds = [_majority(votes[:, col]) for col in range(len(frames[0]))]

    # Create final submission
    ensemble_df = pd.DataFrame({
        'ID': frames[0]['ID'],
        'Labels': final_preds
    })
    ensemble_df.to_csv(output_filename, index=False)
    print(f"Ensemble submission file created: {output_filename}")
    return ensemble_df

In [59]:
# Analyze Prediction Differences

def analyze_prediction_differences(file_paths, class_names=None):
    """
    Reports how often the given prediction files agree.

    Rows are compared position by position after sorting each file by ``ID``.
    For every row on which the files disagree, each file's predicted class
    has its disagreement counter incremented.

    Args:
        file_paths: Paths of prediction CSV files (``ID`` and ``Labels`` columns).
        class_names: Names indexed by class label; defaults to the ten
            CIFAR-10 class names.

    Returns:
        ``(agreement_rate, class_disagreements)``: the percentage of rows on
        which all files agree, and a dict mapping class index to its
        disagreement count.
    """
    if class_names is None:
        class_names = [
            'airplane', 'automobile', 'bird', 'cat', 'deer',
            'dog', 'frog', 'horse', 'ship', 'truck'
        ]

    # Load every file in ID order and keep just its label column
    frames = [pd.read_csv(path).sort_values('ID') for path in file_paths]
    label_columns = [frame['Labels'].to_numpy() for frame in frames]
    n_rows = len(frames[0])

    agreement_count = 0
    class_disagreements = dict.fromkeys(range(len(class_names)), 0)

    for row in range(n_rows):
        row_preds = [column[row] for column in label_columns]
        if len(set(row_preds)) == 1:
            agreement_count += 1
            continue
        # Charge the disagreement to every class that was predicted here
        for pred in row_preds:
            class_disagreements[pred] += 1

    agreement_rate = agreement_count / n_rows * 100

    print(f"Agreement rate between prediction files: {agreement_rate:.2f}%")
    print("\nDisagreements by class:")
    for class_idx, count in class_disagreements.items():
        print(f"{class_names[class_idx]}: {count} disagreements")

    return agreement_rate, class_disagreements

In [60]:
# Main Execution Function

def main(model_path, pkl_file_path):
    """Run the whole prediction pipeline and print which file to submit.

    Produces the three per-method files, their majority-vote ensemble and a
    separately computed adaptive prediction, then reports how much the five
    files agree.

    Args:
        model_path: Path to the saved checkpoint.
        pkl_file_path: Path to the pickled test set.
    """
    print(f"Starting prediction process with model: {model_path}")
    print(f"Test data: {pkl_file_path}")

    # File names below rely on generate_multiple_predictions' default prefix.
    method_files = [
        "submission_tta.csv",
        "submission_spec.csv",
        "submission_adaptive.csv"
    ]
    ensemble_file = "ensemble_submission.csv"
    final_file = "final_submission.csv"

    # Method 1: Generate 3 different prediction files
    generate_multiple_predictions(model_path, pkl_file_path)

    # Method 2: Create ensemble from the 3 prediction files
    ensemble_prediction_files(method_files, ensemble_file)

    # Method 3: Create adaptive prediction directly
    adaptive_prediction(model_path, pkl_file_path, final_file)

    # Analyze differences between the prediction files
    print("\nAnalyzing prediction differences:")
    analyze_prediction_differences(method_files + [ensemble_file, final_file])

    print("\nRecommended submission file: final_submission.csv")
    print("Alternative submission file: ensemble_submission.csv")

In [65]:
# Run the Code (update paths as needed)

if __name__ == "__main__":
    # Paths for Kaggle environment - update these for your environment
    MODEL_PATH = '/kaggle/input/best_ema_model_500epoch_95.30/pytorch/default/1/best_ema_model.pth'
    TEST_DATA_PATH = '/kaggle/input/deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl'

    # Keep the call inside the guard: the paths above only exist when this
    # file runs as a script/notebook, so calling main() outside the guard
    # raises NameError if the file is ever imported.
    main(MODEL_PATH, TEST_DATA_PATH)

Starting prediction process with model: /kaggle/input/best_ema_model_500epoch_95.30/pytorch/default/1/best_ema_model.pth
Test data: /kaggle/input/deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl
Generating multiple predictions for ensemble...


  model.load_state_dict(torch.load(model_path, map_location=device))


Starting enhanced TTA prediction...


Processing augmentations: 100%|██████████| 6/6 [00:27<00:00,  4.57s/it]


Enhanced TTA submission file created: submission_tta.csv
Starting class-specialized prediction...


Initial prediction pass: 100%|██████████| 157/157 [00:03<00:00, 45.42it/s]


Found 6818 low confidence predictions (68.18%)


Processing difficult cases: 100%|██████████| 8/8 [00:30<00:00,  3.77s/it]
  model.load_state_dict(torch.load(model_path, map_location=device))


Class-specialized submission file created: submission_spec.csv
Starting adaptive prediction process...
Successfully loaded model with enhanced architecture

Step 1: Running enhanced TTA prediction...
Starting enhanced TTA prediction...


Processing augmentations: 100%|██████████| 8/8 [00:40<00:00,  5.01s/it]


Enhanced TTA submission file created: tmp_tta.csv

Step 2: Running class-specialized prediction...
Starting class-specialized prediction...


Initial prediction pass: 100%|██████████| 157/157 [00:03<00:00, 41.82it/s]


Found 6818 low confidence predictions (68.18%)


Processing difficult cases: 100%|██████████| 8/8 [00:29<00:00,  3.73s/it]


Class-specialized submission file created: tmp_spec.csv

Step 3: Combining predictions adaptively...


Computing confidence scores: 100%|██████████| 157/157 [00:03<00:00, 51.39it/s]


Adaptive submission file created: submission_adaptive.csv
Generated 3 prediction files for ensemble submission
Creating ensemble from 3 prediction files...
Ensemble submission file created: ensemble_submission.csv
Starting adaptive prediction process...


  model.load_state_dict(torch.load(model_path, map_location=device))


Successfully loaded model with enhanced architecture

Step 1: Running enhanced TTA prediction...
Starting enhanced TTA prediction...


Processing augmentations: 100%|██████████| 8/8 [00:39<00:00,  4.98s/it]


Enhanced TTA submission file created: tmp_tta.csv

Step 2: Running class-specialized prediction...
Starting class-specialized prediction...


Initial prediction pass: 100%|██████████| 157/157 [00:03<00:00, 41.95it/s]


Found 6818 low confidence predictions (68.18%)


Processing difficult cases: 100%|██████████| 8/8 [00:30<00:00,  3.79s/it]


Class-specialized submission file created: tmp_spec.csv

Step 3: Combining predictions adaptively...


Computing confidence scores: 100%|██████████| 157/157 [00:03<00:00, 51.17it/s]


Adaptive submission file created: final_submission.csv

Analyzing prediction differences:
Agreement rate between prediction files: 97.16%

Disagreements by class:
airplane: 171 disagreements
automobile: 123 disagreements
bird: 177 disagreements
cat: 252 disagreements
deer: 97 disagreements
dog: 159 disagreements
frog: 94 disagreements
horse: 98 disagreements
ship: 110 disagreements
truck: 139 disagreements

Recommended submission file: final_submission.csv
Alternative submission file: ensemble_submission.csv
