In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/data_batch_1
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/data_batch_2
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/batches.meta
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/test_batch
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/data_batch_3
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/data_batch_5
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/data_batch_4
/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/readme.html
/kaggle/input/nearlyfinal/pytorch/default/1/best_model (9).pth
/kaggle/input/finetuning_latest_90/pytorch/default/1/best_model (11).pth


In [None]:
import os
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
!pip install torchsummary
import torchsummary
import random

import torch.nn.init as init

import torchvision
from torchvision import transforms

from torch.nn.utils import (
  parameters_to_vector as Params2Vec,
  vector_to_parameters as Vec2Params
)

import torchvision.transforms.v2 as v2
import random
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR
from torch.optim.lr_scheduler import LambdaLR



In [3]:
# Dataset path
data_dir = "/kaggle/input/deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/"

In [4]:
# Function to unpickle a batch
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [5]:
# Class labels from batches.meta
meta_path = os.path.join(data_dir, "batches.meta")
meta_data = unpickle(meta_path)
class_labels = [label.decode('utf-8') for label in meta_data[b'label_names']]

In [6]:
# Load and merge Training & Test Data
X_train_test, y_train_test = [], []
ids_train_test = []

In [7]:
# Load Training Data (All 5 Batches) and Test Data in a single loop
for i in range(1, 7):  # 1 to 6 (including test batch as 6th iteration)
    batch_name = f"data_batch_{i}" if i <= 5 else "test_batch"
    batch_path = os.path.join(data_dir, batch_name)
    batch_data = unpickle(batch_path)
    
    images = batch_data[b'data']
    labels = batch_data[b'labels']

    filenames = batch_data[b'filenames']

    # Decode filenames from bytes to strings
    filenames = [name.decode('utf-8') for name in filenames]
    
    # Reshape images to (num_samples, 32, 32, 3)
    images = images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    
    X_train_test.append(images)
    y_train_test.extend(labels)
    ids_train_test.extend(filenames)  # Store filenames as IDs

# Convert lists to NumPy arrays
X_train_test = np.concatenate(X_train_test, axis=0)
y_train_test = np.array(y_train_test)
ids_train_test = np.array(ids_train_test)  # Convert IDs to NumPy array

# Compute merged dataset class distribution
unique_classes, counts_classes = np.unique(y_train_test, return_counts=True)

print("Merged dataset shape:", X_train_test.shape, y_train_test.shape)
print("Class distribution:", dict(zip(unique_classes, counts_classes)))

Merged dataset shape: (60000, 32, 32, 3) (60000,)
Class distribution: {0: 6000, 1: 6000, 2: 6000, 3: 6000, 4: 6000, 5: 6000, 6: 6000, 7: 6000, 8: 6000, 9: 6000}


In [8]:
# PyTorch Dataset Class with Transformations
class CIFAR10Dataset(Dataset):
    def __init__(self, images, labels, transform_structured):
        self.images = images
        self.labels = labels
        self.transform_structured = transform_structured
        
        # self.transform_natural = transform_natural

        # CIFAR-10 Structured & Natural Classes
        #self.structured_classes = {0, 1, 8, 9}  # Airplane, Car, Ship, Truck
        #self.natural_classes = {2, 3, 4, 5, 6, 7}  # Bird, Cat, Deer, Dog, Frog, Horse

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # Apply appropriate transformation
        #if label in self.structured_classes:
        #     image = self.transform_structured(image)
        #else:
        #     image = self.transform_natural(image)
        image = self.transform_structured(image)

        return image, label

In [9]:
class Cutout(torch.nn.Module):
    def __init__(self, mask_size, p=0.5):
        """
        Args:
            mask_size (int): The size of the square cutout mask.
            p (float): Probability of applying cutout.
        """
        super().__init__()
        self.mask_size = mask_size
        self.p = p

    def forward(self, img):
        # img is a torch.Tensor with shape (C, H, W)
        if random.random() > self.p:
            return img  # No cutout applied
        
        c, h, w = img.shape
        # Choose random center coordinates
        y = random.randint(0, h - 1)
        x = random.randint(0, w - 1)
        
        y1 = max(0, y - self.mask_size // 2)
        y2 = min(h, y + self.mask_size // 2)
        x1 = max(0, x - self.mask_size // 2)
        x2 = min(w, x + self.mask_size // 2)
        
        # Zero out the selected region
        img[:, y1:y2, x1:x2] = 0.0
        return img

In [10]:
transform_finetune = transforms.Compose([
    transforms.ToTensor(),

    # Positional Augmentations (Helps Generalization)
    transforms.RandomCrop(32, padding=4),  
    transforms.RandomHorizontalFlip(p=0.5),  
    transforms.RandomRotation(5),  

    # Mild Color Augmentation (Avoid Over-Augmenting)
    transforms.ColorJitter(brightness=0.05, contrast=0.05),  

    # Reduce Background Dependence
    transforms.RandomErasing(p=0.2),  
    Cutout(mask_size=12, p=0.3),  

    # Normalization 
    transforms.Normalize(mean=[0.49139968, 0.48215827, 0.44653124],  
                         std=[0.24703233, 0.24348505, 0.26158768])  
])

In [12]:
batch_size = 64 # Same as training
num_workers = 4  # Kaggle: use 2 if needed

# Create dataset with different transformations for different classes
train_dataset = CIFAR10Dataset(X_train_test, y_train_test, transform_finetune)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

In [13]:
# Custom ResNet Model

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SEBlock, self).__init__()
        
        self.fc1 = nn.Linear(channels, channels // reduction, bias=False)
        self.fc2 = nn.Linear(channels // reduction, channels, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, 1).view(x.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out).view(x.size(0), x.size(1), 1, 1)
        return x * out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64 

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.seblock = SEBlock(channels=self.in_planes)
        
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)  
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.avg_pool = nn.AvgPool2d(8)  
        self.fc = nn.Linear(256 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.seblock(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [None]:
epochs = 30 # Finetune for 30 epoch

def LiteResNet():
    return ResNet(BasicBlock, [7, 4, 3])  

# Define Training and Evaluation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LiteResNet().to(device)

model_path = "/kaggle/input/finetuning_latest_90/pytorch/default/1/best_model.pth"

model.load_state_dict(torch.load(model_path, map_location=device))  # Load weights

loss = nn.CrossEntropyLoss()

# Fine-Tuning Phase Optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=0.01)
scheduler = CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-6)

  model.load_state_dict(torch.load(model_path, map_location=device))  # Load weights


In [15]:
# Freeze first few layers so that the model keeps general features & previous training knowledge
for param in model.conv1.parameters():
    param.requires_grad = False  # Freeze only the first layer

for param in model.layer1.parameters():
    param.requires_grad = False  # Freeze the first block

In [16]:
torchsummary.summary(model,(3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Linear-3                    [-1, 4]             256
              ReLU-4                    [-1, 4]               0
            Linear-5                   [-1, 64]             256
           Sigmoid-6                   [-1, 64]               0
           SEBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
       BasicBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

In [17]:
#mixup = v2.MixUp(alpha=1.0, num_classes=10)  # 🔥 Use PyTorch’s built-in MixUp
cutmix = v2.CutMix(alpha=1.0, num_classes=10)
mixup = v2.MixUp(alpha=0.4, num_classes=10)

In [18]:
# Apply cutmix and mixup to the batch
EXCLUDE_CLASSES = [3, 5]  # 3: Cat, 5: Dog

def apply_augmentation(images, labels):
    batch_size = images.shape[0]
    rand_vals = torch.rand(batch_size, device=labels.device)  # Ensure random values are on the same device

    cutmix_mask = rand_vals < 0.35  # Apply 35% CutMix
    mixup_mask = (rand_vals >= 0.35) & (rand_vals < 0.7)  # Apply 35% MixUp

    # Move exclude_mask to the correct device
    exclude_mask = torch.tensor([label.item() in EXCLUDE_CLASSES for label in labels], 
                                dtype=torch.bool, device=labels.device)

    # Ensure Cats & Dogs are NOT used in MixUp or CutMix
    cutmix_mask &= ~exclude_mask  # Remove Cats & Dogs from CutMix
    mixup_mask &= ~exclude_mask   # Remove Cats & Dogs from MixUp

    # Apply augmentations
    if cutmix_mask.any():
        aug_images, aug_labels = cutmix(images[cutmix_mask], labels[cutmix_mask])
        images[cutmix_mask] = aug_images
        labels[cutmix_mask] = aug_labels.argmax(dim=1)  # Convert back to class indices

    if mixup_mask.any():
        aug_images, aug_labels = mixup(images[mixup_mask], labels[mixup_mask])
        images[mixup_mask] = aug_images
        labels[mixup_mask] = aug_labels.argmax(dim=1)  # Convert back to class indices

    return images, labels  # 30% remain unchanged, and excluded classes are untouched

In [19]:
# Store training history
train_loss_history = []
train_accuracy_history = []
epoch_list = []

# Initialize best loss and accuracy tracking
best_loss = float("inf")
best_accuracy = 0.0
scaler = torch.cuda.amp.GradScaler()

  scaler = torch.cuda.amp.GradScaler()


In [20]:
# Training Loop
for epoch in range(epochs):
    train_loss = 0.0
    total = 0
    correct = 0
    train_correct = 0
    train_total = 0
    
    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Apply MixUp or CutMix randomly with defined probabilities
        images, labels = apply_augmentation(images, labels)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            predicted_output = model(images)
            fit = loss(predicted_output, labels)  # Compute loss
        
        scaler.scale(fit).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += fit.item()
        
        # Compute training accuracy
        if labels.dim() == 2:  # If labels are one-hot encoded (MixUp or CutMix)
            labels = labels.argmax(dim=1)  # Convert to class indices

        # Compute training accuracy
        _, predicted = predicted_output.max(1)
        train_total += labels.size(0)
        train_correct += predicted.eq(labels).sum().item()
    
    # Compute accuracy
    train_accuracy = 100. * train_correct / train_total
    train_loss /= len(train_loader)
    
    scheduler.step()
    
    # Save best model based on loss & accuracy
    if best_accuracy < train_accuracy:
        best_accuracy = train_accuracy
        torch.save(model.state_dict(), "best_model_finetuned.pth")
        print(f"Saved best model at epoch {epoch} with loss {train_loss:.4f} and accuracy {best_accuracy:.2f}%")
        
    # Store loss and accuracy history
    train_loss_history.append(train_loss)
    train_accuracy_history.append(train_accuracy)
    epoch_list.append(epoch)
    
    print(f"Epoch {epoch}, Train loss {train_loss:.4f}, Train Accuracy {train_accuracy:.2f}%")

  with torch.cuda.amp.autocast():


Saved best model at epoch 0 with loss 0.3028 and accuracy 90.97%
Epoch 0, Train loss 0.3028, Train Accuracy 90.97%
Epoch 1, Train loss 0.3099, Train Accuracy 90.71%
Epoch 2, Train loss 0.3064, Train Accuracy 90.70%
Epoch 3, Train loss 0.3020, Train Accuracy 90.83%
Epoch 4, Train loss 0.3077, Train Accuracy 90.59%
Epoch 5, Train loss 0.3059, Train Accuracy 90.49%
Saved best model at epoch 6 with loss 0.2935 and accuracy 90.97%
Epoch 6, Train loss 0.2935, Train Accuracy 90.97%
Epoch 7, Train loss 0.2996, Train Accuracy 90.81%
Epoch 8, Train loss 0.2895, Train Accuracy 90.97%
Epoch 9, Train loss 0.2960, Train Accuracy 90.85%
Epoch 10, Train loss 0.2954, Train Accuracy 90.85%
Epoch 11, Train loss 0.2981, Train Accuracy 90.81%
Epoch 12, Train loss 0.2926, Train Accuracy 90.91%
Saved best model at epoch 13 with loss 0.2780 and accuracy 91.35%
Epoch 13, Train loss 0.2780, Train Accuracy 91.35%
Epoch 14, Train loss 0.2944, Train Accuracy 90.86%
Epoch 15, Train loss 0.2951, Train Accuracy 90.85

In [21]:
# Save training history to Excel
history_df = pd.DataFrame({
    'Epoch': epoch,
    'Train Loss': train_loss_history,
    'Train Accuracy': train_accuracy_history
})

In [22]:
history_df.to_excel("training_history_finetuning.xlsx", index=False)
print("Training history while finetuning saved to training_history_finetuning.xlsx")

Training history while finetuning saved to training_history_finetuning.xlsx


In [23]:
# Store results
results = []

# Disable gradient computation for inference
with torch.no_grad():
    for batch in train_loader:
        inputs, labels = batch  # Unpacking correctly
        inputs = inputs.to(device)

        # Model predictions
        outputs = model(inputs)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1)
        confidence_score = torch.max(probabilities, dim=1).values

        # Append results
        for i in range(len(labels)):
            results.append({
                "score": predicted_class[i].item(),
                "confidence_score": confidence_score[i].item(),
                "actual_label": labels[i].item(),
                "correct": predicted_class[i].item() == labels[i].item()
            })

# Convert results to DataFrame
df = pd.DataFrame(results)

In [24]:
df

Unnamed: 0,score,confidence_score,actual_label,correct
0,7,0.997787,7,True
1,3,0.998816,3,True
2,1,0.955720,1,True
3,2,0.880871,2,True
4,8,0.991098,8,True
...,...,...,...,...
59995,7,0.978534,7,True
59996,8,0.997890,8,True
59997,4,0.996656,4,True
59998,5,0.987169,5,True


In [25]:
# Count the number of rows where score is exactly 7
count = (df['score'] == 6).sum()
print(count)

6068


In [26]:
# Filter rows where confidence score is less than 50%
df_low_confidence = df[df["confidence_score"] < 0.50]
df_low_confidence

Unnamed: 0,score,confidence_score,actual_label,correct
82,2,0.476076,4,False
163,0,0.495429,0,True
250,4,0.465143,7,False
404,2,0.288377,3,False
561,5,0.446734,3,False
...,...,...,...,...
59343,9,0.494389,0,False
59456,2,0.365543,4,False
59459,6,0.394464,2,False
59625,8,0.482890,8,True


In [27]:
# Table for counting wrong predictions

prediction_counts = df.groupby(["actual_label", "score"]).size().unstack(fill_value=0)

# Rename prediction columns
prediction_counts.columns = [f"pred_{int(col)}" for col in prediction_counts.columns]

# Group by actual label and count correct/incorrect predictions
summary_df = df.groupby("actual_label").agg(
    correct_predictions=("correct", "sum"),   # Sum of True values (Correct predictions)
    false_predictions=("correct", lambda x: (~x).sum())  # Sum of False values (Incorrect predictions)
).reset_index()

# Merge prediction counts into summary_df
summary_df = summary_df.merge(prediction_counts, left_on="actual_label", right_index=True, how="left")

# Rename columns for clarity
summary_df.rename(columns={"actual_label": "Label"}, inplace=True)

# Fill NaN values with 0 (in case a label was never predicted as a certain class)
summary_df.fillna(0, inplace=True)

# Map numerical labels to class names in the format "0(aeroplane)", "1(automobile)", etc.
label_map = {i: f"{i}({class_labels[i]})" for i in range(len(class_labels))}

# Apply mapping to the 'Label' column
summary_df["Label"] = summary_df["Label"].map(label_map)

summary_df

Unnamed: 0,Label,correct_predictions,false_predictions,pred_0,pred_1,pred_2,pred_3,pred_4,pred_5,pred_6,pred_7,pred_8,pred_9
0,0(airplane),5914,86,5914,9,14,4,3,2,1,6,35,12
1,1(automobile),5940,60,4,5940,3,3,0,0,1,1,8,40
2,2(bird),5844,156,37,0,5844,18,41,15,29,7,9,0
3,3(cat),5717,283,6,1,49,5717,35,106,49,23,11,3
4,4(deer),5883,117,5,0,26,14,5883,13,30,25,3,1
5,5(dog),5757,243,8,1,32,122,42,5757,15,21,1,1
6,6(frog),5933,67,6,2,19,14,17,5,5933,2,2,0
7,7(horse),5929,71,7,0,15,8,19,14,2,5929,5,1
8,8(ship),5937,63,25,9,5,5,4,1,3,1,5937,10
9,9(truck),5919,81,10,43,3,1,1,0,5,2,16,5919
