<a href="https://colab.research.google.com/github/suchith83/AI/blob/main/final_sub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# Authenticate through kagglehub before the competition download in the next
# cell (presumably this prompts for Kaggle credentials -- confirm against the
# kagglehub documentation).
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Download the competition files and remember where kagglehub put them.
# NOTE(review): this path is stored but never referenced by the later cells in
# this notebook, which read from hard-coded '/kaggle/input/col-774-a-3/...'
# locations instead -- confirm which location is the intended one.
col_774_a_3_new_path = kagglehub.competition_download('col-774-a-3-new')

print('Data source import complete.')


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.batchnorm import _BatchNorm
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, Dataset, random_split
from PIL import Image
import numpy as np
import pickle
import time
from sklearn.linear_model import LogisticRegression

from torch import nn, optim
# from torch.nn import functional as F
import pandas as pd

In [None]:
# loading data functions

class CIFAR100Dataset(Dataset):
    """Dataset backed by a pickled sequence of ``(image, label)`` pairs.

    Args:
        file_path: Path of the pickle file to load.
        transform: Optional callable applied to the image of every item.
    """

    def __init__(self, file_path, transform=None):
        self.transform = transform
        # NOTE: pickle.load can execute arbitrary code; only open trusted files.
        with open(file_path, 'rb') as handle:
            self.data = pickle.load(handle)

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for *idx*, transforming the image if configured."""
        image, label = self.data[idx]
        if not self.transform:
            return image, label
        return self.transform(image), label

class CustomDatasetWrapper(torch.utils.data.Dataset):
    """Attach a per-item image transform to an existing indexable dataset.

    Lets each part of a split (e.g. the subsets returned by ``random_split``)
    carry its own transform.

    Args:
        subset: Indexable collection yielding ``(image, label)`` pairs.
        transform: Optional callable applied to the image of every item.
    """

    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __len__(self):
        """Return the size of the wrapped collection."""
        return len(self.subset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for *idx*, transforming the image if configured."""
        image, label = self.subset[idx]
        if not self.transform:
            return image, label
        return self.transform(image), label


In [None]:
#SAM

class SAM(torch.optim.Optimizer):
    """Sharpness-Aware Minimization wrapper around a base optimizer.

    Each update needs two forward/backward passes: ``first_step`` moves the
    weights along the (scaled) gradient, the caller recomputes gradients at
    the perturbed point, and ``second_step`` restores the original weights
    and lets the base optimizer apply the update with those new gradients.

    Args:
        params: Parameters (or parameter groups) to optimize.
        base_optimizer: Optimizer class (not an instance); it is constructed
            here from the parameter groups and ``**kwargs``.
        rho: Non-negative scale of the perturbation.
        adaptive: If True, the perturbation is additionally scaled
            element-wise by the parameter values.
        **kwargs: Forwarded to ``base_optimizer`` and stored in the defaults.
    """

    def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, adaptive=adaptive, **kwargs)
        super(SAM, self).__init__(params, defaults)

        # Build the wrapped optimizer on the same groups, then share the list
        # object so that e.g. learning-rate changes are seen by both.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups
        self.defaults.update(self.base_optimizer.defaults)

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Perturb every parameter that has a gradient to ``w + e(w)``.

        The original value is saved in ``self.state[p]["old_p"]`` so that
        ``second_step`` can restore it.

        Args:
            zero_grad: If True, clear the gradients after perturbing.
        """
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            # The small epsilon avoids division by zero when all gradients vanish.
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                self.state[p]["old_p"] = p.data.clone()
                e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Restore the saved weights and apply the base optimizer update.

        Expects gradients computed at the perturbed weights to be present.

        Args:
            zero_grad: If True, clear the gradients after the update.
        """
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.data = self.state[p]["old_p"]  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run both SAM steps, using *closure* for the second gradient pass.

        Gradients for the first step must already be populated when this is
        called; *closure* is invoked once, after the perturbation.

        Args:
            closure: Callable performing a full forward-backward pass. Required.
        """
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the 2-norm over all parameter gradients (norm of per-parameter norms).

        In adaptive mode each gradient is first multiplied by ``|p|``.
        NOTE(review): ``torch.stack`` receives an empty list when no parameter
        has a gradient -- callers must run a backward pass first.
        """
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm

    def load_state_dict(self, state_dict):
        """Load optimizer state and re-link the base optimizer's parameter groups."""
        super().load_state_dict(state_dict)
        self.base_optimizer.param_groups = self.param_groups


In [None]:
#WideResNet

class BasicBlock(nn.Module):
    """Pre-activation residual unit: (BN -> ReLU -> 3x3 conv) twice plus a shortcut.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first 3x3 convolution (and of the 1x1 shortcut).
        dropRate: Dropout probability applied between the two convolutions.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_planes, out_planes,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # A 1x1 projection is only needed when the channel count changes.
        if self.equalInOut:
            self.convShortcut = None
        else:
            self.convShortcut = nn.Conv2d(
                in_planes, out_planes,
                kernel_size=1, stride=stride, padding=0, bias=False,
            )

    def forward(self, x):
        """Return ``shortcut(x) + residual(x)``."""
        activated = self.relu1(self.bn1(x))
        # Identity shortcut uses the raw input; the projection shortcut is fed
        # the pre-activated tensor.
        if self.equalInOut:
            shortcut = x
        else:
            shortcut = self.convShortcut(activated)

        residual = self.relu2(self.bn2(self.conv1(activated)))
        if self.droprate > 0:
            residual = F.dropout(residual, p=self.droprate, training=self.training)
        residual = self.conv2(residual)
        return torch.add(shortcut, residual)

class NetworkBlock(nn.Module):
    """A stage of ``nb_layers`` consecutive residual blocks.

    Only the first block changes the channel count and applies ``stride``;
    the remaining blocks map ``out_planes`` to ``out_planes`` with stride 1.

    Args:
        nb_layers: Number of blocks in the stage (coerced with ``int``).
        in_planes: Input channels of the first block.
        out_planes: Output channels of every block.
        block: Block class, called as ``block(in, out, stride, dropRate)``.
        stride: Stride of the first block.
        dropRate: Dropout probability forwarded to every block.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        """Build the sequential stack of blocks for this stage."""
        layers = []
        for i in range(int(nb_layers)):
            is_first = i == 0
            # Real conditional expressions instead of ``cond and a or b``,
            # which silently picks ``b`` whenever ``a`` is falsy.
            layers.append(block(
                in_planes if is_first else out_planes,
                out_planes,
                stride if is_first else 1,
                dropRate,
            ))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Apply all blocks of the stage in order."""
        return self.layer(x)

class WideResNet(nn.Module):
    """Wide residual network built from three stages of ``BasicBlock``.

    Args:
        depth: Total depth; ``depth - 4`` must be divisible by 6.
        num_classes: Size of the final linear layer's output.
        widen_factor: Multiplier applied to the 16/32/64 stage widths.
        dropRate: Dropout probability used inside each block.
    """

    def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
        super().__init__()
        assert (depth - 4) % 6 == 0
        blocks_per_stage = (depth - 4) // 6
        stem, w1, w2, w3 = 16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor

        # Stem convolution, followed by the three stages (strides 1, 2, 2).
        self.conv1 = nn.Conv2d(3, stem, kernel_size=3, stride=1, padding=1, bias=False)
        self.block1 = NetworkBlock(blocks_per_stage, stem, w1, BasicBlock, 1, dropRate)
        self.block2 = NetworkBlock(blocks_per_stage, w1, w2, BasicBlock, 2, dropRate)
        self.block3 = NetworkBlock(blocks_per_stage, w2, w3, BasicBlock, 2, dropRate)

        # Final normalization, pooling and classifier head.
        self.bn1 = nn.BatchNorm2d(w3)
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(w3, num_classes)
        self.nChannels = w3

        # Weight initialization: Kaiming for convolutions, unit scale / zero
        # shift for batch norm, zero bias for the linear layer.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

    def forward(self, x):
        """Return class logits for a batch of images.

        The fixed ``avg_pool2d(..., 8)`` followed by ``view(-1, nChannels)``
        yields one row per sample only when the last stage outputs an 8x8 map.
        """
        features = self.conv1(x)
        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)
        features = self.relu(self.bn1(features))
        pooled = F.avg_pool2d(features, 8)
        flat = pooled.view(-1, self.nChannels)
        return self.fc(flat)

# print('completed')

In [None]:
#bypassing batchnorm (disabling BN)

def disable_running_stats(model):
    """Freeze batch-norm running statistics by setting momentum to 0.

    The previous momentum of every batch-norm layer is stored in
    ``backup_momentum`` so ``enable_running_stats`` can restore it.
    """
    for layer in model.modules():
        if not isinstance(layer, _BatchNorm):
            continue
        layer.backup_momentum = layer.momentum
        layer.momentum = 0

def enable_running_stats(model):
    """Restore the batch-norm momentum saved by ``disable_running_stats``.

    Layers without a stored ``backup_momentum`` are left untouched.
    """
    for layer in model.modules():
        if not isinstance(layer, _BatchNorm):
            continue
        if not hasattr(layer, "backup_momentum"):
            continue
        layer.momentum = layer.backup_momentum


In [None]:

class TemperatureScaling:
    """Calibrate a classifier by dividing its logits by a learned scalar.

    Args:
        model: Module producing class logits.
    """

    def __init__(self, model):
        self.model = model
        self.temperature = torch.nn.Parameter(torch.ones(1) * 1.0)

    def set_temperature(self, validation_loader):
        """Fit the temperature by minimizing cross-entropy on held-out data.

        Args:
            validation_loader: Iterable of ``(inputs, labels)`` batches.

        Returns:
            The fitted temperature as a Python float.
        """
        self.model.eval()
        # Feed inputs on the model's device; fall back to the temperature's
        # device for models without parameters.
        first_param = next(self.model.parameters(), None)
        model_device = first_param.device if first_param is not None else self.temperature.device
        fit_device = self.temperature.device

        logits_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in validation_loader:
                logits = self.model(inputs.to(model_device))
                logits_list.append(logits.to(fit_device))
                labels_list.append(labels.to(fit_device))

        logits = torch.cat(logits_list)
        labels = torch.cat(labels_list)

        optimizer = torch.optim.LBFGS([self.temperature], lr=0.01, max_iter=50)

        def closure():
            # LBFGS re-evaluates the objective several times and needs the
            # loss value back, so the closure must return it.
            optimizer.zero_grad()
            loss = F.cross_entropy(logits / self.temperature, labels)
            loss.backward()
            return loss

        optimizer.step(closure)

        return self.temperature.item()

    def forward(self, inputs):
        """Return the model's logits divided by the current temperature."""
        logits = self.model(inputs)
        return logits / self.temperature.to(logits.device)

In [None]:
class ModelwithT(nn.Module):
    """Wrap a classifier so that its logits are divided by a learnable temperature.

    Args:
        model: Module producing class logits.
    """

    def __init__(self, model):
        super(ModelwithT, self).__init__()
        self.model = model
        self.temp = nn.Parameter(torch.ones(1) * 1.5)

    def forward(self, input):
        """Return temperature-scaled logits for *input*."""
        return self.temp_scale(self.model(input))

    def temp_scale(self, logits):
        """Divide *logits* by the current temperature."""
        return logits / self.temp

    def set_temp(self, valid_loader):
        """Tune the temperature by minimizing NLL on *valid_loader*.

        Prints NLL and ECE before and after tuning. The module is moved to
        CUDA when available and otherwise stays on the CPU.

        Args:
            valid_loader: Iterable of ``(input, label)`` batches.

        Returns:
            ``self``, to allow chaining.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(device)
        nll_criterion = nn.CrossEntropyLoss().to(device)
        ece_criterion = _ECELoss().to(device)

        # Collect the unscaled logits once; no gradients are needed through
        # the wrapped model, only through the temperature.
        logits_list = []
        labels_list = []
        with torch.no_grad():
            for input, label in valid_loader:
                logits_list.append(self.model(input.to(device)))
                labels_list.append(label)
        logits = torch.cat(logits_list).to(device)
        labels = torch.cat(labels_list).to(device)

        with torch.no_grad():
            before_temp_nll = nll_criterion(logits, labels).item()
            before_temp_ece = ece_criterion(logits, labels).item()
        print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temp_nll, before_temp_ece))

        # The optimization must run with autograd enabled, otherwise
        # loss.backward() has nothing to differentiate.
        optimizer = optim.LBFGS([self.temp], lr=0.01, max_iter=50)

        def closure():
            optimizer.zero_grad()
            loss = nll_criterion(self.temp_scale(logits), labels)
            loss.backward()
            return loss

        optimizer.step(closure)

        with torch.no_grad():
            after_temp_nll = nll_criterion(self.temp_scale(logits), labels).item()
            after_temp_ece = ece_criterion(self.temp_scale(logits), labels).item()

        print("T: ", self.temp)
        print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temp_nll, after_temp_ece))

        return self


class _ECELoss(nn.Module):
    """Expected Calibration Error over equally wide confidence bins.

    Args:
        n_bins: Number of bins partitioning the interval (0, 1].
    """

    def __init__(self, n_bins=15):
        super(_ECELoss, self).__init__()
        bin_boundaries = torch.linspace(0, 1, n_bins + 1)
        self.bin_lowers = bin_boundaries[:-1]
        self.bin_uppers = bin_boundaries[1:]

    def forward(self, logits, labels):
        """Return a one-element tensor holding the ECE of *logits* against *labels*."""
        softmaxes = F.softmax(logits, dim=1)
        confidences, predictions = torch.max(softmaxes, 1)
        accuracies = predictions.eq(labels)

        ece = torch.zeros(1, device=logits.device)
        for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
            # Samples whose confidence lies in (bin_lower, bin_upper].
            in_bin = confidences.gt(bin_lower.item()) & confidences.le(bin_upper.item())
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin.item() > 0:
                accuracy_in_bin = accuracies[in_bin].float().mean()
                avg_confidence_in_bin = confidences[in_bin].mean()
                # Weight each bin's |confidence - accuracy| gap by its share of samples.
                ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
        return ece

# print('completed')

In [None]:
#Cutout data augmentation
# import torch


class Cutout:
    """Randomly zero out a rectangular patch of an image tensor, in place.

    Args:
        size: Side length of the patch before clipping to the image bounds.
        p: Probability of applying the cutout to a given image.
    """

    def __init__(self, size=16, p=0.5):
        self.size = size
        self.half_size = size // 2
        self.p = p

    def _random_span(self, extent):
        """Draw a random ``[start, stop)`` interval clipped to ``[0, extent]``."""
        origin = torch.randint(-self.half_size, extent - self.half_size, [1]).item()
        return max(0, origin), min(extent, origin + self.size)

    def __call__(self, image):
        """Return *image*, possibly with one patch zeroed across all channels."""
        if torch.rand([1]).item() > self.p:
            return image

        # Draw the interval for dimension 1 first, then dimension 2.
        start1, stop1 = self._random_span(image.size(1))
        start2, stop2 = self._random_span(image.size(2))

        image[:, start1:stop1, start2:stop2] = 0
        return image


#learning rate scheduler
class StepLR:
    """Piecewise-constant learning-rate schedule.

    The rate is multiplied by 0.2 once the epoch reaches 30%, 60% and 80%
    of ``total_epochs``.

    Args:
        optimizer: Object exposing ``param_groups`` (dicts with an ``"lr"`` key).
        learning_rate: Base learning rate used before the first drop.
        total_epochs: Planned number of training epochs.
    """

    def __init__(self, optimizer, learning_rate: float, total_epochs: int):
        self.optimizer = optimizer
        self.total_epochs = total_epochs
        self.base = learning_rate

    def __call__(self, epoch):
        """Write the learning rate for *epoch* into every parameter group."""
        for drops, tenths in enumerate((3, 6, 8)):
            if epoch < self.total_epochs * tenths / 10:
                new_lr = self.base if drops == 0 else self.base * 0.2 ** drops
                break
        else:
            # Past the last milestone: three drops applied.
            new_lr = self.base * 0.2 ** 3

        for group in self.optimizer.param_groups:
            group["lr"] = new_lr

    def lr(self) -> float:
        """Return the learning rate of the first parameter group."""
        return self.optimizer.param_groups[0]["lr"]


# print('completedd')

In [None]:
def calculate_classwise_accuracy(df, pred_col):
    """Return ``{predicted_class: fraction of its rows whose 'Label' matches}``.

    Rows are grouped by the value in *pred_col*, so each entry is the
    share of correct predictions among all rows predicted as that class.
    """
    return {
        cls: (rows['Label'] == rows[pred_col]).sum() / len(rows)
        for cls, rows in df.groupby(pred_col)
    }

def score(solution: pd.DataFrame, submission: pd.DataFrame, id_column_name: str, accuracy_threshold: float = 0.7, gamma: float = 5.0) -> float:
    """
    Custom metric to evaluate model performance.

    Predictions equal to -1 (abstentions) are ignored. For every predicted
    class in 0..99 the fraction of correct predictions is computed; the number
    of predictions made for classes at or above ``accuracy_threshold`` is
    rewarded, and the number made for classes below it is penalized by ``gamma``.

    Parameters:
        - solution: DataFrame containing the true class labels ('Label').
        - submission: DataFrame containing the predicted class labels ('Predicted_label').
        - id_column_name: Name of the column used for merging both DataFrames.
        - accuracy_threshold: Threshold for class accuracy.
        - gamma: Weighting factor for low accuracy classifications.

    Returns:
        - A single float representing the overall performance of the model.
    """
    # Merge solution and submission DataFrames using the ID column.
    merged = pd.merge(solution, submission, on=id_column_name)
    merged = merged[['Label', 'Predicted_label']]

    # Exclude abstentions and anything outside the 0..99 class range.
    merged = merged[merged['Predicted_label'] != -1]
    merged = merged[merged['Predicted_label'].isin(range(100))]

    # One groupby pass yields both the prediction count and the number of
    # correct predictions per predicted class.
    is_correct = merged['Label'] == merged['Predicted_label']
    per_class = is_correct.groupby(merged['Predicted_label'])
    predicted_counts = per_class.size()
    class_accuracy = per_class.sum() / predicted_counts
    is_high = class_accuracy >= accuracy_threshold

    # NOTE(review): despite the "correctly classified" wording, the metric has
    # always summed the number of predictions per class (not the number of
    # correct ones). That behavior is preserved here -- confirm against the
    # official competition metric.
    sum_high_accuracy = predicted_counts[is_high].sum()
    sum_low_accuracy = predicted_counts[~is_high].sum()

    return float(sum_high_accuracy - gamma * sum_low_accuracy)

# print('completed')

In [None]:
#validate model
def validate_model(model, val_loader, loss_function, device,confidences= 0.9):
    """Evaluate *model* on *val_loader*.

    A prediction is kept only when its softmax probability exceeds
    ``confidences``; otherwise it is recorded as -1 (abstain). The kept
    predictions are scored with ``score`` while plain top-1 accuracy is
    computed over all samples.

    Args:
        model: Module returning class logits.
        val_loader: Iterable of ``(inputs, targets)`` batches.
        loss_function: Unused; kept for interface compatibility.
        device: Device the batches are moved to.
        confidences: Probability threshold below which the model abstains.

    Returns:
        Tuple ``(accuracy_percent, score_value)``.
    """
    model.eval()
    correct = 0
    total = 0

    predictions = []
    true_labels = []

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            top_prob, top_class = torch.softmax(outputs, dim=1).max(dim=1)

            # Threshold the whole batch at once instead of synchronizing with
            # the device once per sample.
            abstain = torch.full_like(top_class, -1)
            thresholded = torch.where(top_prob > confidences, top_class, abstain)
            predictions.extend(thresholded.tolist())
            true_labels.extend(targets.tolist())

            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Positional IDs line up predictions with labels for the merge in score().
    solution = pd.DataFrame({'ID': range(len(true_labels)), 'Label': true_labels})
    submission = pd.DataFrame({'ID': range(len(predictions)), 'Predicted_label': predictions})
    score_value = score(solution, submission, id_column_name='ID')

    return 100. * correct / total,score_value


# print('completedd')

In [None]:
def scaled_model(model, valid_loader,device):
    """Wrap *model* in ``ModelwithT`` and fit its temperature on *valid_loader*.

    The model is switched to eval mode first. Returns the wrapper.
    """
    model.eval()
    # A distinct local name avoids shadowing this function inside its own body.
    calibrated = ModelwithT(model).to(device)
    calibrated.set_temp(valid_loader)
    return calibrated
# print('completed')

In [None]:
#train model

def train_model(model, train_loader,validation_loader, optimizer, loss_function,scheduler, epochs,device):
    """Train *model* with a two-step SAM optimizer and keep the best weights.

    After every epoch the model is temperature-calibrated (on
    ``train_loader``) and evaluated on ``validation_loader``; the weights with
    the highest validation score are remembered.

    Args:
        model: Network to train (modified in place).
        train_loader: Iterable of training ``(inputs, targets)`` batches.
        validation_loader: Iterable of validation batches.
        optimizer: Optimizer exposing ``first_step`` / ``second_step``.
        loss_function: Criterion applied to ``(logits, targets)``.
        scheduler: Callable invoked as ``scheduler(epoch)`` after each epoch.
        epochs: Number of epochs to run.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(best_state_dict, best_score)``; the state dict is ``None``
        when no epoch was run.
    """
    import copy

    best_model = None
    best_score = float('-inf')

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            # First forward-backward pass: gradients at the current weights,
            # with batch-norm statistics updated as usual.
            enable_running_stats(model)
            predictions = model(inputs)
            loss = loss_function(predictions, targets)
            loss.backward()

            optimizer.first_step(zero_grad=True)

            # Second forward-backward pass at the perturbed weights; running
            # statistics are frozen so the same batch is not counted twice.
            disable_running_stats(model)
            loss_function(model(inputs), targets).backward()
            optimizer.second_step(zero_grad=True)

            epoch_loss += loss.item()

        # NOTE(review): called with the index of the epoch that just finished,
        # so the rate chosen here takes effect from the next epoch on.
        scheduler(epoch)

        temp_scaled_model = scaled_model(model, train_loader,device)

        val_accuracy, val_score = validate_model(temp_scaled_model, validation_loader, loss_function, device)

        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}, Val Acc: {val_accuracy:.2f}%, Val Score: {val_score}')

        if val_score > best_score:
            best_score = val_score
            # state_dict() returns references to the live tensors; without a
            # deep copy the "best" snapshot would silently follow every later
            # update and end up equal to the final weights.
            best_model = copy.deepcopy(model.state_dict())

    return best_model, best_score

# print('completed')

In [None]:


#create model

# Hyper-parameters for the WideResNet and the training run.
depth = 28
widen_factor = 10
num_classes = 100
drop_rate = 0.3
# labelsmoothing = 0.1
epochs = 60
weightdecay = 5e-4
momentum_factor = 0.9
batchsize = 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = WideResNet(depth, num_classes, widen_factor, drop_rate)
model = model.to(device)

# loss_function = nn.CrossEntropyLoss(label_smoothing=labelsmoothing)
loss_function = nn.CrossEntropyLoss()

base_optimizer = torch.optim.SGD
isnesterov = True
# NOTE(review): rho is defined but not passed to SAM below; SAM's own default
# (0.05) happens to be the same value.
rho = 0.05

# SAM wraps SGD; the keyword arguments after base_optimizer are forwarded to SGD.
optimizer = SAM(model.parameters(), base_optimizer= base_optimizer, lr = 0.1, weight_decay = weightdecay, momentum = momentum_factor,dampening=0,nesterov=isnesterov)

# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer.base_optimizer, T_max=epochs)

# Step schedule defined in this notebook (drops at 30%, 60% and 80% of the epochs).
scheduler = StepLR(optimizer, learning_rate=0.1, total_epochs=epochs)




# Per-channel mean/std, given on a 0-255 scale and rescaled to 0-1 here.
normalize = transforms.Normalize(mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
                                     std=[x/255.0 for x in [63.0, 62.1, 66.7]])


# Training augmentation: random crop with padding, horizontal flip, Cutout.
transform_train = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        Cutout(size=16,p = 0.5),
        normalize
    ])


# Evaluation only normalizes (presumably the stored images are already
# tensors -- confirm against the pickle contents).
transform_test = transforms.Compose([
    normalize
])


# Loaded without a transform; the transforms are attached per subset below.
train_data = CIFAR100Dataset('/kaggle/input/col-774-a-3/train.pkl', transform=None)
# train_loader = DataLoader(train_data, batch_size=batchsize, shuffle=True, num_workers=2, pin_memory=True)
# test_data = CIFAR100Dataset('/kaggle/input/col-774-a-3/test.pkl', transform=transform_test)



# Set the size of the training and validation sets
# NOTE(review): hard-coded sizes; random_split needs them to add up to
# len(train_data).
train_size = 30000
val_size = 10000

# Split the dataset once into training and validation subsets
train_subset, val_subset = random_split(train_data, [train_size, val_size])

train_subset = CustomDatasetWrapper(train_subset, transform=transform_train)
val_subset = CustomDatasetWrapper(val_subset, transform=transform_test)


# DataLoader for each subset
train_loader = DataLoader(train_subset, batch_size=batchsize, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_subset, batch_size=batchsize, shuffle=False, num_workers=2, pin_memory=True)


# Measure the start time
start_time = time.time()
print('Training')
# Train the model
best_model, best_score = train_model(model, train_loader, val_loader, optimizer, loss_function, scheduler, epochs, device)


# Restore the weights that train_model reported as best before saving.
model.load_state_dict(best_model)

# save model
torch.save(model.state_dict(), 'model.pth')
# Measure the end time
end_time = time.time()

# Print the total time taken
total_time = end_time - start_time

print(f"Training completed in {total_time:.2f} seconds")
print('completed')

In [None]:
# Rebuild the transforms and loaders for the calibration / inference cells.
normalize = transforms.Normalize(mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
                                     std=[x/255.0 for x in [63.0, 62.1, 66.7]])


transform_train = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        Cutout(size=16,p = 0.5),
        normalize
    ])


transform_test = transforms.Compose([
    normalize
])

batchsize = 128

# NOTE(review): train_data carries the random training augmentation here, so
# every later use of it (temperature calibration, confidence statistics) sees
# augmented images -- confirm this is intended.
train_data = CIFAR100Dataset('/kaggle/input/col-774-a-3/train.pkl', transform=transform_train)
test_data = CIFAR100Dataset('/kaggle/input/col-774-a-3/test.pkl', transform=transform_test)

# Data loader for train, test
train_loader = DataLoader(train_data, batch_size=batchsize, shuffle=True, num_workers=2, pin_memory=True)
# The test loader yields one sample at a time; the inference cell relies on that.
test_loader = DataLoader(test_data, batch_size=1, shuffle=False, num_workers=2, pin_memory=True)

In [None]:
#create model

# Architecture settings must match the ones used when the checkpoint was trained.
depth = 28
widen_factor = 10
num_classes = 100
drop_rate = 0.3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

fin_model = WideResNet(depth, num_classes, widen_factor, drop_rate)
fin_model = fin_model.to(device)

# map_location lets a checkpoint that was saved from GPU tensors load on a
# CPU-only machine as well.
# fin_model.load_state_dict(torch.load('/kaggle/input/v2/pytorch/default/1/model_hari.pth', map_location=device))
fin_model.load_state_dict(torch.load('/kaggle/input/v1/pytorch/default/1/model.pth', map_location=device))

In [None]:
# Fit the temperature of the loaded model on train_loader and keep the
# calibrated wrapper in eval mode for the cells below.
fin_model.eval()
scal_model = scaled_model(fin_model, train_loader, device)
scal_model.eval()
# torch.save on a module object pickles the whole wrapper, not just a state dict.
torch.save(scal_model, 'scaled_model2.pth')

# Despite its name this loader iterates train_data, one shuffled sample at a time.
valid_loader = DataLoader(train_data, batch_size=1, shuffle=True, num_workers=2, pin_memory=True)

In [None]:
# Collect (confidence, predicted class, true class) for every sample served by
# valid_loader and report the top-1 accuracy of the calibrated model.
predictions = []
count = 0
with torch.no_grad():
    for X_val, Y_val in valid_loader:
        X_val, Y_val = X_val.to(device), Y_val.to(device)
        outputs = scal_model(X_val)

        predicted = torch.softmax(outputs,dim=1)

        # valid_loader uses batch_size=1, so row 0 is the only sample.
        predicted_class = predicted.argmax(dim=1).item()
        predicted_prob = predicted[0,predicted_class].item()

        predictions.append((predicted_prob,predicted_class,Y_val.item()))

        if predicted_class == Y_val.item():
            count += 1

# Divide by the number of samples actually seen instead of a hard-coded
# dataset size.
print("accuracy: ", count/len(predictions) if predictions else 0.0)

In [None]:
# Reliability table: h[k] = [number correct, number of predictions] for
# confidences in [(k-1)/10, k/10); the last bin (k = 10) also includes 1.0.
h = {i: [0, 0] for i in range(1, 11)}
for prob,pc,oc in predictions:
    for r in range(1, 11):
        # The first upper edge above prob identifies the bin. r == 10 is the
        # catch-all so that a confidence of exactly 1.0 (common for saturated
        # softmax outputs) is counted instead of silently dropped.
        if prob < r/10 or r == 10:
            h[r][1] += 1
            if pc == oc:
                h[r][0] += 1
            break
print(h)

In [None]:
# Predict on the test set, abstaining (-1) whenever the calibrated confidence
# does not exceed tau.
tau = 0.9925
test_predictions = []
with torch.no_grad():
    for X, sample_id in test_loader:
        # test_loader uses batch_size=1, so X holds a single image and the
        # outputs below have a single row.
        X = X.to(device)
        outputs = scal_model(X)    # temperature-scaled logits, one row per sample

        predicted = torch.softmax(outputs, dim=1)  # Apply softmax to get probabilities

        predicted_class = predicted.argmax(dim=1).item()
        if predicted[0, predicted_class].item() > tau:
            # Keep the predicted class
            final_predicted_class = predicted_class
        else:
            # If the probability does not exceed the threshold, assign -1
            final_predicted_class = -1

        test_predictions.append([sample_id.item(), final_predicted_class])
# change the predictions to a numpy array
test_predictions = np.array(test_predictions)

In [None]:
# Count how often each label (including the abstain label -1) was predicted.
tmp = test_predictions[:, 1]
unique_values, counts = np.unique(tmp, return_counts=True)
# Bare expression: displayed as the cell output in a notebook.
unique_values, counts

In [None]:
import csv
output_file = 'submission3_3.csv'

# Open the file in write mode; the with-block closes it on exit, so no
# explicit close() is needed afterwards.
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Write the header
    writer.writerow(['ID', 'Predicted_label'])

    # Write the data
    writer.writerows(test_predictions)

In [None]:
class PlattScaling:
    """One-vs-rest Platt scaling: one logistic regression per class logit.

    Args:
        model: Module producing class logits; moved to CUDA when available,
            otherwise kept on the CPU.
    """

    def __init__(self, model):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.platt_models = []

    def fit(self, validation_loader):
        """Fit one logistic regression per class on the model's logits.

        Calling ``fit`` again replaces the previously fitted calibrators.

        Args:
            validation_loader: Iterable of ``(inputs, labels)`` batches.
        """
        logits_list, labels_list = [], []
        with torch.no_grad():
            for inputs, labels in validation_loader:
                logits = self.model(inputs.to(self.device))
                logits_list.extend(logits.cpu().numpy())  # sklearn works on CPU arrays
                labels_list.extend(labels.cpu().numpy())

        logits = np.array(logits_list)
        labels = np.array(labels_list)

        # Build into a fresh list so a refit does not append duplicates and
        # shift the class indices used by predict_proba.
        fitted = []
        for class_idx in range(logits.shape[1]):
            clf = LogisticRegression()
            clf.fit(logits[:, [class_idx]], labels == class_idx)
            fitted.append(clf)
        self.platt_models = fitted

    def predict_proba(self, inputs):
        """Return per-class calibrated probabilities for *inputs*.

        Each column comes from an independent binary calibrator, so rows are
        not renormalized to sum to one.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        if not self.platt_models:
            raise RuntimeError("PlattScaling.predict_proba called before fit()")

        with torch.no_grad():
            logits = self.model(inputs.to(self.device)).cpu().numpy()
        calibrated_probs = np.zeros_like(logits)

        for class_idx, clf in enumerate(self.platt_models):
            calibrated_probs[:, class_idx] = clf.predict_proba(logits[:, [class_idx]])[:, 1]

        return calibrated_probs
