In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from collections import defaultdict
from sklearn.metrics import classification_report
import json
import matplotlib.pyplot as plt
import numpy as np

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from collections import defaultdict
from sklearn.metrics import classification_report
import json
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader

class MultiTaskResNet50(nn.Module):
    def __init__(self):
        super(MultiTaskResNet50, self).__init__()
        self.resnet = models.resnet34(pretrained=True)
        for param in self.resnet.parameters():
            param.requires_grad = True

        num_ftrs = self.resnet.fc.in_features
        
        self.puzzle_decoder = nn.Sequential(
            nn.Linear(1000, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

        self.mag_decoder = nn.Sequential(
            nn.Linear(1000, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)
        )


    def forward(self, x_i, x_j):

        z_i = self.resnet(x_i)
        z_j = self.resnet(x_j)
    


        y_i = self.mag_decoder(z_i)
        y_j = self.puzzle_decoder(z_j)


        return y_j, y_i 

# Example: If task1 has 10 classes and task2 has 4 classes
model = MultiTaskResNet50()


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 124MB/s] 


In [3]:
from torchvision import datasets

mag_train_dir = '/kaggle/input/train-mag/train'
mag_val_dir = '/kaggle/input/validation-mag/validation'

puzzle_train_dir = '/kaggle/input/puzzle-6000/puzzle_6000'
puzzle_val_dir = '/kaggle/input/validation-puzzle/validation'

mag_dirs = {'train': mag_train_dir,
        'validation' : mag_val_dir
       }

puzzle_dirs = {'train': puzzle_train_dir,
        'validation' : puzzle_val_dir
       }


from torchvision import transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(45),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], 
                            [0.229, 0.224, 0.225])
    ]),

    'validation':transforms.Compose([
        transforms.RandomRotation(45),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], 
                            [0.229, 0.224, 0.225])
    ])
}

mag_image_datasets = {x: datasets.ImageFolder( mag_dirs[x],   transform=data_transforms[x]) for x in ['train', 'validation']}
puzzle_image_datasets = {x: datasets.ImageFolder( puzzle_dirs[x],   transform=data_transforms[x]) for x in ['train', 'validation']}

mag_dataloaders = {x: torch.utils.data.DataLoader(mag_image_datasets[x], batch_size=64, shuffle=True, drop_last=True) for x in ['train', 'validation']}
puzzle_dataloaders = {x: torch.utils.data.DataLoader(puzzle_image_datasets[x], batch_size=64, shuffle=True, drop_last=True) for x in ['train', 'validation']}

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
model = model.to(device)

In [6]:
criterion_task1 = nn.CrossEntropyLoss()
criterion_task2 = nn.CrossEntropyLoss()



In [7]:
import torch
import torch.distributed as dist


class GatherLayer(torch.autograd.Function):
    """Gather tensors from all process, supporting backward propagation."""

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        output = [torch.zeros_like(input) for _ in range(dist.get_world_size())]
        dist.all_gather(output, input)
        return tuple(output)

    @staticmethod
    def backward(ctx, *grads):
        (input,) = ctx.saved_tensors
        grad_out = torch.zeros_like(input)
        grad_out[:] = grads[dist.get_rank()]
        return grad_out

In [8]:
import torch
import torch.nn as nn
import torch.distributed as dist


class NT_Xent(nn.Module):
    def __init__(self, batch_size, temperature, world_size):
        super(NT_Xent, self).__init__()
        self.batch_size = batch_size
        self.temperature = temperature
        self.world_size = world_size

        self.mask = self.mask_correlated_samples(batch_size, world_size)
        self.criterion = nn.CrossEntropyLoss(reduction="sum")
        self.similarity_f = nn.CosineSimilarity(dim=2)

    def mask_correlated_samples(self, batch_size, world_size):
        N = 2 * batch_size * world_size
        mask = torch.ones((N, N), dtype=bool)
        mask = mask.fill_diagonal_(0)
        for i in range(batch_size * world_size):
            mask[i, batch_size * world_size + i] = 0
            mask[batch_size * world_size + i, i] = 0
        return mask

    def forward(self, z_i, z_j):
        """
        We do not sample negative examples explicitly.
        Instead, given a positive pair, similar to (Chen et al., 2017), we treat the other 2(N − 1) augmented examples within a minibatch as negative examples.
        """
        N = 2 * self.batch_size * self.world_size

        if self.world_size > 1:
            z_i = torch.cat(GatherLayer.apply(z_i), dim=0)
            z_j = torch.cat(GatherLayer.apply(z_j), dim=0)
        z = torch.cat((z_i, z_j), dim=0)

        sim = self.similarity_f(z.unsqueeze(1), z.unsqueeze(0)) / self.temperature

        sim_i_j = torch.diag(sim, self.batch_size * self.world_size)
        sim_j_i = torch.diag(sim, -self.batch_size * self.world_size)

        # We have 2N samples, but with Distributed training every GPU gets N examples too, resulting in: 2xNxN
        positive_samples = torch.cat((sim_i_j, sim_j_i), dim=0).reshape(N, 1)
        negative_samples = sim[self.mask].reshape(N, -1)

        labels = torch.zeros(N).to(positive_samples.device).long()
        logits = torch.cat((positive_samples, negative_samples), dim=1)
        loss = self.criterion(logits, labels)
        loss /= N
        return loss

In [9]:
temperature = .5
batch_size = 64

#criterion_task1 = NT_Xent(batch_size, temperature, world_size=1)
#criterion_task2 = NT_Xent(batch_size, temperature, world_size=1)

In [10]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-2)


In [11]:
import torch
from collections import defaultdict
import torch.nn.functional as F

def train_validation(model, num_epochs, puzzle_dataloaders, mag_dataloaders, optimizer, criterion_task1, criterion_task2, device, l1_lambda, saving_name):
    hold_loss = []
    hold_loss1 = []
    hold_accuracy1 = []
    
    hold_loss2 = []
    hold_accuracy2 = []

    # Learning rate scheduler for potential learning rate decay
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        running_loss_task1 = 0.0
        running_loss_task2 = 0.0

        for i, ((data_task1, target_task1), (data_task2, target_task2)) in enumerate(zip(puzzle_dataloaders['train'], mag_dataloaders['train'])):
            data_task1, target_task1 = data_task1.to(device), target_task1.to(device)
            data_task2, target_task2 = data_task2.to(device), target_task2.to(device)
            
            optimizer.zero_grad()
            
            output_task1, output_task2 = model(data_task1, data_task2)
            
            loss_task1 = criterion_task1(output_task1, target_task1)
            loss_task2 = criterion_task2(output_task2, target_task2)
            
            base_loss = loss_task1 + loss_task2

            # Add L1 regularization
            l1_norm = sum(p.abs().sum() for p in model.parameters())
            loss = base_loss + l1_lambda * l1_norm  # Total loss including L1 penalty

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_loss_task1 += loss_task1.item()
            running_loss_task2 += loss_task2.item()

            if (i + 1) % 100 == 0:
                print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}], Loss: {running_loss / 100:.4f}, Loss Task1: {running_loss_task1 / 100:.4f}, Loss Task2: {running_loss_task2 / 100:.4f}")
                hold_loss.append(running_loss / 100)
                running_loss = 0.0
                running_loss_task1 = 0.0
                running_loss_task2 = 0.0
        
        # Adjust learning rate based on the scheduler
        scheduler.step()

        # Validation Phase
        model.eval()  
        total_correct1 = 0
        total_samples1 = 0
        
        total_correct2 = 0
        total_samples2 = 0
        
        results1 = defaultdict(list)
        results2 = defaultdict(list)

        with torch.no_grad():
            for (data_task1, target_task1), (data_task2, target_task2) in zip(puzzle_dataloaders['validation'], mag_dataloaders['validation']):
                data_task1, target_task1 = data_task1.to(device), target_task1.to(device)
                data_task2, target_task2 = data_task2.to(device), target_task2.to(device)
                
                output_task1, output_task2 = model(data_task1, data_task2)
                
                predicted1 = output_task1.argmax(1)
                predicted2 = output_task2.argmax(1)
                
                total_correct1 += (predicted1 == target_task1).sum().item()
                total_samples1 += target_task1.size(0)
                
                total_correct2 += (predicted2 == target_task2).sum().item()
                total_samples2 += target_task2.size(0)

                for label, pred in zip(target_task1.cpu().numpy(), predicted1.cpu().numpy()):
                    key = f"{label}"
                    results1[key].append(pred == label)
                
                for label, pred in zip(target_task2.cpu().numpy(), predicted2.cpu().numpy()):
                    key = f"{label}"
                    results2[key].append(pred == label)

        # Calculate accuracy for task 1 and task 2
        add_list1 = []
        add_list2 = []
        
        for key, values in results1.items():
            accuracy = sum(values) / len(values)
            add_list1.append(f"Accuracy for {key}: {accuracy:.2f}")
        hold_accuracy1.append(add_list1)

        total_accuracy1 = total_correct1 / total_samples1
        print(f"Total Accuracy Task 1: {total_accuracy1:.2f}")

        for key, values in results2.items():
            accuracy = sum(values) / len(values)
            add_list2.append(f"Accuracy for {key}: {accuracy:.2f}")
        hold_accuracy2.append(add_list2)

        total_accuracy2 = total_correct2 / total_samples2
        print(f"Total Accuracy Task 2: {total_accuracy2:.2f}")
        print("----------------------------------")
        
        if (epoch+1)%10 == 0:
            # Save model with a more informative name
            save_filename = f"{saving_name}resnet34model_puzzle_mag_epoch_{epoch+1}.pth"#saving_name# f"{saving_name}_epoch_{epoch+1}_acc1_{total_accuracy1:.2f}_acc2_{total_accuracy2:.2f}.pth"
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }, save_filename)
            print(f"Model saved as {save_filename}!")

    print("Training complete.")
    return hold_loss, hold_accuracy1, hold_accuracy2


In [12]:
hold_loss, hold_accuracy1, hold_accuracy2 = train_validation(model, 50, puzzle_dataloaders, mag_dataloaders, optimizer, criterion_task1, criterion_task2, device, 0, '/kaggle/working/')

Epoch [1/1], Step [100], Loss: 2.8507, Loss Task1: 0.8900, Loss Task2: 1.9608
Epoch [1/1], Step [200], Loss: 2.6971, Loss Task1: 0.7447, Loss Task2: 1.9524
Epoch [1/1], Step [300], Loss: 2.6896, Loss Task1: 0.7396, Loss Task2: 1.9499
Epoch [1/1], Step [400], Loss: 2.6913, Loss Task1: 0.7427, Loss Task2: 1.9486
Epoch [1/1], Step [500], Loss: 2.7016, Loss Task1: 0.7536, Loss Task2: 1.9480
Epoch [1/1], Step [600], Loss: 2.6989, Loss Task1: 0.7514, Loss Task2: 1.9475
Epoch [1/1], Step [700], Loss: 2.6823, Loss Task1: 0.7354, Loss Task2: 1.9469
Total Accuracy Task 1: 0.10
Total Accuracy Task 2: 0.15
----------------------------------
Model saved as /kaggle/working/50epoch_resnet34model_dbracs_mag.pth!
Training complete.


In [None]:
print(hold_loss)

In [None]:
print(hold_accuracy1)

In [None]:
print(hold_accuracy2)