In [2]:
import torch
from torch import nn, einsum
import numpy as np
from einops import rearrange, repeat
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from torch.utils.data import WeightedRandomSampler
import os
import h5py
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as f
from io import BytesIO
from tqdm import tqdm
from collections import Counter

from sklearn.utils.class_weight import compute_class_weight

from torch.optim.lr_scheduler import CosineAnnealingLR

import json

# Model

In [3]:
class CyclicShift(nn.Module):
    """Circularly roll a tensor by the same offset along dims 1 and 2.

    Args:
        displacement: signed number of positions to roll along each of the
            two dimensions; elements pushed past one edge wrap to the other.
    """

    def __init__(self, displacement):
        super().__init__()
        self.displacement = displacement

    def forward(self, x):
        """Return ``x`` rolled by ``self.displacement`` along dims 1 and 2."""
        offset = self.displacement
        return x.roll(shifts=(offset, offset), dims=(1, 2))


class Residual(nn.Module):
    """Skip-connection wrapper: the output is ``fn(x, **kwargs) + x``.

    Args:
        fn: callable (typically an ``nn.Module``) whose output can be added
            to its input.
    """

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        """Run the wrapped callable on ``x`` and add ``x`` back onto the result."""
        branch = self.fn(x, **kwargs)
        return branch + x


class PreNorm(nn.Module):
    """Apply ``LayerNorm`` over the last dimension, then the wrapped callable.

    Args:
        dim: size of the last (normalised) dimension.
        fn: callable invoked on the normalised tensor; extra keyword
            arguments passed to ``forward`` are handed through to it.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return ``fn(LayerNorm(x), **kwargs)``."""
        normalised = self.norm(x)
        return self.fn(normalised, **kwargs)


class FeedForward(nn.Module):
    """Two-layer MLP: Linear(dim -> hidden_dim), GELU, Linear(hidden_dim -> dim).

    Args:
        dim: input and output feature size (last dimension).
        hidden_dim: width of the intermediate layer.
    """

    def __init__(self, dim, hidden_dim):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the layers are applied.
        expand = nn.Linear(dim, hidden_dim)
        activation = nn.GELU()
        project = nn.Linear(hidden_dim, dim)
        self.net = nn.Sequential(expand, activation, project)

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        out = self.net(x)
        return out

In [4]:
def create_mask(window_size, displacement, upper_lower, left_right):
    """Build a (window_size**2, window_size**2) tensor of 0 / -inf entries.

    Rows and columns index the ``window_size * window_size`` positions of one
    window in row-major order. Entries set to ``-inf`` mark position pairs
    that sit on opposite sides of the ``displacement`` boundary.

    Args:
        window_size: side length of a (square) window.
        displacement: number of trailing rows/columns treated as the
            "other side" of the boundary.
        upper_lower: if true, block pairs split by the row boundary.
        left_right: if true, block pairs split by the column boundary.

    Returns:
        A float tensor of shape (window_size**2, window_size**2).
    """
    positions = window_size ** 2
    blocked = float('-inf')
    mask = torch.zeros(positions, positions)

    if upper_lower:
        # The last `displacement` rows of the window occupy the trailing
        # displacement * window_size flattened positions.
        edge = displacement * window_size
        mask[-edge:, :-edge] = blocked
        mask[:-edge, -edge:] = blocked

    if left_right:
        # View the flat position axes as (row, col) pairs; writes through the
        # view land in `mask` itself, so no reshape back is needed.
        grid = mask.view(window_size, window_size, window_size, window_size)
        grid[:, -displacement:, :, :-displacement] = blocked
        grid[:, :-displacement, :, -displacement:] = blocked

    return mask


def get_relative_distances(window_size):
    """Return pairwise (row, col) offsets between all positions of a window.

    Positions are enumerated in row-major order. Entry ``[i, j]`` of the
    result is ``coords[j] - coords[i]``, so the output has shape
    (window_size**2, window_size**2, 2) with values in
    ``[-(window_size - 1), window_size - 1]``.
    """
    # Flat index i corresponds to (row, col) = (i // window_size, i % window_size).
    coords = [[flat // window_size, flat % window_size] for flat in range(window_size ** 2)]
    indices = torch.tensor(np.array(coords))
    return indices[None, :, :] - indices[:, None, :]


In [5]:
class WindowAttention(nn.Module):
    """Multi-head self-attention computed inside non-overlapping square windows.

    The input is a channels-last feature map of shape (b, n_h, n_w, dim).
    It is split into ``window_size x window_size`` windows and attention is
    computed independently within each window. When ``shifted`` is true the
    map is cyclically rolled by ``window_size // 2`` before attention and
    rolled back afterwards; the -inf masks stop positions that only became
    neighbours through the wrap-around from attending to each other.

    Args:
        dim: number of input/output channels (last dimension of ``x``).
        heads: number of attention heads.
        head_dim: channels per head.
        shifted: whether to use the shifted-window variant.
        window_size: side length of each attention window; ``n_h`` and
            ``n_w`` are expected to be multiples of it.
        relative_pos_embedding: if true, learn a
            (2*window_size-1, 2*window_size-1) bias table indexed by relative
            offset; otherwise learn a full (window_size**2, window_size**2)
            bias.
    """

    def __init__(self, dim, heads, head_dim, shifted, window_size, relative_pos_embedding):
        super().__init__()
        inner_dim = head_dim * heads

        self.heads = heads
        self.scale = head_dim ** -0.5
        self.window_size = window_size
        self.relative_pos_embedding = relative_pos_embedding
        self.shifted = shifted

        if self.shifted:
            displacement = window_size // 2
            self.cyclic_shift = CyclicShift(-displacement)
            self.cyclic_back_shift = CyclicShift(displacement)
            # Kept as frozen Parameters (not buffers) so that existing
            # checkpoints and parameter listings stay unchanged.
            self.upper_lower_mask = nn.Parameter(create_mask(window_size=window_size, displacement=displacement,
                                                             upper_lower=True, left_right=False), requires_grad=False)
            self.left_right_mask = nn.Parameter(create_mask(window_size=window_size, displacement=displacement,
                                                            upper_lower=False, left_right=True), requires_grad=False)

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)

        if self.relative_pos_embedding:
            # Offsets are shifted by window_size - 1 so they become valid
            # non-negative indices into the bias table.
            # Registered as a buffer so the index tensor follows the module
            # across .to()/.cuda() and replication; persistent=False keeps it
            # out of the state_dict, so previously saved checkpoints still
            # load with strict key matching.
            self.register_buffer('relative_indices',
                                 get_relative_distances(window_size) + window_size - 1,
                                 persistent=False)
            self.pos_embedding = nn.Parameter(torch.randn(2 * window_size - 1, 2 * window_size - 1))
        else:
            self.pos_embedding = nn.Parameter(torch.randn(window_size ** 2, window_size ** 2))

        self.to_out = nn.Linear(inner_dim, dim)

    def forward(self, x):
        """Apply windowed attention to ``x`` of shape (b, n_h, n_w, dim).

        Returns a tensor with the same shape as ``x``.
        """
        if self.shifted:
            x = self.cyclic_shift(x)

        b, n_h, n_w, _, h = *x.shape, self.heads

        qkv = self.to_qkv(x).chunk(3, dim=-1)
        nw_h = n_h // self.window_size
        nw_w = n_w // self.window_size

        # Group positions by window: (b, heads, windows, positions-per-window, head_dim).
        q, k, v = map(
            lambda t: rearrange(t, 'b (nw_h w_h) (nw_w w_w) (h d) -> b h (nw_h nw_w) (w_h w_w) d',
                                h=h, w_h=self.window_size, w_w=self.window_size), qkv)

        dots = einsum('b h w i d, b h w j d -> b h w i j', q, k) * self.scale

        if self.relative_pos_embedding:
            dots += self.pos_embedding[self.relative_indices[:, :, 0], self.relative_indices[:, :, 1]]
        else:
            dots += self.pos_embedding

        if self.shifted:
            # Windows are flattened row-major, so the last nw_w entries are the
            # bottom row of windows and every nw_w-th entry starting at
            # nw_w - 1 is the right-most column.
            dots[:, :, -nw_w:] += self.upper_lower_mask
            dots[:, :, nw_w - 1::nw_w] += self.left_right_mask

        attn = dots.softmax(dim=-1)

        out = einsum('b h w i j, b h w j d -> b h w i d', attn, v)
        out = rearrange(out, 'b h (nw_h nw_w) (w_h w_w) d -> b (nw_h w_h) (nw_w w_w) (h d)',
                        h=h, w_h=self.window_size, w_w=self.window_size, nw_h=nw_h, nw_w=nw_w)
        out = self.to_out(out)

        if self.shifted:
            out = self.cyclic_back_shift(out)
        return out

In [6]:
class SwinBlock(nn.Module):
    """One transformer block: windowed attention followed by an MLP.

    Both sub-layers are wrapped as ``Residual(PreNorm(dim, sublayer))``.

    Args:
        dim: channel size of the channels-last input.
        heads: number of attention heads.
        head_dim: channels per attention head.
        mlp_dim: hidden width of the feed-forward sub-layer.
        shifted: whether the attention uses shifted windows.
        window_size: side length of the attention windows.
        relative_pos_embedding: passed through to ``WindowAttention``.
    """

    def __init__(self, dim, heads, head_dim, mlp_dim, shifted, window_size, relative_pos_embedding):
        super().__init__()
        # Attention is built before the MLP so parameters are initialised in
        # the same order in which the sub-layers are applied.
        attention = WindowAttention(
            dim=dim,
            heads=heads,
            head_dim=head_dim,
            shifted=shifted,
            window_size=window_size,
            relative_pos_embedding=relative_pos_embedding,
        )
        self.attention_block = Residual(PreNorm(dim, attention))

        feed_forward = FeedForward(dim=dim, hidden_dim=mlp_dim)
        self.mlp_block = Residual(PreNorm(dim, feed_forward))

    def forward(self, x):
        """Apply the attention sub-layer, then the MLP sub-layer."""
        return self.mlp_block(self.attention_block(x))

In [7]:
class PatchMerging(nn.Module):
    """Downsample a (b, c, h, w) map by folding each patch into the channel axis.

    Every non-overlapping ``downscaling_factor x downscaling_factor`` patch is
    flattened to ``c * downscaling_factor**2`` values and linearly projected
    to ``out_channels``. The result is channels-last:
    (b, h // factor, w // factor, out_channels).

    Args:
        in_channels: channel count ``c`` of the input.
        out_channels: channel count after the projection.
        downscaling_factor: patch side length (and stride).
    """

    def __init__(self, in_channels, out_channels, downscaling_factor):
        super().__init__()
        factor = downscaling_factor
        self.downscaling_factor = factor
        self.patch_merge = nn.Unfold(kernel_size=factor, stride=factor, padding=0)
        self.linear = nn.Linear(in_channels * factor ** 2, out_channels)

    def forward(self, x):
        """Merge patches of ``x`` (b, c, h, w) and project them."""
        batch, _, height, width = x.shape
        factor = self.downscaling_factor
        grid_h = height // factor
        grid_w = width // factor

        # Unfold yields (b, c * factor**2, grid_h * grid_w); restore the 2-D
        # grid and move the features to the last axis for the linear layer.
        patches = self.patch_merge(x)
        patches = patches.view(batch, -1, grid_h, grid_w)
        tokens = patches.permute(0, 2, 3, 1)
        return self.linear(tokens)

In [8]:
class StageModule(nn.Module):
    """One backbone stage: patch merging followed by pairs of Swin blocks.

    Each pair consists of a regular-window block and a shifted-window block,
    so ``layers`` must be even. The input is channels-first (b, c, h, w) and
    so is the output: (b, hidden_dimension, h // factor, w // factor).

    Args:
        in_channels: channels of the incoming feature map.
        hidden_dimension: channels produced by this stage.
        layers: total number of Swin blocks (must be divisible by 2).
        downscaling_factor: spatial reduction applied by the patch merging.
        num_heads: attention heads per block.
        head_dim: channels per attention head.
        window_size: attention window side length.
        relative_pos_embedding: passed through to every block.
    """

    def __init__(self, in_channels, hidden_dimension, layers, downscaling_factor, num_heads, head_dim, window_size,
                 relative_pos_embedding):
        super().__init__()
        assert layers % 2 == 0, 'Stage layers need to be divisible by 2 for regular and shifted block.'

        self.patch_partition = PatchMerging(in_channels=in_channels, out_channels=hidden_dimension,
                                            downscaling_factor=downscaling_factor)

        def make_block(shifted):
            # All blocks of a stage share every setting except `shifted`.
            return SwinBlock(dim=hidden_dimension, heads=num_heads, head_dim=head_dim,
                             mlp_dim=hidden_dimension * 4, shifted=shifted, window_size=window_size,
                             relative_pos_embedding=relative_pos_embedding)

        # Regular block first, shifted block second, for each pair.
        self.layers = nn.ModuleList([
            nn.ModuleList([make_block(False), make_block(True)])
            for _ in range(layers // 2)
        ])

    def forward(self, x):
        """Run the stage on ``x`` (b, c, h, w) and return a channels-first map."""
        x = self.patch_partition(x)
        for pair in self.layers:
            for block in pair:
                x = block(x)
        # Blocks work channels-last; convert back to (b, c, h, w).
        return x.permute(0, 3, 1, 2)

In [9]:
class FeaturePyramidNetwork(nn.Module):
    """Top-down feature pyramid over a list of channels-first feature maps.

    Each input level is projected to ``out_channels`` with a 1x1 convolution.
    Starting from the last (coarsest) level, each merged map is upsampled with
    nearest-neighbour interpolation to the next finer level's spatial size
    and added to that level's projection. Every merged map is then passed
    through its own 3x3 convolution.

    Args:
        in_channels: sequence with the channel count of each input level,
            ordered from the first to the last level.
        out_channels: channel count of every output level.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Lateral connections
        self.lateral_convs = nn.ModuleList([
            nn.Conv2d(in_channels[i], out_channels, kernel_size=1)
            for i in range(len(in_channels))
        ])

        # FPN connections
        self.fpn_convs = nn.ModuleList([
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
            for _ in range(len(in_channels))
        ])

    def forward(self, features):
        """Fuse ``features`` top-down.

        Args:
            features: list of (b, c_i, h_i, w_i) tensors, one per level, in
                the same order as ``in_channels``.

        Returns:
            A list of (b, out_channels, h_i, w_i) tensors in the same order.
        """
        laterals = [conv(feature) for feature, conv in zip(features, self.lateral_convs)]

        fpn_features = [laterals[-1]]
        for lateral in reversed(laterals[:-1]):
            # Use nn.functional explicitly instead of a short module-level
            # alias: notebook cells can rebind such a global (e.g.
            # `with open(...) as f`), which would break this forward pass.
            up = nn.functional.interpolate(fpn_features[0], size=lateral.shape[-2:], mode='nearest')
            fpn_features.insert(0, lateral + up)

        outputs = [conv(feature) for feature, conv in zip(fpn_features, self.fpn_convs)]
        return outputs

In [11]:
class SwinTransformerWithFPN(nn.Module):
    """Four-stage Swin backbone with an FPN neck and an MLP classification head.

    The four stage outputs (hidden_dim, 2x, 4x and 8x channels) are fused by a
    feature pyramid, each pyramid level is global-average-pooled, the pooled
    vectors are concatenated and fed to an MLP that ends in a sigmoid.

    Args (all keyword-only):
        hidden_dim: channel count of the first stage.
        layers: number of Swin blocks in each of the four stages.
        heads: number of attention heads in each of the four stages.
        channels: channels of the input image.
        num_classes: number of sigmoid outputs.
        head_dim: channels per attention head.
        window_size: attention window side length.
        downscaling_factors: spatial reduction of each of the four stages.
        relative_pos_embedding: passed through to every attention layer.
        fpn_channels: channel count of every FPN output level; the head's
            input width is derived from it. Defaults to 256.
    """

    def __init__(self, *, hidden_dim, layers, heads, channels=3, num_classes=1, head_dim=32, window_size=4,
                 downscaling_factors=(4, 2, 2, 2), relative_pos_embedding=True, fpn_channels=256):
        super().__init__()

        # Output width of each stage, and the input width each stage receives.
        stage_dims = [hidden_dim, hidden_dim * 2, hidden_dim * 4, hidden_dim * 8]
        stage_inputs = [channels] + stage_dims[:-1]

        def build_stage(idx):
            return StageModule(in_channels=stage_inputs[idx], hidden_dimension=stage_dims[idx],
                               layers=layers[idx], downscaling_factor=downscaling_factors[idx],
                               num_heads=heads[idx], head_dim=head_dim, window_size=window_size,
                               relative_pos_embedding=relative_pos_embedding)

        self.stage1 = build_stage(0)
        self.stage2 = build_stage(1)
        self.stage3 = build_stage(2)
        self.stage4 = build_stage(3)

        # FPN
        self.fpn = FeaturePyramidNetwork(in_channels=stage_dims, out_channels=fpn_channels)

        # Global Average Pooling
        self.gap = nn.AdaptiveAvgPool2d(1)

        # Classification head: one pooled fpn_channels-vector per pyramid
        # level is concatenated, so the input width follows from both.
        self.mlp_head = nn.Sequential(
            nn.Linear(fpn_channels * len(stage_dims), 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes),
            nn.Sigmoid()
        )

    def forward(self, img):
        """Return sigmoid outputs of shape (b, num_classes) for ``img`` (b, channels, h, w)."""
        x1 = self.stage1(img)
        x2 = self.stage2(x1)
        x3 = self.stage3(x2)
        x4 = self.stage4(x3)

        # FPN
        fpn_features = self.fpn([x1, x2, x3, x4])

        # Global Average Pooling on each FPN output -> (b, fpn_channels, 1, 1)
        pooled_features = [self.gap(feature) for feature in fpn_features]

        # Concatenate the pooled features along channels and drop the 1x1 grid
        x = torch.cat(pooled_features, dim=1).flatten(1)

        # Classification
        return self.mlp_head(x)

# Dataloader

In [12]:
# class ImageLoader(Dataset):
#     def __init__(self, df, file_hdf, transform=None):
#         self.df = pd.read_csv(df)
#         self.fp_hdf = h5py.File(file_hdf, mode="r")
#         self.isic_ids = self.df['isic_id'].values
#         self.targets = self.df['target'].values
#         self.transform = transform
        
#     def __len__(self):
#         return len(self.isic_ids)

#     def __getitem__(self, index):
#         isic_id = self.isic_ids[index]
#         image = Image.open(BytesIO(self.fp_hdf[isic_id][()]))
#         target = self.targets[index]
        
#         if self.transform:
#             return (self.transform(image), target)
#         else:
#             return (image, target)

# # Used as the DataLoader sampler: balances the classes by giving minority-class samples a higher probability of being selected for a batch.
# df = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')
# labels = df['target'].values
# label_counts = Counter(labels) # Calculate the frequency of each class
# total_count = len(labels)
# class_weights = {label: total_count / count for label, count in label_counts.items()} 
# sample_weights = [class_weights[label] for label in labels] 
# sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)        

In [13]:
# image_size = 128
# train_transforms = transforms.Compose([
#     transforms.Resize(image_size),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(degrees=20),
#     transforms.ToTensor()
# ])


# train_dataset = ImageLoader(
#     df='/kaggle/input/isic-2024-challenge/train-metadata.csv',
#     file_hdf='/kaggle/input/isic-2024-challenge/train-image.hdf5',
#     transform=train_transforms
# )

# train_loader = DataLoader(dataset=train_dataset, 
#                           batch_size=128, 
#                           sampler=sampler, 
#                           shuffle=False, 
#                           num_workers=4)

# Training

In [14]:
# class FocalLoss(nn.Module):
#     def __init__(self, alpha=0.25, gamma=2):
#         super(FocalLoss, self).__init__()
#         self.alpha = alpha
#         self.gamma = gamma

#     def forward(self, inputs, targets):
#         BCE_loss = nn.functional.binary_cross_entropy(inputs, targets, reduction='none')
#         pt = torch.exp(-BCE_loss)
#         F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss
#         return torch.mean(F_loss)

# def performance_metrics(predictions, labels, threshold=0.5):
#     binary_predictions = (predictions >= threshold).float()
    
#     true_positives = torch.sum((binary_predictions == 1) & (labels == 1)).item()
#     true_negatives = torch.sum((binary_predictions == 0) & (labels == 0)).item()
#     false_positives = torch.sum((binary_predictions == 1) & (labels == 0)).item()
#     false_negatives = torch.sum((binary_predictions == 0) & (labels == 1)).item()
    
#     accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives)
    
#     precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0.0
#     recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0.0
    
#     return accuracy, precision, recall

In [15]:
# device = 'cuda'
# model = SwinTransformerWithFPN(
#     hidden_dim=96,
#     layers=(2, 2, 6, 2),
#     heads=(3, 6, 12, 24),
#     num_classes=1
# ).to(device)

# # Training parameters
# epochs = 30
# loss_fn = FocalLoss(alpha=0.25, gamma=2)

# # Optimizer with weight decay (L2 regularization)
# optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# # Learning rate scheduler
# scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

# # Training loop
# train_loss = []
# train_accuracy = []
# train_precision = []
# train_recall = []
# learning_rates = []

# for epoch in range(epochs):
#     print(f'Epoch {epoch + 1} / {epochs}')
#     model.train()
#     epoch_loss = 0
#     epoch_accuracy = 0
#     epoch_precision = 0
#     epoch_recall = 0
#     count = 0

#     for images, labels in tqdm(train_loader):
#         optimizer.zero_grad()
#         images = images.to(device)
#         labels = labels.unsqueeze(1).float().to(device)
        
#         outputs = model(images)
#         loss = loss_fn(outputs, labels)
        
#         loss.backward()
#         optimizer.step()
        
#         epoch_loss += loss.item()
#         temp = performance_metrics(outputs, labels)
#         epoch_accuracy += temp[0]
#         epoch_precision += temp[1]
#         epoch_recall += temp[2]
#         count += 1
    
#     # Calculate average metrics for the epoch
#     avg_loss = epoch_loss / count
#     avg_accuracy = epoch_accuracy / count
#     avg_precision = epoch_precision / count
#     avg_recall = epoch_recall / count
    
#     train_loss.append(avg_loss)
#     train_accuracy.append(avg_accuracy)
#     train_precision.append(avg_precision)
#     train_recall.append(avg_recall)
#     learning_rates.append(optimizer.param_groups[0]['lr'])
    
#     print(f'Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.4f}, Precision: {avg_precision:.4f}, Recall: {avg_recall:.4f}')
#     print(f'Learning Rate: {optimizer.param_groups[0]["lr"]:.6f}')
    
#     if (epoch + 1) % 5 == 0:
#         torch.save(model.state_dict(), f'/kaggle/working/my_model_epoch_{epoch+1}.pt')
    
#     # Step the scheduler
#     scheduler.step()

# # Save training history
# history = {
#     'train_loss': train_loss,
#     'train_accuracy': train_accuracy,
#     'train_precision': train_precision,
#     'train_recall': train_recall,
#     'learning_rates': learning_rates
# }

# with open('/kaggle/working/train_metrics.json', 'w') as metrics_file:
#     json.dump({k: [float(v) for v in vals] for k, vals in history.items()}, metrics_file)

# print("Training completed and metrics saved.")

# Prediction

In [16]:
# device='cuda'
# class TestImageLoader(Dataset):
#     def __init__(self, file_hdf, transform=None):
#         self.fp_hdf = h5py.File(file_hdf, mode="r")
#         self.isic_ids = list(self.fp_hdf.keys())
#         self.transform = transform
        
#     def __len__(self):
#         return len(self.isic_ids)
    
#     def __getitem__(self, index):
#         isic_id = self.isic_ids[index]
#         image = Image.open(BytesIO(self.fp_hdf[isic_id][()]))
        
#         if self.transform:
#             return self.transform(image), isic_id
#         else:
#             return image, isic_id

# test_transform = transforms.Compose([
#     transforms.Resize((128, 128)),  
#     transforms.ToTensor()  
# ])

# test_dataset = TestImageLoader('/kaggle/input/isic-2024-challenge/test-image.hdf5', transform=test_transform)

# test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4)


# model = SwinTransformerWithFPN(
#     hidden_dim=96,
#     layers=(2, 2, 6, 2),
#     heads=(3, 6, 12, 24),
#     num_classes=1
# ).to(device)

# model.load_state_dict(torch.load('/kaggle/input/model2/my_model_epoch_20.pt'))
# model.eval()

# results = {}
# with torch.no_grad():
#     for images, isic_ids in test_loader:
#         images = images.to(device)
#         outputs = model(images)
        
#         for isic_id, prob in zip(isic_ids, outputs):
#             results[isic_id] = prob.item()

# df = pd.DataFrame(list(results.items()), columns=['isic_id', 'target'])
# df.to_csv('/kaggle/working/submission.csv' , index=False)
# print("Predictions completed and saved to submission.csv")
# test_dataset.fp_hdf.close()

Predictions completed and saved to submission.csv


--------------------------------------------------------