In [1]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import torch
from torch.utils.data import DataLoader
from transformers import SwinForImageClassification, AutoFeatureExtractor
from torchvision import transforms
from torchvision.datasets import ImageFolder
from tqdm import tqdm  # Optional: for progress bars


In [3]:
# Define your dataset path
data_path = '/kaggle/input/gallbladder/Gallblader Diseases Dataset'

# Load the feature extractor
feature_extractor = AutoFeatureExtractor.from_pretrained('microsoft/swin-base-patch4-window7-224')

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert images to tensors
])

# Load dataset
dataset = ImageFolder(root=data_path, transform=transform)
train_size = int(0.8 * len(dataset))  # 80% for training
test_size = len(dataset) - train_size  # 20% for testing
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]



In [4]:
# Load pre-trained Swin model
model = SwinForImageClassification.from_pretrained(
    'microsoft/swin-base-patch4-window7-224',
    num_labels=len(dataset.classes),  # Number of classes in your dataset
    ignore_mismatched_sizes=True       # Ignore size mismatch in the classifier layer
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([9]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([9, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0-1): 2 x SwinLayer(
              (layernorm_before): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=128, out_features=128, bias=True)
                  (key): Linear(in_features=128, out_features=128, bias=True)
                  (value): Linear(in_features=128, out_features=128, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfO

In [5]:
# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


In [6]:
def train(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in tqdm(train_loader):  # Optional: use tqdm for progress bar
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs).logits  # Get logits from model
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}')


In [7]:
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).logits  # Get logits from model
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the model on the test dataset: {100 * correct / total:.2f}%')


In [8]:
# Train the model
train(model, train_loader, criterion, optimizer, epochs=5)

# Test the model
test(model, test_loader)


100%|██████████| 268/268 [05:06<00:00,  1.14s/it]


Epoch [1/5], Loss: 1.0953


100%|██████████| 268/268 [04:39<00:00,  1.04s/it]


Epoch [2/5], Loss: 0.1137


100%|██████████| 268/268 [04:37<00:00,  1.03s/it]


Epoch [3/5], Loss: 0.0469


100%|██████████| 268/268 [04:38<00:00,  1.04s/it]


Epoch [4/5], Loss: 0.0458


100%|██████████| 268/268 [04:39<00:00,  1.04s/it]


Epoch [5/5], Loss: 0.0163
Accuracy of the model on the test dataset: 99.49%


In [9]:
# Save the trained model and feature extractor to a specified directory
savedPath='/kaggle/working/swin_model'
model.save_pretrained(savedPath)
feature_extractor.save_pretrained(savedPath)


['/kaggle/working/swin_model/preprocessor_config.json']