In [1]:
!pip install timm



In [2]:
import cv2
import os
import torch
import torchvision
import timm
import csv

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from timm.data import create_dataset
from timm.data.transforms_factory import create_transform

In [3]:
!pip install gdown
!gdown --id 1-3_5KxKYPqsDDYLdmjQoaoHuPFN0uF9c

Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
Successfully installed gdown-4.7.1
Downloading...
From (uriginal): https://drive.google.com/uc?id=1-3_5KxKYPqsDDYLdmjQoaoHuPFN0uF9c
From (redirected): https://drive.google.com/uc?id=1-3_5KxKYPqsDDYLdmjQoaoHuPFN0uF9c&confirm=t&uuid=a9edf5a8-1f8f-48de-ad81-9950594b693f
To: /kaggle/working/data.zip
100%|█████████████████████████████████████████| 366M/366M [00:02<00:00, 159MB/s]


In [4]:
! unzip data.zip

Archive:  data.zip
  inflating: sample_submission.csv   
  inflating: Untitled0.ipynb         
   creating: data/
   creating: data/train/
   creating: data/train/61/
  inflating: data/train/61/675.jpg   
  inflating: data/train/61/679.jpg   
  inflating: data/train/61/674.jpg   
  inflating: data/train/61/677.jpg   
  inflating: data/train/61/680.jpg   
  inflating: data/train/61/678.jpg   
  inflating: data/train/61/681.jpg   
  inflating: data/train/61/676.jpg   
   creating: data/train/95/
  inflating: data/train/95/1027.jpg  
  inflating: data/train/95/1025.jpg  
  inflating: data/train/95/1029.jpg  
  inflating: data/train/95/1028.jpg  
  inflating: data/train/95/1031.jpg  
  inflating: data/train/95/1030.jpg  
  inflating: data/train/95/1026.jpg  
  inflating: data/train/95/1024.jpg  
  inflating: data/train/95/1033.jpg  
  inflating: data/train/95/1032.jpg  
  inflating: data/train/95/1034.jpg  
   creating: data/train/59/
  inflating: data/train/59/660.jpg   
  inflating: data

In [5]:
import numpy as np
# Define CutMix function
def cutmix_data(inputs, labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1
    rand_index = torch.randperm(inputs.size(0)).to(inputs.device)
    target_a = labels
    target_b = labels[rand_index]
    bbx1, bby1, bbx2, bby2 = rand_bbox(inputs.size(), lam)
    inputs[:, :, bbx1:bbx2, bby1:bby2] = inputs[rand_index, :, bbx1:bbx2, bby1:bby2]
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (inputs.size()[-1] * inputs.size()[-2]))
    return inputs, target_a, target_b, lam

def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)  # Updated from np.int to int
    cut_h = int(H * cut_rat)  # Updated from np.int to int
    cx = np.random.randint(W)
    cy = np.random.randint(H)
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2

In [92]:
import time
from tempfile import TemporaryDirectory
# Define the training function
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    best_acc = 0.0
    epochs_no_improve = 0  # Track epochs with no improvement
    #early_stopping_patience = 6  # Early stopping patience
    #with TemporaryDirectory() as tempdir:
    best_model_params_path = '/kaggle/working/best_model_params.pth.tar'
    torch.save(model.state_dict(), best_model_params_path)
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    # Apply CutMix or standard training
                    if phase == 'train' and np.random.rand() < 0.5: # 50% probability
                        inputs, targets_a, targets_b, lam = cutmix_data(inputs, labels, alpha=1.0)
                        outputs = model(inputs)
#                             import pdb; pdb.set_trace()
                        loss = lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b)
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val':
              if epoch_acc > best_acc:
                  best_acc = epoch_acc
                  torch.save(model.state_dict(), best_model_params_path)
              else:
                #epochs_no_improve += 1
#                     if epochs_no_improve == early_stopping_patience:
#                         print("Early stopping triggered")
#                         time_elapsed = time.time() - since
#                         print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
#                         print(f'Best val Acc: {best_acc:4f}')
#                         # Load best model weights and return
#                         model.load_state_dict(torch.load(best_model_params_path))
#                         return model
                continue
        if phase == 'train':
            scheduler.step()

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    model.load_state_dict(torch.load(best_model_params_path))
    return model

# Data augmentation and normalization for training
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.RandomGrayscale(p=0.25),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [93]:
from torchvision import datasets, transforms
from torch.utils.data import random_split
# Load data
transform = transforms.Compose([
     transforms.Resize((224, 224)),
     transforms.Normalize((0.5,), (0.5,))
])

#data_dir = '/content/drive/MyDrive/CSE244/ucsc-cse-244-fall-2023-final-project/data/'
data_dir = '/kaggle/working/data'
full_train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), data_transforms['train'])
val_size = int(0.2 * len(full_train_dataset))
train_size = int(0.8 * len(full_train_dataset))
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = data_transforms['val']
#train_dataset = CustomImageDataset(train_images_in_ram, train_images_label, data_transforms['train'])
#val_dataset = CustomImageDataset(val_images_in_ram,val_images_label, data_transforms['val'])
#test_dataset = CustomImageDataset(test_images_in_ram, transform)
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4),
    'val': DataLoader(val_dataset, batch_size=16, shuffle=True, num_workers=4)
}
dataset_sizes = {x: len(dataloaders[x].dataset) for x in ['train', 'val']}
class_names = full_train_dataset.classes
#trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64,
#                                           shuffle=True)
#valloader = torch.utils.data.DataLoader(val_dataset, batch_size=64,
#                                        shuffle=False)
#testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64,
#                                        shuffle=False)

In [94]:
import torch
import gc
torch.cuda.empty_cache()
gc.collect()

526

In [67]:
import torch.nn as nn
import torch.optim as optim
model = timm.create_model('swin_large_patch4_window7_224', pretrained=True, num_classes=100)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.001)

# Use ReduceLROnPlateau scheduler
rp_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_ft, mode='min', factor=0.1, patience=2, verbose=True)

model_ft = train_model(model, criterion, optimizer_ft, rp_lr_scheduler, num_epochs=30)

Epoch 0/29
----------
train Loss: 3.8825 Acc: 0.1856
val Loss: 2.6570 Acc: 0.4506

Epoch 1/29
----------
train Loss: 2.1997 Acc: 0.5284
val Loss: 1.4395 Acc: 0.6438

Epoch 2/29
----------
train Loss: 1.5355 Acc: 0.6497
val Loss: 0.9508 Acc: 0.7146

Epoch 3/29
----------
train Loss: 1.1771 Acc: 0.7259
val Loss: 0.8194 Acc: 0.7489

Epoch 4/29
----------
train Loss: 1.2577 Acc: 0.7092
val Loss: 0.7627 Acc: 0.7511

Epoch 5/29
----------
train Loss: 1.0435 Acc: 0.7688
val Loss: 0.6702 Acc: 0.7833

Epoch 6/29
----------
train Loss: 0.9673 Acc: 0.7956
val Loss: 0.6287 Acc: 0.7854

Epoch 7/29
----------
train Loss: 0.8452 Acc: 0.8203
val Loss: 0.6732 Acc: 0.7618

Epoch 8/29
----------
train Loss: 0.8121 Acc: 0.8197
val Loss: 0.5735 Acc: 0.8026

Epoch 9/29
----------
train Loss: 0.7570 Acc: 0.8611
val Loss: 0.5612 Acc: 0.8176

Epoch 10/29
----------
train Loss: 0.7795 Acc: 0.8069
val Loss: 0.5397 Acc: 0.8219

Epoch 11/29
----------
train Loss: 0.7364 Acc: 0.8632
val Loss: 0.5069 Acc: 0.8176

Ep

In [12]:
from PIL import Image
def visualize_model_predictions(model, img_path, class_names):
    was_training = model.training
    model.eval()

    # Load and transform the image
    img = Image.open(img_path)
    img_transformed = data_transforms['val'](img)
    img_transformed = img_transformed.unsqueeze(0)
    img_transformed = img_transformed.to(device)

    with torch.no_grad():
        outputs = model(img_transformed)
        _, preds = torch.max(outputs, 1)
        predicted_label = class_names[preds[0]]

    model.train(mode=was_training)

    return img_path.split('/')[-1], predicted_label

In [84]:
import os
import csv

# Directory containing test images
test_data_dir = '/kaggle/working/data/test'

# File to store results
output_file = 'predictions_swin_60.csv'

# Load your trained model
model_ft.eval()

# Open CSV file for writing
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['ID', 'Label'])

    # Iterate over test images
    for img_file in os.listdir(test_data_dir):
        img_path = os.path.join(test_data_dir, img_file)
        if os.path.isfile(img_path):
            img_name, predicted_label = visualize_model_predictions(model_ft, img_path, class_names)
            writer.writerow([img_name, predicted_label])

In [16]:
!CUDA_VISIBLE_DEVCIES=0 python3 inference.py --help

usage: inference.py [-h] --model MODEL --ckpt CKPT [--output_dir OUTPUT_DIR]
                    [--output_csv OUTPUT_CSV] [--num_classes NUM_CLASSES]
                    [--in_chans IN_CHANS] [--batch BATCH] --test_dir TEST_DIR
                    [--workers WORKERS] [--topk TOPK]

Inference script for timm

options:
  -h, --help            show this help message and exit
  --model MODEL, -m MODEL
                        Name of the model
  --ckpt CKPT, -c CKPT  Path to the checkpoint file
  --output_dir OUTPUT_DIR, -od OUTPUT_DIR
                        Directory to the inference results
  --output_csv OUTPUT_CSV, -oc OUTPUT_CSV
                        Name of the output CSV file
  --num_classes NUM_CLASSES, -nc NUM_CLASSES
                        Number of the classes
  --in_chans IN_CHANS, -in IN_CHANS
                        Number of the input channels
  --batch BATCH, -b BATCH
                        Inference batch size
  --test_dir TEST_DIR, -td TEST_DIR
                      

In [12]:
!CUDA_VISIBLE_DEVCIES=0 python3 inference.py \
    -m swin_large_patch4_window7_224 \
    -c ./best_model_params.pth.tar \
    -b 16 \
    -td /kaggle/working/data/test/

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Processed batch: 0
Processed batch: 1
Processed batch: 2
Processed batch: 3
Processed batch: 4
Processed batch: 5
Processed batch: 6
Processed batch: 7
Processed batch: 8
Processed batch: 9
Processed batch: 10
Processed batch: 11
Processed batch: 12
Processed batch: 13
Processed batch: 14
Processed batch: 15
Processed batch: 16
Processed batch: 17
Processed batch: 18
Processed batch: 19
Processed batch: 20
Processed batch: 21
Processed batch: 22
Processed batch: 23
Processed batch: 24
Processed batch: 25
Processed batch: 26
Processed batch: 27
Processed batch: 28
Processed batch: 29
Processed batch: 30
Processed batch: 31
Processed batch: 32
Processed batch: 33
Processed batch: 34
Processed batch: 35
Processed batch: 36
Processed batch: 37
Processed batch: 38
Processed batch: 39
Processed batch: 40
Processed batch: 41
Processed batch: 42
Processed batch: 43
Processed batch: 44
Processed batch: 45
Processed batch: 46