In [1]:
!nvidia-smi

Sun Jun  2 16:42:03 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  | 00000000:81:00.0 Off |                    0 |
| N/A   31C    P0              53W / 400W |      4MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
!pip install -q transformers datasets timm torch torchvision

In [112]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
import timm

from datasets import load_dataset

# Fetch the layout dataset by its hub identifier
dataset = load_dataset("mowoe/random-layouts")

# Preprocessing pipeline, applied in order:
#   1. resize every image to 224x224,
#   2. convert it to a tensor,
#   3. normalize each channel with the given mean / std
#      (these values match the statistics commonly used with ImageNet-pretrained models).
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

def transform_examples(examples):
    """Preprocess every image of a batch of examples.

    Args:
        examples: Mapping whose ``"image"`` entry is an iterable of objects
            exposing ``.convert("RGB")`` (presumably PIL images — confirm
            against the dataset schema).

    Returns:
        The same ``examples`` mapping, with ``"image"`` replaced by the list
        of outputs of the module-level ``transform`` pipeline. Other keys are
        left untouched.
    """
    processed = []
    for raw_image in examples["image"]:
        # Force three channels before the pipeline runs.
        rgb_image = raw_image.convert("RGB")
        processed.append(transform(rgb_image))
    examples["image"] = processed
    return examples

# Register transform_examples as the dataset's transform. Per the `datasets` documentation the
# function is applied on the fly when examples are accessed, and it receives a batch-style dict
# (which is why transform_examples iterates over examples["image"]).
dataset.set_transform(transform_examples)

In [118]:
# Debug: fetch the first training example once and inspect its transformed image
first_image = dataset['train'][0]['image']
print(type(first_image))  # Should be <class 'torch.Tensor'>
print(first_image.shape)  # Should be torch.Size([3, 224, 224])

<class 'torch.Tensor'>
torch.Size([3, 224, 224])


In [116]:
# Split dataset into train (80%) and test (remaining 20%) sets.
# A seeded generator makes the partition identical on every Restart & Run All;
# without it the test set changes between runs and results are not comparable.
train_size = int(0.8 * len(dataset['train']))
test_size = len(dataset['train']) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset['train'], [train_size, test_size], generator=split_generator
)

In [122]:
# Custom collate function to handle batching
def collate_fn(batch):
    """Merge a list of example dicts into a single batch dict.

    Args:
        batch: Sequence of dicts, each with an ``'image'`` tensor and a
            ``'label'`` value convertible to an integer tensor element.

    Returns:
        dict with ``'image'`` (the images stacked along a new leading
        dimension) and ``'label'`` (a 1-D ``torch.long`` tensor).
    """
    image_list = []
    label_list = []
    for sample in batch:
        image_list.append(sample['image'])
        label_list.append(sample['label'])

    stacked_images = torch.stack(image_list)
    label_tensor = torch.tensor(label_list, dtype=torch.long)
    return {'image': stacked_images, 'label': label_tensor}

# Both loaders share the batch size and the dict-building collate function;
# only the training loader shuffles its examples.
loader_kwargs = {'batch_size': 32, 'collate_fn': collate_fn}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [123]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Pre-trained ResNet-50 from timm, with its final fully connected layer
# replaced by a fresh 2-output linear classifier
model = timm.create_model('resnet50', pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)
model.to(device)

# Cross-entropy loss, and Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=5, device=None):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: ``nn.Module`` to optimise. It is put in training mode at the
            start of every epoch.
        train_loader: Iterable of batches; each batch is a dict with an
            ``'image'`` tensor and a ``'label'`` tensor.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. It is assumed to be the batch mean (the default
            reduction of ``nn.CrossEntropyLoss``), because the loss is
            re-weighted by the batch size below.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. When ``None`` the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function no longer relies on a global.

    Returns:
        list[float]: the per-sample mean loss of each epoch, in order.
        An epoch that saw no samples is reported as ``nan``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    epoch_losses = []
    for epoch in range(num_epochs):
        # Re-assert training mode each epoch in case an evaluation ran in between.
        model.train()
        running_loss = 0.0
        num_samples = 0
        for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            images = batch['image'].to(device)
            labels = batch['label'].to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so a smaller final
            # batch does not skew the epoch average.
            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            num_samples += batch_size

        # Divide by the samples actually seen rather than len(loader.dataset):
        # this stays correct with drop_last or loaders without a .dataset.
        epoch_loss = running_loss / num_samples if num_samples else float('nan')
        epoch_losses.append(epoch_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    return epoch_losses


In [124]:
# Train for 5 epochs using the model, training loader, loss and optimizer defined in the cells above;
# train_model prints the average loss after each epoch.
train_model(model, train_loader, criterion, optimizer, num_epochs=5)

  return F.conv2d(input, weight, bias, self.stride,
100%|██████████| 75/75 [01:08<00:00,  1.09it/s]


Epoch [1/5], Loss: 0.5116


100%|██████████| 75/75 [01:06<00:00,  1.12it/s]


Epoch [2/5], Loss: 0.3922


100%|██████████| 75/75 [01:07<00:00,  1.12it/s]


Epoch [3/5], Loss: 0.3059


100%|██████████| 75/75 [01:06<00:00,  1.13it/s]


Epoch [4/5], Loss: 0.2370


100%|██████████| 75/75 [01:08<00:00,  1.09it/s]

Epoch [5/5], Loss: 0.1676





In [None]:
# Evaluate the model
from sklearn.metrics import accuracy_score

def evaluate_model(model, test_loader, device=None):
    """Compute and print the classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: ``nn.Module`` to evaluate. It is switched to eval mode and is
            left in eval mode when the function returns.
        test_loader: Iterable of batches; each batch is a dict with an
            ``'image'`` tensor and a ``'label'`` tensor.
        device: Device the images are moved to. When ``None`` the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function no longer relies on a global.

    Returns:
        float: the accuracy reported by ``accuracy_score``, or ``nan`` when
        ``test_loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            images = batch['image'].to(device)
            outputs = model(images)
            # Predicted class = index of the largest output along dim 1.
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.tolist())
            # Labels are only compared on the host, so they never need to visit the device.
            all_labels.extend(batch['label'].tolist())

    accuracy = accuracy_score(all_labels, all_preds) if all_labels else float('nan')
    print(f'Accuracy: {accuracy:.4f}')
    return accuracy


# Score the model on the held-out test loader; evaluate_model prints the resulting accuracy
evaluate_model(model, test_loader)