In [1]:
import opendatasets as od

# Download the dataset
od.download("https://www.kaggle.com/datasets/yasserhessein/the-kvasir-dataset")

Dataset URL: https://www.kaggle.com/datasets/yasserhessein/the-kvasir-dataset
Downloading the-kvasir-dataset.zip to ./the-kvasir-dataset


100%|██████████| 2.32G/2.32G [00:36<00:00, 68.1MB/s]





In [2]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define the paths for source, training, testing, and validation
source_path = "the-kvasir-dataset/kvasir-dataset-v2"
train_path = "data/training"
test_path = "data/testing"
validation_path = "data/validation"

# Define the split ratios
train_ratio = 0.7
test_ratio = 0.2
validation_ratio = 0.1

# Create the directories
for path in [train_path, test_path, validation_path]:
    os.makedirs(path, exist_ok=True)    
    
# Process each class
for class_name in os.listdir(source_path):
    class_dir = os.path.join(source_path, class_name)
    if os.path.isdir(class_dir):
        # List all images
        images = [os.path.join(class_dir, f) for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
        
        # Split the dataset
        train_val, test = train_test_split(images, test_size=test_ratio, random_state=42)
        train, val = train_test_split(train_val, test_size=validation_ratio/(train_ratio+validation_ratio), random_state=42)
        
        # Define a function to copy files
        def copy_files(filenames, dest_dir):
            os.makedirs(dest_dir, exist_ok=True)
            for f in filenames:
                shutil.copy(f, dest_dir)
                
        # Copy the files
        copy_files(train, os.path.join(train_path, class_name))
        copy_files(test, os.path.join(test_path, class_name))
        copy_files(val, os.path.join(validation_path, class_name))

# Delete the downloaded dataset
shutil.rmtree("the-kvasir-dataset")

In [3]:
import os

# Get the length of the training, testing, and validation datasets
train_path = "data/training/normal-z-line"
test_path = "data/testing/normal-z-line"
validation_path = "data/validation/normal-z-line"

print("Training: ", len(os.listdir(train_path)))
print("Testing: ", len(os.listdir(test_path)))
print("Validation: ", len(os.listdir(validation_path)))

Training:  699
Testing:  200
Validation:  101


In [4]:
import torch

# Import PyTorch and setup device-agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(torch.__version__)
print(device)

2.4.0
cuda


In [6]:
from torchvision import datasets, transforms

train_path = "data/training"
test_path = "data/testing"

# Create simple transformations
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Use ImageFolder to create datasets
train_dataset = datasets.ImageFolder(root=train_path, 
                                     transform=data_transform,
                                     target_transform=None)

test_dataset = datasets.ImageFolder(root=test_path,
                                    transform = data_transform)

print(f"Train dataset:\n {train_dataset}\nTest dataset:\n {test_dataset}")

Train dataset:
 Dataset ImageFolder
    Number of datapoints: 5592
    Root location: data/training
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )
Test dataset:
 Dataset ImageFolder
    Number of datapoints: 1600
    Root location: data/testing
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )


In [7]:
# Get the class names as a dict
class_names = train_dataset.class_to_idx
print(class_names)

{'dyed-lifted-polyps': 0, 'dyed-resection-margins': 1, 'esophagitis': 2, 'normal-cecum': 3, 'normal-pylorus': 4, 'normal-z-line': 5, 'polyps': 6, 'ulcerative-colitis': 7}


In [8]:
# Check the length of the datasets
len(train_dataset), len(test_dataset)

(5592, 1600)

In [9]:
from torch.utils.data import DataLoader
import os

NUM_WORKERS = os.cpu_count()

# Create data loaders
train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=32, 
                          num_workers=NUM_WORKERS,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=32,
                         num_workers=NUM_WORKERS,
                         shuffle=False)

print(train_loader)
print(test_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7fc51f039ac0>
<torch.utils.data.dataloader.DataLoader object at 0x7fc51f0396d0>


In [11]:
%%writefile modular/data_setup.py
"""
Defines the functionality for creating PyTorch DataLoaders for the multi-class classification dataset.
"""
import os

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = os.cpu_count()

def create_dataloaders(train_dir: str, 
                       test_dir: str, 
                       transform: transforms.Compose, 
                       batch_size: int, 
                       num_workers: int=NUM_WORKERS):
    """Takes in a training and testing directory path and turns them into PyTorch DataLoaders.

    Args:
        train_dir (str): Path to training directory.
        test_dir (str): Path to testing directory.
        transform (transforms.Compose): Torchvision transforms to apply to the datasets.
        batch_size (int): Number of samples per batch in each DataLoader.
        num_workers (_type_): Number of workers per DataLoader. Currently set to os.cpu_count().

    Returns:
        Tuple: Returns a tuple of (train_loader, test_loader, class_names). Where class_names is a dict of the target classes.
    """
    # Use ImageFolder to create datasets
    train_data = datasets.ImageFolder(root=train_dir,
                                      transform=transform)
    test_data = datasets.ImageFolder(root=test_dir,
                                     transform=transform)
    
    # Get the class names
    class_names = train_data.class_to_idx
    
    # Create DataLoaders
    train_loader = DataLoader(dataset=train_data, 
                              batch_size=batch_size, 
                              num_workers=num_workers, 
                              shuffle=True,
                              pin_memory=True)
    
    test_loader = DataLoader(dataset=test_data,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             shuffle=False,
                             pin_memory=True)
    
    return train_loader, test_loader, class_names

Overwriting modular/data_setup.py


In [12]:
import torch
from torch import nn

# Define the baseline model
class BaseLine(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_shape),
            nn.ReLU()
        )
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.layer_stack(x)

In [13]:
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Instantiate the model
torch.manual_seed(42)
model = BaseLine(input_shape=224*224*3, 
                 hidden_units=10, 
                 output_shape=len(train_dataset.classes)).to(device)
model

BaseLine(
  (layer_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=150528, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=8, bias=True)
    (4): ReLU()
  )
)

In [14]:
import torch

# Get a batch of images and labels from the DataLoader
img_batch, label_batch = next(iter(train_loader))

# Get a single images from the batch
img_single, label_single = img_batch[0].unsqueeze(dim=0), label_batch[0]
print(f"Single image shape: {img_single.shape}\n")

# Perform a forward pass
model.eval()
with torch.inference_mode():
    pred = model(img_single.to(device))

# Print out the conversion from logits, to pred probs, and pred label
print(f"Output logits:\n{pred}\n")
print(f"Output prediction probabilities:\n{torch.softmax(pred, dim=1)}\n")
print(f"Output prediction label:\n{torch.argmax(torch.softmax(pred, dim=1), dim=1)}\n")
print(f"Actual label:\n{label_single}")

Single image shape: torch.Size([1, 3, 224, 224])

Output logits:
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1466, 0.0979, 0.3001]],
       device='cuda:0')

Output prediction probabilities:
tensor([[0.1161, 0.1161, 0.1161, 0.1161, 0.1161, 0.1345, 0.1281, 0.1568]],
       device='cuda:0')

Output prediction label:
tensor([7], device='cuda:0')

Actual label:
5


In [17]:
%%writefile modular/models/baseline_model.py
"""
Defines a PyTorch baseline model for multi-class classification.
"""
import torch
from torch import nn

class BaseLine(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        """Defines a simple feedforward neural network for multi-class classification.

        Args:
            input_shape (int): Number of input channels.
            hidden_units (int): Number of hidden units between layers.
            output_shape (int): Number of output units.
        """
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_shape),
            nn.ReLU()
        )
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.layer_stack(x)

Writing modular/models/baseline_model.py
