# Create a Custom Dataset Class for Image Data

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torchvision import datasets, transforms
from PIL import Image
import os

In [2]:
class ImageDataset(Dataset):
    """Image dataset backed by a directory with one sub-folder per class.

    Expected layout::

        image_dir/
            class_a/  img1.png, img2.png, ...
            class_b/  img3.png, ...

    Class folders are visited in sorted order, so the integer label given to
    a class depends only on the folder names. This keeps labels consistent
    across separate train/val/test directories that contain the same class
    folders (``os.listdir`` alone returns entries in arbitrary order).

    Args:
        image_dir: Root directory containing one sub-folder per class.
        transform: Optional callable applied to each loaded PIL image.

    Attributes:
        classes: Sorted list of class folder names.
        class_to_idx: Mapping from class folder name to integer label.
        image_paths: Path of every image file, grouped by class.
        labels: Integer label for the image at the same index in
            ``image_paths``.

    Raises:
        FileNotFoundError: If ``image_dir`` does not exist.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only sub-directories are classes; stray files in the root
        # (README, .DS_Store, ...) are ignored instead of crashing the
        # nested listdir below.
        self.classes = sorted(
            entry
            for entry in os.listdir(image_dir)
            if os.path.isdir(os.path.join(image_dir, entry))
        )
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

        for class_name in self.classes:
            class_path = os.path.join(image_dir, class_name)
            label = self.class_to_idx[class_name]
            # Sorted so that sample order is reproducible between runs.
            for img_name in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_name)
                # Skip nested directories; only regular files are samples.
                if os.path.isfile(img_path):
                    self.image_paths.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        """Return the total number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        The image is loaded from disk, converted to RGB, and passed through
        ``transform`` when one was supplied.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager closes the underlying file promptly;
        # convert() returns an independent, fully loaded copy.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label


In [3]:
# Preprocessing applied to every image: resize to a fixed size, convert to a
# tensor, then normalize each of the three channels.
transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),  # every image becomes 128x128
        transforms.ToTensor(),  # PIL image -> tensor
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # per-channel mean
            std=[0.229, 0.224, 0.225],  # per-channel standard deviation
        ),
    ]
)

In [6]:
# Root folders of the three splits. These are placeholders: point them at
# real directories (one sub-folder per class) before running this cell.
train_image_dir = "path/to/train"
val_image_dir = "path/to/val"
test_image_dir = "path/to/test"

# One dataset per split, all sharing the same preprocessing pipeline.
train_image_dataset = ImageDataset(train_image_dir, transform=transform)
val_image_dataset = ImageDataset(val_image_dir, transform=transform)
test_image_dataset = ImageDataset(test_image_dir, transform=transform)

# Batch the datasets. Only the training split is shuffled; validation and
# test keep their on-disk order.
train_image_loader = DataLoader(train_image_dataset, batch_size=32, shuffle=True)
val_image_loader = DataLoader(val_image_dataset, batch_size=32, shuffle=False)
test_image_loader = DataLoader(test_image_dataset, batch_size=32, shuffle=False)

# Walk the training loader once and show the shape of every batch.
for images, labels in train_image_loader:
    print(images.shape, labels.shape)


FileNotFoundError: [Errno 2] No such file or directory: 'path/to/train'

In [7]:

# Show the raw directory listing first.
print(os.listdir("/content/sample_data"))

# Then show each entry with its index, followed by its full path.
for idx, file_name in enumerate(os.listdir("/content/sample_data")):
    print(f"{idx} {file_name}")
    print(os.path.join("/content/sample_data", file_name))

['README.md', 'anscombe.json', 'mnist_train_small.csv', 'mnist_test.csv', 'california_housing_test.csv', 'california_housing_train.csv']
0 README.md
/content/sample_data/README.md
1 anscombe.json
/content/sample_data/anscombe.json
2 mnist_train_small.csv
/content/sample_data/mnist_train_small.csv
3 mnist_test.csv
/content/sample_data/mnist_test.csv
4 california_housing_test.csv
/content/sample_data/california_housing_test.csv
5 california_housing_train.csv
/content/sample_data/california_housing_train.csv
