In [7]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import pandas as pd
import torch
from torch.utils.data import Dataset

In [8]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [9]:
class CustomCSVDataset(Dataset):
    """Tabular dataset that serves one CSV row per sample.

    Every row is split into three parts:

    * the ``id`` column,
    * the target columns ``<trait>_mean`` for each trait in ``self.targets``,
    * the feature columns, i.e. everything that is not ``id``,
      ``<trait>_mean`` or ``<trait>_sd``.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the CSV file containing data.
            transform (callable, optional): Optional transform applied to the
                feature tensor of each sample.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.transform = transform
        self.targets = ["X4", "X11", "X18", "X26", "X50", "X3112"]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(id, features, targets)`` for the row at position ``idx``.

        Args:
            idx (int or torch.Tensor): Positional row index; a tensor index is
                converted with ``tolist()`` first.

        Returns:
            tuple: the raw ``id`` value of the row, a float32 feature tensor
            (after ``transform`` if one was given) and a float32 target tensor.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data_frame.iloc[idx]

        # Build the column lists once per call instead of repeating the
        # comprehension for both the selection and the drop.
        mean_columns = [f"{target}_mean" for target in self.targets]
        sd_columns = [f"{target}_sd" for target in self.targets]

        sample_id = sample["id"]
        target_values = sample[mean_columns]
        feature_values = sample.drop(["id"] + mean_columns + sd_columns)

        # Convert through NumPy rather than handing the Series to torch:
        # torch would iterate the Series with integer __getitem__ calls, which
        # on a string-labelled index relies on pandas' positional fallback.
        # The explicit float32 cast also copes with object-dtype rows.
        features = torch.tensor(feature_values.to_numpy(dtype="float32"), dtype=torch.float32)
        targets = torch.tensor(target_values.to_numpy(dtype="float32"), dtype=torch.float32)

        # "is not None" so that a falsy-but-valid callable is still applied.
        if self.transform is not None:
            features = self.transform(features)

        return sample_id, features, targets

In [59]:
class PGLSDataset(Dataset):
    """Dataset that pairs each CSV row with the image named after its ``id``.

    Every item is ``(image, features, targets)`` where the image is loaded via
    ``image_folder.loader`` and the tabular part is split into target columns
    (``<trait>_mean``) and feature columns (everything that is not ``id``,
    ``<trait>_mean`` or ``<trait>_sd``).
    """

    def __init__(self, csv_file, image_folder, transform_csv=None,
                 image_subdir="0", image_ext=".jpeg"):
        """
        Args:
            csv_file (string): Path to the CSV file containing data.
            image_folder: Object exposing ``root``, ``loader`` and
                ``transform`` attributes (the notebook passes a torchvision
                ``ImageFolder``); it is used to locate, load and transform
                the images.
            transform_csv (callable, optional): Optional transform applied to
                the feature tensor of each sample.
            image_subdir (string, optional): Sub-directory of
                ``image_folder.root`` that holds the image files.
            image_ext (string, optional): File extension of the image files,
                including the leading dot.
        """
        self.data_frame = pd.read_csv(csv_file, sep=",")
        self.image_folder = image_folder
        self.transform_csv = transform_csv
        self.image_subdir = image_subdir
        self.image_ext = image_ext
        self.targets = ["X4", "X11", "X18", "X26", "X50", "X3112"]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(image, features, targets)`` for the row at position ``idx``.

        Args:
            idx (int or torch.Tensor): Positional row index; a tensor index is
                converted with ``tolist()`` first.

        Returns:
            tuple: the loaded image (after ``image_folder.transform`` if set),
            a float32 feature tensor (after ``transform_csv`` if set) and a
            float32 target tensor.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data_frame.iloc[idx]

        # The row Series may hold the id as a float, so cast to int before it
        # becomes part of the file name.
        image_id = int(sample["id"])
        image_path = f"{self.image_folder.root}/{self.image_subdir}/{image_id}{self.image_ext}"
        image = self.image_folder.loader(image_path)

        if self.image_folder.transform is not None:
            image = self.image_folder.transform(image)

        # Build the column lists once per call instead of repeating the
        # comprehension for both the selection and the drop.
        mean_columns = [f"{target}_mean" for target in self.targets]
        sd_columns = [f"{target}_sd" for target in self.targets]

        target_values = sample[mean_columns]
        feature_values = sample.drop(["id"] + mean_columns + sd_columns)

        # Explicit float32 cast so an object-dtype row still converts cleanly.
        features = torch.tensor(feature_values.to_numpy(dtype="float32"), dtype=torch.float32)
        targets = torch.tensor(target_values.to_numpy(dtype="float32"), dtype=torch.float32)

        # "is not None" so that a falsy-but-valid callable is still applied.
        if self.transform_csv is not None:
            features = self.transform_csv(features)

        return image, features, targets

In [56]:
batch_size = 32

# ToTensor converts a PIL image (H x W x C) into a float tensor laid out as
# (C x H x W) with values in the range [0.0, 1.0].
transform = transforms.Compose([transforms.ToTensor()])

In [60]:
# Locations of the training data on disk.
train_csv_path = 'data/train.csv'
train_images_path = 'data/train_images'

# Image-only view of the training set; also passed to PGLSDataset below,
# which uses its root, loader and transform.
train_images_dataset = ImageFolder(train_images_path, transform=transform)

# Tabular-only view of the training set.
train_csv_dataset = CustomCSVDataset(train_csv_path, transform=None)

# Combined dataset yielding (image, features, targets) per CSV row.
train_image_csv_dataset = PGLSDataset(
    csv_file=train_csv_path,
    image_folder=train_images_dataset,
    transform_csv=None,
)

data_loader = DataLoader(
    train_image_csv_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [61]:
# Pull a single batch and report the shape of each component as a sanity check.
for data in data_loader:
    image, features, targets = data
    batch_shapes = (image.shape, features.shape, targets.shape)
    print(*batch_shapes)
    break

torch.Size([32, 3, 512, 512]) torch.Size([32, 163]) torch.Size([32, 6])
