### Creating Custom Dataloaders in PyTorch

In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import torch
from torch.utils import data
from PIL import Image

Create the partition and label dicts. You can get the cats and dogs dataset <a href="https://www.kaggle.com/chetankv/dogs-cats-images/data">here</a>. Ensure that it has the following structure:

dogs-cats-images

    training_set
    
        dogs
            ...
            
        cats
            ...
            
    test_set
    
        dogs
            ...
            
        cats
            ...

In [45]:
# Root folder of the dataset; expected layout is <root>/<split>/<label>/*.jpg.
data_root = 'dogs-cats-images'

partition = {}  # split name -> list of image file paths in that split
labels = {}     # image file path -> label (name of the folder containing it)

# os.listdir and glob.glob return entries in arbitrary order, so sort at every
# level to make the sample order reproducible across runs and machines.
for split in sorted(os.listdir(data_root)):
    split_dir = os.path.join(data_root, split)

    # Skip stray files in the root (e.g. .DS_Store): calling os.listdir on a
    # regular file would raise NotADirectoryError.
    if not os.path.isdir(split_dir):
        continue

    partition[split] = []

    for label in sorted(os.listdir(split_dir)):
        label_dir = os.path.join(split_dir, label)
        if not os.path.isdir(label_dir):
            continue

        for filename in sorted(glob.glob(os.path.join(label_dir, '*.jpg'))):
            partition[split].append(filename)
            labels[filename] = label

Define ```Dataset``` class.

In [46]:
class Dataset(data.Dataset):
    """Map-style PyTorch dataset that serves a small crop of each image file.

    Each sample is a pair ``(X, y)``:

    * ``X`` -- the top-left ``crop_size`` x ``crop_size`` region of the image,
      decoded as RGB, as a tensor in height x width x channel layout.
    * ``y`` -- a scalar integer tensor: 1 when the sample's label is
      ``"cats"``, 0 for any other label.

    Note: an image smaller than ``crop_size`` in either dimension yields a
    correspondingly smaller ``X`` (plain slicing, no padding).
    """

    def __init__(self, list_IDs, labels, crop_size=10):
        """Store the sample index.

        Args:
            list_IDs: sequence of image file paths, one per sample.
            labels: mapping from each path in ``list_IDs`` to its label string.
            crop_size: side length, in pixels, of the top-left crop taken from
                every image. Defaults to 10, the previously hard-coded value.
        """
        self.labels = labels
        self.list_IDs = list_IDs
        self.crop_size = crop_size

    def __len__(self):
        """Return the total number of samples."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load and return the ``(X, y)`` pair for the sample at ``index``."""
        # Select sample
        ID = self.list_IDs[index]

        # Decode inside a context manager so the underlying file is closed
        # promptly. convert("RGB") guarantees three channels, so grayscale or
        # other non-RGB files no longer break the channel slice below or
        # produce samples whose shapes cannot be batched together.
        with Image.open(ID) as img:
            X = np.array(img.convert("RGB"))

        X = torch.tensor(X[:self.crop_size, :self.crop_size, :])

        # Binary target: "cats" -> 1, every other label -> 0.
        y = torch.tensor(1 if self.labels[ID] == "cats" else 0)

        return X, y

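Test batching with generators. Note that ```AssertionError: can only join a child process``` may appear in the output, as in the traceback below. It is reported as an ignored exception raised while the multi-process `DataLoader` iterator shuts down its worker processes, so it does not interrupt the loop. This seems to be an issue with PyTorch itself.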

In [47]:
# Run on the first CUDA device when PyTorch can see one, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

# DataLoader settings shared by the training and validation loaders.
params = dict(batch_size=8, shuffle=True, num_workers=1)
max_epochs = 2

# Datasets for each split, then the loaders that batch them.
training_set = Dataset(partition['training_set'], labels)
validation_set = Dataset(partition['test_set'], labels)

training_generator = data.DataLoader(training_set, **params)
validation_generator = data.DataLoader(validation_set, **params)

# One full pass over both splits per epoch.
for epoch in range(max_epochs):
    # Training pass
    for local_batch, local_labels in training_generator:
        # Move the batch and its targets to the selected device.
        local_batch = local_batch.to(device)
        local_labels = local_labels.to(device)

        # Model computations

    # Validation pass: gradient tracking is switched off inside this context.
    with torch.set_grad_enabled(False):
        for local_batch, local_labels in validation_generator:
            # Move the batch and its targets to the selected device.
            local_batch = local_batch.to(device)
            local_labels = local_labels.to(device)

            # Model computations

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f9b221cb668>>
Traceback (most recent call last):
  File "/home/greg/virtual_environments/standard_env/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 926, in __del__
    self._shutdown_workers()
  File "/home/greg/virtual_environments/standard_env/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 906, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f9b221cb6d8>>
Traceback (most recent call last):
  File "/home/greg/virtual_environments/standard_env/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 926, in __del__
    self._shutdown_workers()
  File "/home/greg/virtual_environments/standard_env/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 906, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f9b22123c50>>
Traceback (most recent call last):
  File "/home/greg/virtual_environments/standard_env/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 926, in __del__
    self._shutdown_workers()
  File "/home/greg/virtual_environments/standard_env/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 906, in _shutdown_workers
    w.join()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._pare