In [1]:
import torch
from torch.utils import data

In [2]:
class Dataset(data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, list_IDs, labels):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        X = torch.load('data/' + ID + '.pt')
        y = self.labels[ID]

        return X, y

In [4]:
partition = {'train': ['id-1', 'id-2', 'id-3'], 'validation': ['id-4']}

labels = {'id-1': 0, 'id-2': 1, 'id-3': 2, 'id-4': 1}

In [3]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
cudnn.benchmark = True

# Parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}
max_epochs = 100

# Datasets
partition = # IDs
labels = # Labels

# Generators
training_set = Dataset(partition['train'], labels)
training_generator = data.DataLoader(training_set, **params)

validation_set = Dataset(partition['validation'], labels)
validation_generator = data.DataLoader(validation_set, **params)

# Loop over epochs
for epoch in range(max_epochs):
    # Training
    for local_batch, local_labels in training_generator:
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)

        # Model computations
       

    # Validation
    with torch.set_grad_enabled(False):
        for local_batch, local_labels in validation_generator:
            # Transfer to GPU
            local_batch, local_labels = local_batch.to(device), local_labels.to(device)

            # Model computations
           

SyntaxError: invalid syntax (<ipython-input-3-88d14ac2b29c>, line 13)

### Building Efficient Custom Datasets in PyTorch

Tutorial from https://towardsdatascience.com/building-efficient-custom-datasets-in-pytorch-2563b946fd9f

When creating our own ```Dataset``` child classes, we must overrite the parent ```__len__``` and ```__gititem__``` methods.

In [16]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os

class NumbersDataset(Dataset):
    def __init__(self):
        self.samples = list(range(1, 1001))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]


dataset = NumbersDataset()
print(len(dataset))
print(dataset[100])
print(dataset[122:361])

1000
101
[123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 

In [23]:
class TESNamesDataset(Dataset):
    def __init__(self, data_root):
        self.samples = []

        for race in os.listdir(data_root):
            race_folder = os.path.join(data_root, race)

            for gender in os.listdir(race_folder):
                gender_filepath = os.path.join(race_folder, gender)

                with open(gender_filepath, 'r') as gender_file:
                    for name in gender_file.read().splitlines():
                        self.samples.append((race, gender, name))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]


if __name__ == '__main__':
    
    dataset = TESNamesDataset('tes-names/')
    dataloader = DataLoader(dataset, batch_size=50, shuffle=True, num_workers=2)    
    for i, batch in enumerate(dataloader):
        print(i, batch)

0 [('Redguard', 'Redguard', 'Orc', 'Breton', 'Orc', 'Nord', 'Imperial', 'Dunmer', 'Altmer', 'Redguard', 'Breton', 'Orc', 'Imperial', 'Argonian', 'Breton', 'Dunmer', 'Altmer', 'Dunmer', 'Altmer', 'Bosmer', 'Breton', 'Dunmer', 'Nord', 'Altmer', 'Redguard', 'Dunmer', 'Dunmer', 'Altmer', 'Nord', 'Orc', 'Nord', 'Bosmer', 'Nord', 'Redguard', 'Breton', 'Breton', 'Imperial', 'Nord', 'Imperial', 'Redguard', 'Nord', 'Khajiit', 'Orc', 'Orc', 'Bosmer', 'Bosmer', 'Dunmer', 'Dunmer', 'Khajiit', 'Breton'), ('Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female', 'Male', 'Male'), ('Namvar', 'Miharaz', 'Rulfzub', 'Audric', 'Rogurog

361 [('Breton', 'Breton', 'Imperial', 'Nord', 'Nord', 'Nord', 'Orc', 'Dunmer', 'Breton', 'Khajiit', 'Bosmer', 'Khajiit', 'Argonian', 'Orc', 'Argonian', 'Redguard', 'Nord', 'Imperial', 'Nord', 'Nord', 'Breton', 'Khajiit', 'Bosmer', 'Altmer', 'Breton', 'Dunmer', 'Altmer', 'Orc', 'Argonian', 'Bosmer', 'Imperial', 'Breton', 'Altmer', 'Altmer', 'Altmer', 'Khajiit', 'Altmer', 'Bosmer', 'Khajiit', 'Bosmer', 'Bosmer', 'Dunmer', 'Redguard', 'Khajiit', 'Dunmer', 'Bosmer', 'Breton', 'Nord', 'Redguard', 'Orc'), ('Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Male', 'Female', 'Male', 'Male'), ('Marchel', 'Malielle', 'Arius', 'Hoki', 'Hosgunn', 'Sa

In [20]:
from torch.utils.data import Dataset
import torch

class NumbersDataset(Dataset):
    def __init__(self, low, high):
        self.samples = list(range(low, high))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        n = self.samples[idx]
        successors = torch.arange(4).float() + n + 1
        noisy = torch.randn(4) + successors
        return n, successors, noisy


from torch.utils.data import DataLoader

dataset = NumbersDataset(100, 120)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
print(next(iter(dataloader)))

[tensor([107, 113, 110, 100, 112, 119, 117, 101, 111, 108]), tensor([[108., 109., 110., 111.],
        [114., 115., 116., 117.],
        [111., 112., 113., 114.],
        [101., 102., 103., 104.],
        [113., 114., 115., 116.],
        [120., 121., 122., 123.],
        [118., 119., 120., 121.],
        [102., 103., 104., 105.],
        [112., 113., 114., 115.],
        [109., 110., 111., 112.]]), tensor([[106.7339, 108.0328, 108.1583, 110.3603],
        [113.1134, 112.7986, 116.1238, 118.2518],
        [111.6271, 113.6644, 111.9994, 114.6567],
        [102.5171, 103.3511, 102.4026, 102.6091],
        [113.0629, 114.9223, 114.4135, 115.4583],
        [121.2697, 121.2336, 122.4660, 122.8412],
        [117.6345, 118.7719, 119.4575, 122.0959],
        [102.1951, 102.3860, 104.9529, 103.6430],
        [111.7986, 112.5359, 113.9334, 116.0126],
        [111.1650, 111.0419, 110.9979, 111.9770]])]


In [None]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

In [None]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))