# Lecture 08: PyTorch DataLoader

In [1]:
import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [9]:
class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-separated numeric text file.

    Each row of the file is one sample: every column except the last is an
    input feature and the last column is the target.

    Args:
        path: File to load. Defaults to ``"data-diabetes.csv"``, the file
            this notebook originally hard-coded, so ``DiabetesDataset()``
            behaves as before.
    """

    # 1. download, read data, etc.
    def __init__(self, path="data-diabetes.csv"):
        # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt
        # returns a 1-D array and the column slicing below raises IndexError.
        xy = np.loadtxt(path, delimiter=",", dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]
        # Features: all columns but the last.
        self.x_data = torch.from_numpy(xy[:, 0:-1])
        # Target: last column, selected with a list index so it stays 2-D
        # (one column per row) rather than collapsing to a vector.
        self.y_data = torch.from_numpy(xy[:, [-1]])

    # 2. return one item on the index
    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    # 3. return the data length
    def __len__(self):
        """Return the number of rows that were loaded."""
        return self.len

# Build the dataset once, then wrap it in a loader that reshuffles the rows
# and serves them in mini-batches of 32 using two worker processes.
dataset = DiabetesDataset()
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)

In [3]:
class Model(torch.nn.Module):
    """Three-layer fully connected network with sigmoid activations.

    Layer widths are 8 -> 6 -> 4 -> 1; a sigmoid follows every linear layer,
    including the last, so each output value lies strictly between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        # Linear layers are created in forward order: l1, l2, l3.
        self.l1 = torch.nn.Linear(8, 6)
        self.l2 = torch.nn.Linear(6, 4)
        self.l3 = torch.nn.Linear(4, 1)

        # A single activation module is shared by all three layers.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three linear+sigmoid stages and return the result."""
        hidden = self.sigmoid(self.l1(x))
        hidden = self.sigmoid(self.l2(hidden))
        return self.sigmoid(self.l3(hidden))

In [15]:
model = Model()

# Binary cross-entropy averaged over each batch, minimised with plain SGD.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
epochs = 2
for epoch in range(1, epochs + 1):
    for i, (inputs, labels) in enumerate(train_loader):
        # Batches from the DataLoader are tensors and autograd works on
        # tensors directly, so the deprecated Variable wrapper is not used.

        # Forward pass: compute predicted y by passing x to the model
        y_pred = model(inputs)

        # Compute and print loss
        loss = criterion(y_pred, labels)
        print(f"{epoch} Epoch {i} Iters : loss = {loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Blank line between epochs in the printed log.
    print()

1 Epoch 0 Iters : loss = 0.6750399470329285
1 Epoch 1 Iters : loss = 0.6718722581863403
1 Epoch 2 Iters : loss = 0.6909482479095459
1 Epoch 3 Iters : loss = 0.6763814091682434
1 Epoch 4 Iters : loss = 0.6832721829414368
1 Epoch 5 Iters : loss = 0.6912857890129089
1 Epoch 6 Iters : loss = 0.6420340538024902
1 Epoch 7 Iters : loss = 0.675744891166687
1 Epoch 8 Iters : loss = 0.6444568634033203
1 Epoch 9 Iters : loss = 0.6762385368347168
1 Epoch 10 Iters : loss = 0.6615130305290222
1 Epoch 11 Iters : loss = 0.650576651096344
1 Epoch 12 Iters : loss = 0.6647963523864746
1 Epoch 13 Iters : loss = 0.6527439951896667
1 Epoch 14 Iters : loss = 0.6883065700531006
1 Epoch 15 Iters : loss = 0.6780675053596497
1 Epoch 16 Iters : loss = 0.6475871205329895
1 Epoch 17 Iters : loss = 0.6998685002326965
1 Epoch 18 Iters : loss = 0.6802727580070496
1 Epoch 19 Iters : loss = 0.6317934989929199
1 Epoch 20 Iters : loss = 0.650439441204071
1 Epoch 21 Iters : loss = 0.6613128781318665
1 Epoch 22 Iters : loss

## The following dataset loaders are [available](https://pytorch.org/vision/stable/datasets.html)
- MNIST and FashionMNIST
- COCO (Captioning and Detection)
- LSUN Classification
- ImageFolder
- ImageNet
- CIFAR10 and CIFAR100
- STL10
- SVHN
- PhotoTour

### MNIST dataset loading

In [17]:
from torchvision import datasets
from torchvision.transforms import ToTensor

In [18]:
# MNIST Dataset
# Training split, stored under ./data/ with download enabled; images are
# converted with ToTensor().
train_dataset = datasets.MNIST(
    root="./data/", train=True, transform=ToTensor(), download=True
)

# Test split, read from the same root directory; no download flag is passed.
test_dataset = datasets.MNIST(root="./data/", train=False, transform=ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [None]:
batch_size = 32

# Data Loader (Input Pipeline)
# NOTE: `train_loader` is rebound here. The diabetes loader created earlier in
# this notebook is no longer reachable under this name, so re-running the
# training cell after this one would iterate MNIST batches instead.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
# The test loader keeps the dataset order (no shuffling).
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

for batch_idx, (data, target) in enumerate(train_loader):
    # `data` and `target` are used exactly as the loader yields them; the
    # deprecated Variable wrapper has been dropped.
    # ...
    pass
    
# https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/logistic_regression/main.py