# Dataset configuration

In [1]:
# Shell escape: print the system date/time at which this notebook was run.
!date

2020年 1月19日 日曜日 18時24分35秒 JST


In [2]:
import torch
import torch.utils.data
from torchvision import datasets, transforms

# Data loader

In [33]:
def init_dataloader(root="../data/", cuda=False, batch_size=128):
    """Build MNIST train/test data loaders that yield 2-D images.

    Each sample goes through ``ToTensor`` and is then indexed with ``[0]``,
    which drops the leading axis of the tensor. Batches drawn from the
    training loader in this notebook have shape ``(batch_size, 28, 28)``.

    Args:
        root: Directory handed to ``datasets.MNIST`` as the dataset root.
            The training split is requested with ``download=True``; the
            test split is opened without a download request.
        cuda: When truthy, both loaders are created with ``num_workers=1``
            and ``pin_memory=True``; otherwise the ``DataLoader`` defaults
            are used.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader
        shuffles its data, the test loader does not.
    """
    loader_options = {}
    if cuda:
        loader_options = {"num_workers": 1, "pin_memory": True}

    # ToTensor output indexed at 0 -> one 2-D image per sample.
    to_image = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambd=lambda img: img[0]),
    ])

    train_set = datasets.MNIST(root, train=True, download=True, transform=to_image)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, **loader_options
    )

    test_set = datasets.MNIST(root, train=False, transform=to_image)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False, **loader_options
    )

    return train_loader, test_loader

In [34]:
# Build both loaders with the default arguments (root="../data/", cuda=False, batch_size=128).
train_loader, test_loader = init_dataloader()

In [53]:
# IPython introspection: show the type, length, source file and docstring of train_loader.
? train_loader

[0;31mType:[0m        DataLoader
[0;31mString form:[0m <torch.utils.data.dataloader.DataLoader object at 0x1223e8410>
[0;31mLength:[0m      469
[0;31mFile:[0m        ~/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/dataloader.py
[0;31mDocstring:[0m  
Data loader. Combines a dataset and a sampler, and provides an iterable over
the given dataset.

The :class:`~torch.utils.data.DataLoader` supports both map-style and
iterable-style datasets with single- or multi-process loading, customizing
loading order and optional automatic batching (collation) and memory pinning.

See :py:mod:`torch.utils.data` documentation page for more details.

Arguments:
    dataset (Dataset): dataset from which to load the data.
    batch_size (int, optional): how many samples per batch to load
        (default: ``1``).
    shuffle (bool, optional): set to ``True`` to have the data reshuffled
        at every epoch (default: ``False``).
    sampler (Sampler, optional): defines the 

In [54]:
# Length of the loader, i.e. batches per pass: 60000 samples at batch size 128 -> 469.
len(train_loader)

469

In [55]:
# Number of samples in the underlying training dataset.
len(train_loader.dataset)

60000

In [57]:
# Shape of the image tensor stored on the dataset object itself.
train_loader.dataset.data.shape

torch.Size([60000, 28, 28])

In [35]:
# Draw a single batch from the training loader; the labels are discarded.
# The built-in next() works on any iterator, whereas the Python-2-style
# `.next()` method is not part of the iterator protocol and is missing from
# the DataLoader iterators of newer PyTorch releases.
_x, _ = next(iter(train_loader))

In [36]:
# Shape of one batch of images as produced by the loader.
_x.shape

torch.Size([128, 28, 28])

In [37]:
# Swap axes 0 and 1 of the batch: (128, 28, 28) -> (28, 128, 28).
_x.transpose(0, 1).shape

torch.Size([28, 128, 28])

In [38]:
# Batch size the loader was configured with.
train_loader.batch_size

128

In [48]:
# 1-D tensor of ten ones, used in the following cells to try out torch.cat.
h = torch.ones(10)

In [51]:
# Concatenate h with itself along its last axis, giving a length-20 vector.
torch.cat([h, h], dim=-1)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])

In [49]:
# h itself is unchanged by torch.cat: still length 10.
h.shape

torch.Size([10])

# Data loader (flattened images)

In [3]:
def init_dataloader(root="../data/", cuda=False, batch_size=128):
    """Build MNIST train/test data loaders that yield flattened images.

    Each sample goes through ``ToTensor`` and is then reshaped with
    ``view(-1)`` into a 1-D vector. Batches drawn from the training loader
    in this notebook have shape ``(batch_size, 784)``.

    Args:
        root: Directory handed to ``datasets.MNIST`` as the dataset root.
            The training split is requested with ``download=True``; the
            test split is opened without a download request.
        cuda: When truthy, both loaders are created with ``num_workers=1``
            and ``pin_memory=True``; otherwise the ``DataLoader`` defaults
            are used.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader
        shuffles its data, the test loader does not.
    """
    loader_options = {}
    if cuda:
        loader_options = {"num_workers": 1, "pin_memory": True}

    # ToTensor output collapsed to a single axis -> one vector per sample.
    to_vector = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambd=lambda img: img.view(-1)),
    ])

    train_set = datasets.MNIST(root, train=True, download=True, transform=to_vector)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, **loader_options
    )

    test_set = datasets.MNIST(root, train=False, transform=to_vector)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False, **loader_options
    )

    return train_loader, test_loader

In [4]:
# Build both loaders with the default arguments (root="../data/", cuda=False, batch_size=128).
train_loader, test_loader = init_dataloader()

In [5]:
# Shape of the image tensor stored on the dataset object itself.
train_loader.dataset.data.shape

torch.Size([60000, 28, 28])

In [6]:
# Draw a single batch from the training loader; the labels are discarded.
# The built-in next() works on any iterator, whereas the Python-2-style
# `.next()` method is not part of the iterator protocol and is missing from
# the DataLoader iterators of newer PyTorch releases.
_x, _ = next(iter(train_loader))

In [7]:
# Shape of one batch as produced by the loader: each image is now a flat vector.
_x.shape

torch.Size([128, 784])

In [8]:
# `train_data` is a deprecated alias in torchvision's MNIST dataset;
# `data` is the supported attribute and refers to the same stored tensor.
train_loader.dataset.data.shape



torch.Size([60000, 28, 28])

In [None]:
# Display the dataset object behind the training loader. The original cell
# ended in a dangling `.` (a leftover tab-completion stub), which is a
# syntax error and would stop a full re-run of the notebook.
train_loader.dataset