### iter 内置函数

In [1]:
# A small list used to demonstrate the built-in iter() function.
a = [1, 2, 3]
a

[1, 2, 3]

In [5]:
# iter() returns an iterator over the list; every next() call hands back the
# following element, so three calls walk through all three items of `a`.
# One more next(it) after this would raise StopIteration.
it = iter(a)
next(it), next(it), next(it)

(1, 2, 3)

### TensorDataset 对象

In [6]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [7]:
# Ground-truth weight vector and bias passed to the synthetic-data helper.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Ask the helper for 1000 examples. Presumably the features are sampled at
# random and the labels follow the linear model given by true_w / true_b —
# confirm against the d2l documentation.
# NOTE(review): no random seed is set in the visible cells, so if the helper
# is random the values shown in the outputs will differ between runs.
features, labels = d2l.synthetic_data(true_w, true_b, 1000)
# Check the types of the two returned objects.
type(features), type(labels)

(torch.Tensor, torch.Tensor)

In [9]:
# Wrap the feature and label tensors in one dataset object so that both are
# indexed together.
dataset = data.TensorDataset(features, labels)

In [13]:
# Inspect the shape of the feature tensor.
features.shape

torch.Size([1000, 2])

In [14]:
# Peek at the first 10 feature rows.
features[:10]

tensor([[-0.5508, -1.0643],
        [-0.1805, -0.6048],
        [ 0.0414,  0.2144],
        [-0.2203,  1.0309],
        [ 0.3030,  0.2973],
        [ 1.4278,  1.6690],
        [-0.1587, -2.0298],
        [-0.4854,  0.1182],
        [-0.2017, -0.5720],
        [-1.0073,  1.5268]])

In [15]:
# Peek at the first 10 labels.
labels[:10]

tensor([[ 6.7137],
        [ 5.8925],
        [ 3.5648],
        [ 0.2666],
        [ 3.7937],
        [ 1.3820],
        [10.7859],
        [ 2.8458],
        [ 5.7555],
        [-3.0155]])

In [16]:
# Dataset object:
# the TensorDataset keeps the features and labels tensors side by side,
# and it supports slicing to fetch elements from the dataset.
# The value returned by indexing is a tuple:
# its 1st element holds the features, its 2nd element the matching labels.
dataset[:10]

(tensor([[-0.5508, -1.0643],
         [-0.1805, -0.6048],
         [ 0.0414,  0.2144],
         [-0.2203,  1.0309],
         [ 0.3030,  0.2973],
         [ 1.4278,  1.6690],
         [-0.1587, -2.0298],
         [-0.4854,  0.1182],
         [-0.2017, -0.5720],
         [-1.0073,  1.5268]]),
 tensor([[ 6.7137],
         [ 5.8925],
         [ 3.5648],
         [ 0.2666],
         [ 3.7937],
         [ 1.3820],
         [10.7859],
         [ 2.8458],
         [ 5.7555],
         [-3.0155]]))

### DataLoader 对象

Data loader. Combines a dataset and a sampler, and provides an iterable over
the given dataset.

```
data.DataLoader(
    dataset: torch.utils.data.dataset.Dataset[+T_co],
    batch_size: Optional[int] = 1,
    shuffle: bool = False,
    sampler: Union[torch.utils.data.sampler.Sampler, Iterable, NoneType] = None,
    batch_sampler: Union[torch.utils.data.sampler.Sampler[Sequence], Iterable[Sequence], NoneType] = None,
    num_workers: int = 0,
    collate_fn: Optional[Callable[[List[~T]], Any]] = None,
    pin_memory: bool = False,
    drop_last: bool = False,
    timeout: float = 0,
    worker_init_fn: Optional[Callable[[int], NoneType]] = None,
    multiprocessing_context=None,
    generator=None,
    *,
    prefetch_factor: int = 2,
    persistent_workers: bool = False,
)
```

In [23]:
# A TensorDataset merely bundles the tensors together.
# It can be iterated over, but the number of samples produced per step
# cannot be chosen by the caller.
# A DataLoader builds an iterable on top of the dataset and lets the caller
# decide how many samples come out per iteration (10 here, in shuffled order).
data_loader = data.DataLoader(dataset, batch_size=10, shuffle=True)