In [None]:
# default_exp tensor

# 01_Tensor

> Building an example `Dataset` and `DataLoader` with PyTorch `Tensors`

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
from fastai2.tabular.all import *

For our data we'll first utilize `TabularPandas` for pre-processing, as there have not been any modifications to `TabularPandas` yet.

In [None]:
# Fetch the ADULT_SAMPLE dataset and read its `adult.csv` into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

In [None]:
# Columns treated as categorical and as continuous features
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
# Pre-processing steps handed to `TabularPandas`
procs = [Categorify, FillMissing, Normalize]
# Target column
y_names = 'salary'
# Train/valid split computed over `range_of(df)`; no seed is passed, so the
# split presumably differs from run to run -- TODO confirm
splits = RandomSplitter()(range_of(df))

In [None]:
# Build the pre-processed tabular object that the tensor datasets are created from
to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,
                   y_names=y_names, splits=splits)

# TensorDataLoaders

In [None]:
#export
class TensorDataset():
    "A `Tensor` dataset object from `TabularPandas`"
    def __init__(self, to:TabularPandas, device='cpu'):
        # Materialise each part of `to` once, as a tensor stored on `device`
        self.cats = tensor(to.cats.to_numpy()).to(device=device, dtype=torch.long)
        self.conts = tensor(to.conts.to_numpy()).to(device=device, dtype=torch.float32)
        self.ys = tensor(to.ys.to_numpy()).to(device)
        self.device = device
        # Number of rows returned per `__getitem__` call. `TensorDataLoader`
        # overwrites this with its own batch size; the default keeps the
        # dataset usable on its own instead of raising `AttributeError`.
        self.bs = 1

    def __getitem__(self, idx):
        "Return `(cats, conts, ys)` for the `bs` consecutive rows starting at `idx[0]`"
        # Only the first index is used: a batch is one contiguous slice, so
        # the remaining entries of `idx` are ignored.
        start = idx[0]
        stop = start + self.bs
        return self.cats[start:stop], self.conts[start:stop], self.ys[start:stop]

    def __len__(self):
        "Number of rows held by the dataset"
        return len(self.cats)

In [None]:
show_doc(TensorDataset)

<h2 id="TensorDataset" class="doc_header"><code>class</code> <code>TensorDataset</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>TensorDataset</code>(**`to`**:`TabularPandas`, **`device`**=*`'cpu'`*)

A `Tensor` dataset object from `TabularPandas`

In [None]:
#hide
# Dataset built from `to` on the default device ('cpu')
ds = TensorDataset(to)

In [None]:
#hide
# Set the slice length by hand; no loader is attached to `ds` to do it
ds.bs = 3
# Indexing with `[0]` slices rows 0..2, so each returned tensor has 3 rows
a,b,c = ds[[0]]
test_eq(len(a), 3)

In [None]:
#export
class TensorDataLoader(DataLoader):
    "A `DataLoader` for a `TensorDataset`"
    def __init__(self, dataset, bs=1, **kwargs):
        super().__init__(dataset, bs=bs, **kwargs)
        # The dataset does the slicing itself, so it has to know the batch size
        self.dataset.bs = bs

    def create_item(self, s):
        "Pass the index through untouched; rows are only fetched in `create_batch`"
        return s

    def create_batch(self, b):
        "Slice one batch out of the dataset and move it to this loader's device"
        # `dataset.bs` is shared state: another loader wrapping the same dataset
        # may have overwritten it since `__init__`, which would make this loader
        # return wrongly sized, overlapping batches. Re-assert ours before slicing.
        self.dataset.bs = self.bs
        cat, cont, y = self.dataset[b]
        return cat.to(self.device), cont.to(self.device), y.to(self.device)

In [None]:
show_doc(TensorDataLoader)

<h2 id="TensorDataLoader" class="doc_header"><code>class</code> <code>TensorDataLoader</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>TensorDataLoader</code>(**`dataset`**, **`bs`**=*`1`*, **\*\*`kwargs`**) :: `DataLoader`

A `DataLoader` for a [`TensorDataset`](/fastai2_tabular_hybrid/Tensor#TensorDataset)

In [None]:
#hide
ds = TensorDataset(to)
dl = TensorDataLoader(ds, bs=3)
# Smoke test: pulling the first batch must not raise
batch = next(iter(dl))
# `drop_last` is not set, so a trailing partial batch is counted too and the
# expected length is ceil(len(ds)/3). `len(ds)//3+1` is one too many whenever
# 3 divides len(ds) exactly.
test_eq(len(dl), (len(ds)+2)//3)

In [None]:
#export
@patch
def shuffle_fn(x:TensorDataLoader):
    "Reorder the rows of the wrapped dataset with a single random permutation"
    ds = x.dataset
    perm = torch.randperm(len(ds))
    # One permutation is applied to all three tensors so that rows stay aligned
    for name in ('cats', 'conts', 'ys'):
        setattr(ds, name, getattr(ds, name)[perm])

In [None]:
#export
@patch
def get_idxs(x:TensorDataLoader):
    "Return the indices to iterate over, shuffling the dataset first when `shuffle` is set"
    if x.n is None:
        # No known length: fall back to one of the `Inf` iterators
        idxs = Inf.count if x.indexed else Inf.nones
    else:
        idxs = list(range(len(x.dataset)))
    # Shuffling reorders the dataset itself, so `idxs` is returned in its original order
    if x.shuffle:
        x.shuffle_fn()
    return idxs

Compare performance between fastai DataLoader, and both CPU and GPU TensorDataLoader.

In [None]:
# Tensor datasets for the train/valid subsets, on the default device ('cpu')
train_ds = TensorDataset(to.train)
valid_ds = TensorDataset(to.valid)

In [None]:
# CPU loaders: training sets `shuffle` and `drop_last`, validation keeps the defaults
train_dl = TensorDataLoader(train_ds, bs=64, shuffle=True, drop_last=True)
valid_dl = TensorDataLoader(valid_ds, bs=64)

In [None]:
# Same subsets with the tensors placed on 'cuda' at construction (needs a CUDA device)
train_ds_gpu = TensorDataset(to.train, 'cuda')
valid_ds_gpu = TensorDataset(to.valid, 'cuda')

In [None]:
# GPU loaders: the datasets already live on 'cuda' and the loaders also target 'cuda'
train_dl_gpu = TensorDataLoader(train_ds_gpu, bs=64, shuffle=True, drop_last=True, device='cuda')
valid_dl_gpu = TensorDataLoader(valid_ds_gpu, bs=64, device='cuda')

In [None]:
# Baseline for the timings: the loaders that `TabularPandas` builds itself
dls = to.dataloaders(bs=64)

In [None]:
%%timeit
# Tensor CPU
for _ in train_dl: pass

28.3 ms ± 62.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%%timeit
# Tensor GPU
for _ in train_dl_gpu: pass

34.1 ms ± 31.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%%timeit
# fastai
for _ in dls[0]: pass

1.1 s ± 2.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit
# Tensor CPU
for _ in valid_dl: pass

6.63 ms ± 4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
%%timeit
# Tensor GPU
for _ in valid_dl_gpu: pass

8.35 ms ± 3.54 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
%%timeit
# fastai
for _ in dls[1]: pass

271 ms ± 658 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# export
class TensorDataLoaders(DataLoaders):
    "A set of two `TensorDataLoader` from a `TabularPandas` object"
    def __init__(self, to:TabularPandas, bs=64, val_bs=None, shuffle_train=True,
                 device='cpu', dataset_device=None, **kwargs):
        # `device` is where batches are moved to; `dataset_device` is where the
        # full tensors are stored, and it falls back to `device` when not given.
        dataset_device = device if dataset_device is None else dataset_device
        train_ds = TensorDataset(to.train, dataset_device)
        valid_ds = TensorDataset(to.valid, dataset_device)
        # Validation batches default to twice the training batch size
        val_bs = bs*2 if val_bs is None else val_bs
        # `DataLoaders.__init__` only takes `path` and `device` besides the loaders,
        # so `path` is split off and every other keyword goes to the loaders alone.
        # Forwarding all of `kwargs` to both raised `TypeError` for any loader option.
        dls_kwargs = {'path': kwargs.pop('path')} if 'path' in kwargs else {}
        train = TensorDataLoader(train_ds, bs=bs, shuffle=shuffle_train, device=device, drop_last=True, **kwargs)
        valid = TensorDataLoader(valid_ds, bs=val_bs, shuffle=False, device=device, **kwargs)
        super().__init__(train, valid, device=device, **dls_kwargs)

In [None]:
show_doc(TensorDataLoaders)

<h2 id="TensorDataLoaders" class="doc_header"><code>class</code> <code>TensorDataLoaders</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>TensorDataLoaders</code>(**`to`**:`TabularPandas`, **`bs`**=*`64`*, **`val_bs`**=*`None`*, **`shuffle_train`**=*`True`*, **`device`**=*`'cpu'`*, **`dataset_device`**=*`None`*, **\*\*`kwargs`**) :: `DataLoaders`

A set of two [`TensorDataLoader`](/fastai2_tabular_hybrid/Tensor#TensorDataLoader) from a `TabularPandas` object

`TensorDataLoaders` show a 20x speedup compared to `TabularPandas`. In general this `DataLoader` will set up your entire dataset on either the GPU or CPU based on `device` (or on `dataset_device`, when it is given). Use `dataset_device='cpu'` if the dataset will not fit into GPU memory.

Compare performance between DataLoaders when batching to GPU.

In [None]:
# Tensors are stored on the CPU; the loaders target 'cuda' for the batches
dls = TensorDataLoaders(to, dataset_device='cpu', device='cuda')

In [None]:
%%timeit
# Tensor CPU to GPU
for _ in dls[0]: pass

52.6 ms ± 57.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
# `dataset_device` is left unset, so it falls back to `device` and the tensors are stored on 'cuda' too
dls = TensorDataLoaders(to, device='cuda')

In [None]:
%%timeit
# Tensor GPU to GPU
for _ in dls[0]: pass

33.8 ms ± 49.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
# Baseline: the loaders `TabularPandas` builds itself, with 'cuda' as the target device
dls = to.dataloaders(bs=64, device='cuda')

In [None]:
%%timeit
# fastai to GPU
for _ in dls[0]: pass

1.11 s ± 441 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
