In [1]:
#hide
#skip
# IPython completer settings: jedi is turned off and greedy completion is turned on.
%config Completer.use_jedi = False
%config IPCompleter.greedy=True
# upgrade fastrl on colab
# The `[ -e /content ]` test gates the install: pip and apt-get only run when /content exists.
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [2]:
# hide
from fastcore.imports import in_colab
# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # NOTE(review): `os` is not imported explicitly in this cell; presumably it
    # arrives through one of the star imports above -- confirm.
    # Unless IN_TEST is set in the environment, assert the expected runtime flags.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # Virtual display is needed for colab
    from pyvirtualdisplay import Display
    # NOTE(review): this rebinds the notebook-level name `display`; if IPython's
    # `display` helper is needed later in a colab session it will be shadowed.
    display = Display(visible=0, size=(400, 300))
    display.start()

In [3]:
# default_exp fastai.data.load

In [4]:
# export
# Python native modules
import os
from typing import Callable
# Third party libs
from fastcore.all import *
import torchdata.datapipes as dp
from torch.utils.data.dataloader_experimental import DataLoader2
from torch.utils.data.graph import traverse
# Local modules
from fastrl.fastai.data.loop.core import *

from fastrl.fastai.data.pipes.map.mux import *
from fastrl.fastai.data.pipes.map.demux import *

# Loading
> Objects using the `Loop` and `DataPipe` API for DataLoading

We will replicate the [fastai MNIST loading example](https://colab.research.google.com/github/fastai/fastbook/blob/master/04_mnist_basics.ipynb).

In [5]:
from fastai.vision.all import untar_data,URLs,get_image_files,PILImage,ToTensor

# Obtain the MNIST_SAMPLE dataset through `untar_data`; `path` holds its return value.
path = untar_data(URLs.MNIST_SAMPLE)

# Display the entries of the `train` sub-folder.
(path/'train').ls()

(#2) [Path('/home/fastrl_user/.fastai/data/mnist_sample/train/7'),Path('/home/fastrl_user/.fastai/data/mnist_sample/train/3')]

First we create the dataset...

In [6]:
# export
class TypeTransformLoop(dp.map.MapDataPipe):
    "Map-style datapipe that runs each indexed element of `datapipe` through a `Pipeline` of `type_tfms`."
    def __init__(self,datapipe, type_tfms):
        # Wrap the transforms once so `__getitem__` can call them as a single callable.
        self.type_tfms = Pipeline(type_tfms)
        self.datapipe = datapipe

    @callback_getitem
    def __getitem__(self, index):
        "Fetch the element at `index` from the wrapped datapipe and return it transformed."
        return self.type_tfms(self.datapipe[index])

    def __len__(self):
        "Length is delegated to the wrapped datapipe."
        return len(self.datapipe)
    
class ItemTransformLoop(dp.iter.IterDataPipe):
    "Iterable datapipe that yields every element of `source_datapipe` after applying a `Pipeline` of `item_tfms`."
    def __init__(self,source_datapipe, item_tfms:List[Callable]):
        # Wrap the transforms once so iteration can call them as a single callable.
        self.item_tfms = Pipeline(item_tfms)
        self.source_datapipe = source_datapipe

    @callback_iter
    def __iter__(self):
        "Lazily transform the upstream elements, one at a time, in upstream order."
        for item in self.source_datapipe:
            transformed = self.item_tfms(item)
            yield transformed
    
    
class BatchTransformLoop(dp.iter.IterDataPipe):
    "Iterable datapipe that yields every element of `source_datapipe` after applying a `Pipeline` of `batch_tfms`."
    def __init__(self,source_datapipe, batch_tfms):
        # Wrap the transforms once so iteration can call them as a single callable.
        self.batch_tfms = Pipeline(batch_tfms)
        self.source_datapipe = source_datapipe

    @callback_iter
    def __iter__(self):
        "Lazily transform the upstream elements, one at a time, in upstream order."
        for batch in self.source_datapipe:
            transformed = self.batch_tfms(batch)
            yield transformed

In [7]:
# export
def default_loader_loop(
    items,
    splitter,
    cbs=None,
    type_tfms=None,
    item_tfms=None,
    batch_tfms=None,
    bs=2
):
    """Build a `(train_dp, valid_dp)` pair of batched datapipes from `items`.

    Steps, applied identically to both splits:
      1. wrap `items` in a `SequenceWrapper` and split it in two with
         `DemultiplexerMapDataPipe` (`classifier_fn=splitter`, `drop_none=True`);
      2. apply `type_tfms` through `TypeTransformLoop`;
      3. shuffle, then convert the map-style pipe to an iterable one;
      4. apply `item_tfms` through `ItemTransformLoop`;
      5. batch with size `bs`;
      6. if `batch_tfms` is given, apply it through `BatchTransformLoop`.

    Args:
        items: sequence of raw items to load.
        splitter: callable handed to the demultiplexer as `classifier_fn`.
            Presumably returns the split index (0 train / 1 valid) or `None`
            to drop the item -- confirm against `DemultiplexerMapDataPipe`.
        cbs: currently unused by this function.
        type_tfms: transforms for `TypeTransformLoop`; `None` means no transforms.
        item_tfms: transforms for `ItemTransformLoop`; `None` means no transforms.
        batch_tfms: transforms applied to each batch; `None` skips the batch
            transform stage entirely.
        bs: batch size passed to `.batch`.

    Returns:
        Tuple `(train_dp, valid_dp)` of iterable datapipes yielding batches.
    """
    type_tfms = ifnone(type_tfms,L())
    item_tfms = ifnone(item_tfms,L())
    pipe = dp.map.SequenceWrapper(items)
    train_dp,valid_dp = DemultiplexerMapDataPipe(
        pipe,
        num_instances=2,
        classifier_fn=splitter,
        drop_none=True
    )
    train_dp,valid_dp = L(train_dp,valid_dp).map(TypeTransformLoop, type_tfms=type_tfms)
    train_dp,valid_dp = L(train_dp,valid_dp).map(Self.shuffle())
    train_dp,valid_dp = L(train_dp,valid_dp).map(dp.iter.MapToIterConverter)
    train_dp,valid_dp = L(train_dp,valid_dp).map(ItemTransformLoop, item_tfms=item_tfms)
    train_dp,valid_dp = train_dp.batch(bs),valid_dp.batch(bs)
    # Previously `batch_tfms` was accepted but silently dropped. Only add the extra
    # stage when the caller actually supplied transforms, so the returned pipes are
    # unchanged for callers that never passed `batch_tfms`.
    if batch_tfms is not None:
        train_dp,valid_dp = L(train_dp,valid_dp).map(BatchTransformLoop, batch_tfms=batch_tfms)
    return train_dp,valid_dp

In [8]:
# export
def GrandparentSplitter(train='train',valid='valid'):
    """Create a splitter that classifies a path by the `train`/`valid` folder names in it.

    The returned function gives `None` when neither name occurs among
    `item.parts` or when `item` is a directory; otherwise it gives
    `True` (valid, index 1) or `False` (train, index 0).
    Note that any path component is matched, not only the grandparent folder.
    """
    split_names = (train, valid)

    def splitter(item):
        parts = item.parts
        belongs_to_split = any(name in parts for name in split_names)
        # The directory check is only reached for items that sit under a split folder.
        if not belongs_to_split or item.is_dir():
            return None
        # valid=1, train=0
        return valid in parts
    return splitter

In [9]:
# Build the (train, valid) datapipe pair from every path found under `path`;
# `PILImage.create` and `ToTensor` are passed as the type transforms.
base = default_loader_loop(
    L(path.rglob('*')),
    GrandparentSplitter(),
    type_tfms = L(PILImage.create,ToTensor)
)

In [10]:

# default_constructor(
#     dp.iter.Zipper(*base),
    
# )

In [11]:
# Interactive inspection only: show the help/signature for `DataLoader2`.
DataLoader2?

[0;31mInit signature:[0m
[0mDataLoader2[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdataset[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_size[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mshuffle[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msampler[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_sampler[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_workers[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcollate_fn[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpin_memory[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdrop_last[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtimeout[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mworker_init_fn[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34

In [12]:
# Wrap each datapipe of the (train, valid) pair in its own DataLoader2.
train_dl,valid_dl = map(DataLoader2, base)

In [13]:
# Print only the first element produced by the training loader, then stop iterating.
for element in train_dl:
    print(element)
    break

[TensorImage([[[[0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               ...,
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0]],

              [[0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               ...,
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0]],

              [[0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               ...,
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0]]]], dtype=torch.uint8), TensorImage([[[[0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               [0, 0, 0,  ..., 0, 0, 0],
               ...,
               [0, 0, 0,  ..., 

  return torch.stack(batch, 0, out=out)


In [17]:
# Interactive inspection only: show the source of `Transform`.
Transform??

[0;31mInit signature:[0m [0mTransform[0m[0;34m([0m[0mself[0m[0;34m,[0m [0menc[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mdec[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0msplit_idx[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0morder[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m [0mTransform[0m[0;34m([0m[0mmetaclass[0m[0;34m=[0m[0m_TfmMeta[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"Delegates (`__call__`,`decode`,`setup`) to (<code>encodes</code>,<code>decodes</code>,<code>setups</code>) if `split_idx` matches"[0m[0;34m[0m
[0;34m[0m    [0msplit_idx[0m[0;34m,[0m[0minit_enc[0m[0;34m,[0m[0morder[0m[0;34m,[0m[0mtrain_setup[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;32mNone[0m[0;34m,[0m[0;36m0[0m[0;34m,[0m[0;32mNone[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__init__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0menc[0m[0;34m=[0m[0;32mNone[0m[

In [None]:
# hide
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.export import *
    from nbdev.export2html import *
    from nbdev.cli import *
    # nbdev build steps, skipped on colab: `make_readme`, then `notebook2script`
    # called with `silent=True`.
    make_readme()
    notebook2script(silent=True)