In [None]:
#default_exp fastai.datasets

# Dataset Errors
> Errors relating to fastai `Datasets` objects

Any issues regarding the building or calling of `Datasets` objects will raise one of these errors to help narrow down what went wrong.

In [None]:
#export
from fastcore.basics import patch, store_attr
from fastcore.foundation import L, mask2idxs
from fastcore.transform import Pipeline
from fastcore.xtras import is_listy

from fastai.imports import pv
from fastai.data.core import TfmdLists

  return torch._C._cuda_getDeviceCount() > 0


In [None]:
#export
@patch
def __init__(self:TfmdLists, items, tfms, use_list=None, do_setup=True, split_idx=None, train_setup=True,
             splits=None, types=None, verbose=False, dl_type=None):
    "Build a `TfmdLists` from `items` and `tfms`, raising `IndexError` when `items` is `None` or empty"
    # Reject missing/empty input up front, before anything is stored on `self`.
    if items is None or not len(items):
        raise IndexError('Items passed in either has a length of zero or is None')
    super(TfmdLists, self).__init__(items, use_list=use_list)
    if dl_type is not None:
        self._dl_type = dl_type
    # With no explicit splits, fall back to `slice(None)` plus an empty second split.
    if splits is None:
        splits = [slice(None),[]]
    self.splits = L(splits).map(mask2idxs)
    # Borrow the pipeline of another `TfmdLists`; whenever `tfms` is already a `Pipeline`
    # the setup step below is skipped.
    if isinstance(tfms,TfmdLists):
        tfms = tfms.tfms
    if isinstance(tfms,Pipeline):
        do_setup = False
    self.tfms = Pipeline(tfms, split_idx=split_idx)
    store_attr('types,split_idx')
    if not do_setup:
        return
    pv(f"Setting up {self.tfms}", verbose)
    self.setup(train_setup=train_setup)

`TfmdLists` will now raise an `IndexError` if `items` is either `None` or has a length of zero.

As an example, let's try to build a `Datasets` with no items:

In [None]:
#failing
from fastai.data.core import Datasets

# Building a `Datasets` from an empty list is expected to raise an `IndexError`
dset = Datasets([])

IndexError: Items passed in either has a length of zero or is None

In [None]:
#export
def subset_error(e:IndexError, i:int) -> IndexError:
    """
    Re-raise `e` with a message explaining that subset `i` of the dataset could not be grabbed

    The explanation is placed in front of the first argument of `e` (when it has one) and the
    very same exception object is raised again, so callers can keep catching `IndexError`.
    Despite the return annotation, this function never returns normally: it always raises `e`.
    """
    err = f'Tried to grab subset {i} in the Dataset, but it contains no items.'
    # `e.args` can be empty (a bare `IndexError()`) or hold a non-string payload, so guard
    # the lookup and coerce to `str` rather than failing while building the message.
    if e.args:
        err += f'\n\n{str(e.args[0])}'
    e.args = (err,)
    raise e

In [None]:
#export
@patch
def subset(self:TfmdLists, i:int):
    "Return a new `TfmdLists` sharing these tfms, restricted to the items of split `i`"
    try:
        split_items = self._get(self.splits[i])
        return self._new(split_items, split_idx=i)
    except IndexError as e:
        # Re-raised with extra context naming the subset that was requested.
        subset_error(e, i)

`subset_error` builds on our previous `IndexError` by adding context when we try to grab a particular subset of the `Datasets`, such as `dset.train`, `dset.valid`, or `dset.subset(i)`.

Let's look at an example below, where we only have a training dataset but no validation set (as we passed no splits in):

In [None]:
#failing

# No `splits` are passed here, so there is only a training subset
dset = Datasets([0,1,2])
# Grabbing the validation subset is expected to raise an `IndexError`
dset.valid

IndexError: Tried to grab subset 1 in the Dataset, but it contains no items.

Items passed in either has a length of zero or is None

In [None]:
#export
@patch
def setup(self:TfmdLists, train_setup=True):
    "Set up `self.tfms` on this list, then record the types seen along the pipeline in `types` and `pretty_types`"
    self.tfms.setup(self, train_setup)
    if len(self) != 0:
        # Take one sample item and push it through every transform, noting the type at each stage.
        if self.splits is None:
            sample = super(TfmdLists, self).__getitem__(0)
        else:
            sample = super(TfmdLists, self).__getitem__(self.splits[0])[0]
        self.types = []
        for tfm in self.tfms.fs:
            self.types.append(getattr(tfm, 'input_types', type(sample)))
            sample = tfm(sample)
        self.types.append(type(sample))
    # With no items the loop above never ran, so `types` must already have been provided.
    stored_types = getattr(self, 'types', [])
    if stored_types is None or len(stored_types) == 0:
        raise Exception("The stored dataset contains no items and `self.types` has not been setup yet")
    unique_types = L(tp if is_listy(tp) else [tp] for tp in self.types).concat().unique()
    self.pretty_types = '\n'.join([f'  - {t}' for t in unique_types])