In [None]:
#default_exp inference.export

# inference.export
> This module contains the main functionality for extracting the transform parameters from DataLoaders

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
from fastai2.vision.all import *

## Vision

As an example we will look at the Pets dataset. We will define a series of transforms in our `Pipeline`s and then attempt to extract them.

In [None]:
path = untar_data(URLs.PETS)
fnames = get_image_files(path/'images')
# The captured group is the label: everything before the trailing `_<digits>.jpg`.
# The dot is escaped so that it only matches a literal `.` in front of the extension.
pat = r'(.+)_\d+\.jpg$'
batch_tfms = [*aug_transforms(size=224, max_warp=0.8), Normalize.from_stats(*imagenet_stats)]
item_tfms = RandomResizedCrop(460, min_scale=0.75, ratio=(1.,1.))
bs=64
dls = ImageDataLoaders.from_name_re(path, fnames, pat, batch_tfms=batch_tfms,
                                   item_tfms=item_tfms, bs=bs)

In [None]:
#export
def _gen_dict(tfm):
    "Returns the stored attributes of `tfm` as a dict, together with its short name"
    stored = attrdict(tfm, *tfm.store_attrs.split(','))
    # When 'partial' is found in `name`, the entry at index 1 of `name` is used instead
    label = tfm.name[1] if 'partial' in tfm.name else tfm.name
    # Keep only what precedes ' --' (the printed parameters follow it)
    return stored, label.split(' --')[0]

In [None]:
dls.tfms[1]

Pipeline: partial -> Categorize -- {'vocab': (#37) ['Abyssinian','Bengal','Birman','Bombay','British_Shorthair','Egyptian_Mau','Maine_Coon','Persian','Ragdoll','Russian_Blue'...], 'add_na': False}

In [None]:
#export
def _make_tfm_dict(tfms, type_tfm=False):
    "Extracts transform params from `tfms`, keyed by transform name"
    tfm_dicts = {}
    for tfm in tfms:
        # Only transforms that declare `store_attrs` can be exported; `AffineCoordTfm`s are always skipped
        if not hasattr(tfm, 'store_attrs') or isinstance(tfm, AffineCoordTfm): continue
        # Unless `type_tfm` is set, skip transforms whose `split_idx` is 0.
        # Compare by value: `is not 0` tests object identity, which is only
        # coincidentally right for small ints and warns on Python 3.8+.
        if type_tfm or tfm.split_idx != 0:
            tfm_dict,name = _gen_dict(tfm)
            tfm_dicts[name] = tfm_dict
    return tfm_dicts

In [None]:
#hide
test_eq(len(_make_tfm_dict(dls.tfms, True)), 1)
# Plain `assert`s so that a failed membership check actually fails the cell
ab_dict = _make_tfm_dict(dls.after_batch)
assert 'Normalize' in ab_dict
assert 'Flip' not in ab_dict
# Item transforms must be checked against `it_dict`, not the batch-transform dict
it_dict = _make_tfm_dict(dls.after_item)
assert 'RandomResizedCrop' in it_dict
assert 'ToTensor' not in it_dict

In [None]:
#export
@typedispatch
def _extract_tfm_dicts(dl:TfmdDL):
    "Collects the transform params of `dl` for `tfms`, `after_item` and `after_batch`"
    stages = ['tfms','after_item','after_batch']
    # Only the first stage (`tfms`) is passed through with `type_tfm=True`
    return {stage: _make_tfm_dict(getattr(dl, stage), idx == 0)
            for idx,stage in enumerate(stages)}

In [None]:
#export
def get_information(dls):
    "Extracts the transform params from the first `DataLoader` in `dls`"
    first_dl = dls[0]
    return _extract_tfm_dicts(first_dl)

### get_information

This function takes any set of `DataLoaders` and extracts the transforms that are important during inference, along with their parameters.

In [None]:
tfm_info = get_information(dls)

In [None]:
#hide
test_eq(len(tfm_info),3)
test_eq(tfm_info.keys(), ['tfms','after_item','after_batch'])

For vision it will contain `tfms`, `after_item`, and `after_batch`

In [None]:
tfm_info['tfms']

{'Categorize': {'add_na': False,
  'vocab': (#37) ['Abyssinian','Bengal','Birman','Bombay','British_Shorthair','Egyptian_Mau','Maine_Coon','Persian','Ragdoll','Russian_Blue'...]}}

Here we can see the type transforms

In [None]:
tfm_info['after_item']

{'RandomResizedCrop': {'min_scale': 0.75,
  'ratio': (1.0, 1.0),
  'size': (460, 460),
  'val_xtra': 0.14}}

The item transforms

In [None]:
tfm_info['after_batch']

{'IntToFloatTensor': {'div': 255.0, 'div_mask': 1},
 'Normalize': {'axes': (0, 2, 3), 'mean': tensor([[[[0.4850]],
  
           [[0.4560]],
  
           [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],
  
           [[0.2240]],
  
           [[0.2250]]]], device='cuda:0')}}

And our batch transforms

## Tabular

Next we'll look at a tabular example. We will use the `ADULT_SAMPLE` dataset here:

In [None]:
#export
from fastai2.tabular.all import *

In [None]:
path = untar_data(URLs.ADULT_SAMPLE)

In [None]:
# Read the raw table from the folder downloaded by `untar_data` in the previous cell
df = pd.read_csv(path/'adult.csv')
# Split built from the row indices of `df`; no seed is passed, so it is
# presumably different on every run — TODO confirm
splits = RandomSplitter()(range_of(df))
# Column roles handed to `TabularPandas` below
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
# The three procs whose parameters are shown as `categorify`, `fill_missing` and `normalize` further down
procs = [Categorify, FillMissing, Normalize]
y_names = 'salary'

In [None]:
# Build the tabular object from `df` using the procs, column roles and splits defined above
to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,
                   y_names=y_names, splits=splits)
# NOTE: this rebinds `dls`, which held the vision `DataLoaders` earlier in the notebook
dls = to.dataloaders()

In [None]:
#export
@typedispatch
def _extract_tfm_dicts(dl:TabDataLoader):
    "Extracts all transform params from `dl`, keyed by proc name"
    # Candidate procs, in the order their entries appear in the result
    types = ['normalize','fill_missing','categorify','categorize','regression_setup']
    tfms = {}
    for t in types:
        # Record only the procs `dl` actually exposes, rather than raising
        # `AttributeError` when e.g. `Normalize` or `FillMissing` is not in use
        if not hasattr(dl, t): continue
        tfm = getattr(dl, t)
        tfms[t] = attrdict(tfm, *tfm.store_attrs.split(','))
    return tfms

The usage is the exact same:

In [None]:
tfm_dicts = get_information(dls)

In [None]:
#hide
test_eq(len(tfm_dicts),4)

However our keys are different. By default it will have `normalize`, `fill_missing`, and `categorify`, and then depending on what is available it will store either `categorize` or `regression_setup` to tell us about our outputs.

Here is an example from `normalize`:

In [None]:
tfm_dicts['normalize']

{'means': {'age': 38.54896541134017,
  'education-num': 10.086989903643135,
  'fnlwgt': 189806.53975200583},
 'stds': {'age': 13.61623631055053,
  'education-num': 2.5541612422904,
  'fnlwgt': 105132.50000965082}}

`fill_missing`:

In [None]:
tfm_dicts['fill_missing']

{'add_col': True,
 'fill_strategy': 'median',
 'fill_vals': defaultdict(int, {'education-num': 0}),
 'na_dict': {'education-num': 10.0}}

And `categorify`:

In [None]:
tfm_dicts['categorify']

{'classes': {'education': (#17) ['#na#',' 10th',' 11th',' 12th',' 1st-4th',' 5th-6th',' 7th-8th',' 9th',' Assoc-acdm',' Assoc-voc'...],
  'education-num_na': (#3) ['#na#',False,True],
  'marital-status': (#8) ['#na#',' Divorced',' Married-AF-spouse',' Married-civ-spouse',' Married-spouse-absent',' Never-married',' Separated',' Widowed'],
  'occupation': (#16) ['#na#',' ?',' Adm-clerical',' Armed-Forces',' Craft-repair',' Exec-managerial',' Farming-fishing',' Handlers-cleaners',' Machine-op-inspct',' Other-service'...],
  'race': (#6) ['#na#',' Amer-Indian-Eskimo',' Asian-Pac-Islander',' Black',' Other',' White'],
  'relationship': (#7) ['#na#',' Husband',' Not-in-family',' Other-relative',' Own-child',' Unmarried',' Wife'],
  'workclass': (#10) ['#na#',' ?',' Federal-gov',' Local-gov',' Never-worked',' Private',' Self-emp-inc',' Self-emp-not-inc',' State-gov',' Without-pay']}}

And finally `categorize` (since we have a classification problem):

In [None]:
tfm_dicts['categorize']

{'add_na': False, 'vocab': (#2) ['<50k','>=50k']}

## Exporting

To export, a new `to_fastinference` function has been made

In [None]:
doc(Learner.to_fastinference)

Learner.to_fastinference(x:fastai2.learner.Learner, data_fname='data', model_fname='model', path=Path('.'))
Export data for `fastinference_onnx` or `_pytorch` to use

To get a prettier result with hyperlinks to source code and documentation, install nbdev: pip install nbdev


Params:

* `data_fname`: Filename to save our extracted `DataLoader` information, default is `data`
* `model_fname`: Filename to save our current model, default is `model`
* `path`: Path to save our model and data to, default is `.`

Exported files will have the extension `.pkl`

In [None]:
#export
@patch
def to_fastinference(x:Learner, data_fname='data', model_fname='model', path=Path('.')):
    "Export data for `fastinference_onnx` or `_pytorch` to use"
    # Accept plain strings as well as `Path` objects
    path = Path(path)
    dicts = get_information(x.dls)
    with open(path/f'{data_fname}.pkl', 'wb') as handle:
        # Persist the extracted transform params (`dicts`), not the notebook-level `procs` list
        pickle.dump(dicts, handle, protocol=pickle.HIGHEST_PROTOCOL)
    x._end_cleanup()
    # NOTE(review): the optimizer is dropped here and never restored, although only the
    # model's `state_dict` is saved below — confirm whether clearing `x.opt` is still wanted
    x.opt = None
    # Only the weights are written; the model class is not pickled
    torch.save(x.model.state_dict(), path/f'{model_fname}.pkl')

In [None]:
learn = tabular_learner(dls, layers=[200,100])

Simply call `learn.to_fastinference` and it will export everything needed for `fastinference_pytorch` or `fastinference_onnx`

In [None]:
learn.to_fastinference(data_fname = 'data', model_fname = 'model', path = Path('.'))

In [None]:
#hide
"""
# TODO: Text
Things to save:


* `data.vocab`
* `data.o2i`
* Tokenizer
* All the rules in `text.core`:
[<function fastai2.text.core.fix_html>,
 <function fastai2.text.core.replace_rep>,
 <function fastai2.text.core.replace_wrep>,
 <function fastai2.text.core.spec_add_spaces>,
 <function fastai2.text.core.rm_useless_spaces>,
 <function fastai2.text.core.replace_all_caps>,
 <function fastai2.text.core.replace_maj>,
 <function fastai2.text.core.lowercase>]

- Ensure that `L` is in the library
"""