In [None]:
#default_exp inference.export

# inference.export
> This module contains the main functionality for extracting the transform parameters from DataLoaders

In [None]:
#hide

from nbdev.showdoc import *

In [None]:
#export
from fastai2.vision.all import *

## Vision

As an example, we will look at the pets dataset. We will define a series of transforms in our `Pipeline`s and then attempt to extract their parameters.

In [None]:
# Dataset location and the image files it contains
path = untar_data(URLs.PETS)
fnames = get_image_files(path/'images')

# Settings passed to `from_name_re` below: the filename regex and the batch size
pat = r'(.+)_\d+.jpg$'
bs = 64

# The batch-level and item-level transforms whose parameters we will try to extract
batch_tfms = [*aug_transforms(), Normalize.from_stats(*imagenet_stats)]
item_tfms = RandomResizedCrop(460, min_scale=0.75, ratio=(1.,1.))

dls = ImageDataLoaders.from_name_re(
    path, fnames, pat,
    bs=bs, item_tfms=item_tfms, batch_tfms=batch_tfms,
)

In [None]:
#export
def to_list(b):
    """Convert the values of dict `b` into plain containers, in place.

    `apply` is used to map a converter over every value of `b`:
    an `L` or `CategoryMap` becomes a built-in `list`, a `Tensor` becomes a
    NumPy array (after `to_detach`), and anything else is returned unchanged.

    Returns the very same dict object `b` (it is mutated, not copied).
    """
    def _inner(o):
        if isinstance(o, (L, CategoryMap)): return list(o)
        if isinstance(o, Tensor): return np.array(to_detach(o))
        return o
    # Only values are reassigned (no key is added or removed), so updating while iterating is safe
    for k in b: b[k] = apply(_inner, b[k])
    return b

In [None]:
#export
def _gen_dict(tfm):
    "Return `(attrs, name)` for `tfm`: its `store_attrs` as an `attrdict`, and its name cut at the first ' --'"
    stored = attrdict(tfm, *tfm.store_attrs.split(','))
    # NOTE(review): when 'partial' occurs in `tfm.name`, element 1 of `tfm.name` is used as the name —
    # confirm what type `name` has in that case (for a plain `str` this would be a single character)
    full_name = tfm.name[1] if 'partial' in tfm.name else tfm.name
    return stored, full_name.split(' --')[0]

In [None]:
#export
def _make_tfm_dict(tfms, type_tfm=False):
    "Extracts transform params from `tfms`"
    tfm_dicts = {}
    for tfm in tfms:
        if hasattr(tfm, 'store_attrs') and not isinstance(tfm, AffineCoordTfm):
            if type_tfm or tfm.split_idx is not 0:
                tfm_dict,name = _gen_dict(tfm)
                tfm_dict = to_list(tfm_dict)
                tfm_dicts[name] = tfm_dict
    return tfm_dicts

In [None]:
dls.after_batch[2].fs[1].__dict__

{'max_lighting': 0.2,
 'p': 0.75,
 'draw': None,
 'batch': False,
 'change': tensor([1.0732], device='cuda:0')}

In [None]:
_make_tfm_dict(dls.after_item)

{'RandomResizedCrop': {'size': (460, 460),
  'min_scale': 0.75,
  'ratio': (1.0, 1.0),
  'val_xtra': 0.14}}

In [None]:
dls.after_batch[2].fs[1].__dict__

{'max_lighting': 0.2,
 'p': 0.75,
 'draw': None,
 'batch': False,
 'change': tensor([1.0732], device='cuda:0')}

In [None]:
my_d = dls.after_batch[2].fs[0].__dict__.copy()

In [None]:
my_d.pop('change')

{'max_lighting': 0.2,
 'p': 0.75,
 'draw': None,
 'batch': False,
 'change': TensorImage([0.5723], device='cuda:0')}

In [None]:
from fastai2.vision.augment import _BrightnessLogit

In [None]:
RandTransform??

In [None]:
def extract_logits(tfm):
    # NOTE(review): unfinished scratch function — it builds `name` and `t_d`, never uses
    # or returns them, and ends on the bare name `t`, which is not a local of this function.
    # NOTE(review): `name` is read from the class via `.name`; confirm whether
    # `tfm.__class__.__name__` was intended instead.
    name = tfm.__class__.name
    t_d = tfm.__dict__
    t

(#2) [<fastai2.vision.augment._BrightnessLogit object at 0x7f77c6232f90>,<fastai2.vision.augment._ContrastLogit object at 0x7f77c6232e90>]

In [None]:
dls.after_batch[2].fs[0].__class__.__name__

'_BrightnessLogit'

In [None]:
# Exploratory: gather parameters from the sub-transforms of the affine/coord/lighting batch transforms
ab_dict = {}
for tfm in dls.after_batch:
    if not isinstance(tfm, (AffineCoordTfm, LightingTfm)): continue
    if hasattr(tfm, 'aff_fs'):
        # Each entry is read through `.func` / `.keywords`
        for t in tfm.aff_fs:
            ab_dict[t.func.__name__] = t.keywords
    elif hasattr(tfm, 'coord_fs'):
        for t in tfm.coord_fs:
            t_d,n = _gen_dict(t)
            ab_dict[n] = t_d
    elif hasattr(tfm, 'fs'):
        for t in tfm.fs:
            # Previously `_gen_dict()` was called without its argument (TypeError) and the
            # result was dropped; handle these entries the same way as `coord_fs`.
            # NOTE(review): `_gen_dict` reads `t.store_attrs` — confirm the entries of `fs` define it
            t_d,n = _gen_dict(t)
            ab_dict[n] = t_d

here


In [None]:
#hide
# Type transforms: exactly one transform's parameters are extracted
test_eq(len(_make_tfm_dict(dls.tfms, True)), 1)

# Batch transforms: `Normalize` is extracted, `Flip` is not.
# Plain `assert`s are used because a bare `in_(...)` / `not in_(...)` expression
# only computes a boolean and throws it away, so it can never fail.
ab_dict = _make_tfm_dict(dls.after_batch)
assert 'Normalize' in ab_dict
assert 'Flip' not in ab_dict

# Item transforms: these checks must look at `it_dict`, not at the batch dict
it_dict = _make_tfm_dict(dls.after_item)
assert 'RandomResizedCrop' in it_dict
assert 'ToTensor' not in it_dict

In [None]:
#export
@typedispatch
def _extract_tfm_dicts(dl:TfmdDL):
    "Collect the transform params of `dl`'s `tfms`, `after_item` and `after_batch`, keyed by those names"
    extracted,is_image = {},False
    for i,stage in enumerate(('tfms', 'after_item', 'after_batch')):
        # Only the first stage (`tfms`) is extracted with `type_tfm=True`
        extracted[stage] = _make_tfm_dict(getattr(dl, stage), i == 0)
        if stage == 'tfms' and getattr(dl, stage)[0][1].name == 'PILBase.create':
            is_image = True
        elif stage == 'after_item':
            # `ToTensor` is recorded by hand, carrying the flag found while looking at `tfms`
            extracted[stage]['ToTensor'] = {'is_image': is_image}
    return extracted

In [None]:
#export
def get_information(dls):
    "Extract the transform params from the first `DataLoader` of `dls`"
    return _extract_tfm_dicts(dls[0])

### get_information

This function takes any `DataLoaders` object and extracts the transforms that matter at inference time, together with their parameters.

In [None]:
tfm_info = get_information(dls)

In [None]:
#hide
test_eq(len(tfm_info),3)
test_eq(tfm_info.keys(), ['tfms','after_item','after_batch'])

For vision it will contain `tfms`, `after_item`, and `after_batch`

First, our `type` transforms:

In [None]:
tfm_info['tfms']

{'Categorize': {'vocab': ['Abyssinian',
   'Bengal',
   'Birman',
   'Bombay',
   'British_Shorthair',
   'Egyptian_Mau',
   'Maine_Coon',
   'Persian',
   'Ragdoll',
   'Russian_Blue',
   'Siamese',
   'Sphynx',
   'american_bulldog',
   'american_pit_bull_terrier',
   'basset_hound',
   'beagle',
   'boxer',
   'chihuahua',
   'english_cocker_spaniel',
   'english_setter',
   'german_shorthaired',
   'great_pyrenees',
   'havanese',
   'japanese_chin',
   'keeshond',
   'leonberger',
   'miniature_pinscher',
   'newfoundland',
   'pomeranian',
   'pug',
   'saint_bernard',
   'samoyed',
   'scottish_terrier',
   'shiba_inu',
   'staffordshire_bull_terrier',
   'wheaten_terrier',
   'yorkshire_terrier'],
  'add_na': False}}

Then the `item` transforms:

In [None]:
tfm_info['after_item']

{'RandomResizedCrop': {'size': (460, 460),
  'min_scale': 0.75,
  'ratio': (1.0, 1.0),
  'val_xtra': 0.14},
 'ToTensor': {'is_image': True}}

And finally our batch transforms:

In [None]:
tfm_info['after_batch']

{'IntToFloatTensor': {'div': 255.0, 'div_mask': 1},
 'Normalize': {'mean': array([[[[0.485]],
  
          [[0.456]],
  
          [[0.406]]]], dtype=float32),
  'std': array([[[[0.229]],
  
          [[0.224]],
  
          [[0.225]]]], dtype=float32),
  'axes': (0, 2, 3)}}

## Tabular

Next we'll look at a tabular example. We will use the `ADULT_SAMPLE` dataset here:

In [None]:
#export
from fastai2.tabular.all import *

In [None]:
path = untar_data(URLs.ADULT_SAMPLE)

In [None]:
# Column roles and preprocessing steps for the tabular example
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
y_names = 'salary'
procs = [Categorify, FillMissing, Normalize]

# Load the data and split its row indices at random
df = pd.read_csv(path/'adult.csv')
splits = RandomSplitter()(range_of(df))

In [None]:
# Build the processed tabular object, then its `DataLoaders`
to = TabularPandas(
    df, procs=procs, splits=splits,
    cat_names=cat_names, cont_names=cont_names, y_names=y_names,
)
dls = to.dataloaders()

In [None]:
dls.normalize

Normalize -- {'means': {'age': 38.52497216783754, 'fnlwgt': 189756.69177319668, 'education-num': 10.091481438826827}, 'stds': {'age': 13.633111349975714, 'fnlwgt': 105165.09125395071, 'education-num': 2.5544176335877404}}:
(Tabular,object) -> encodes
(TensorImage,object) -> encodes
 (Tabular,object) -> decodes
(TensorImage,object) -> decodes

In [None]:
#export
@typedispatch
def _extract_tfm_dicts(dl:TabDataLoader):
    """Extracts all transform params from `dl`.

    Returns a dict with the keys:
    * `Encoder`: `{'cats': {idx: name}, 'conts': {idx: name}}`, where `idx` is the
      position of the column name when enumerating `dl.dataset`
    * `Normalize`, `FillMissing`, `Categorify`: the `store_attrs` of each proc,
      passed through `to_list`; `Categorify['classes']` is replaced by a
      per-column `{class: index}` lookup
    * `Categorize` and/or `Regression_setup`: added when `dl` has that attribute
    """
    types = 'normalize,fill_missing,categorify'
    if hasattr(dl, 'categorize'): types += ',categorize'
    if hasattr(dl, 'regression_setup'): types += ',regression_setup'
    tfms = {}
    # Take the column names from `dl` itself. Reading module-level `cat_names` /
    # `cont_names` only worked while those notebook variables happened to exist
    # and would be a NameError in the exported module.
    cat_names,cont_names = dl.cat_names,dl.cont_names
    name2idx = {name:n for n,name in enumerate(dl.dataset) if name in cat_names or name in cont_names}
    tfms['encoder'] = {'cats':  {name2idx[name]:name for name in cat_names},
                       'conts': {name2idx[name]:name for name in cont_names}}
    for t in types.split(','):
        tfm = getattr(dl, t)
        tfms[t] = to_list(attrdict(tfm, *tfm.store_attrs.split(',')))

    # Shallow copy: values are rebound below, the proc's own `classes` is left untouched
    categorize = dl.procs.categorify.classes.copy()
    for c in categorize:
        class2idx = {v:i for i,v in enumerate(categorize[c])}
        # Swap the '#na#' placeholder for a real NaN key mapped to index 0.
        # `pop` is left strict so that a column without '#na#' fails loudly.
        class2idx.pop('#na#')
        class2idx[np.nan] = 0
        categorize[c] = class2idx
    tfms['categorify']['classes'] = categorize
    # Public keys are capitalised; `fill_missing` is special-cased to `FillMissing`
    return {('FillMissing' if k == 'fill_missing' else k.capitalize()):v for k,v in tfms.items()}

The usage is the exact same:

In [None]:
tfm_dicts = get_information(dls)

In [None]:
#hide
test_eq(len(tfm_dicts),5)

However, our keys are different. The result always contains `Encoder` (the index-to-name maps for the categorical and continuous columns), `Normalize`, `FillMissing`, and `Categorify`; depending on what is available, it also stores either `Categorize` or `Regression_setup` to tell us about our outputs.

Here is an example from `Normalize`:

In [None]:
tfm_dicts['Normalize']

{'means': {'age': 38.579446427885905,
  'fnlwgt': 189089.21129409957,
  'education-num': 10.079273676532688},
 'stds': {'age': 13.668500198858403,
  'fnlwgt': 105206.02215862622,
  'education-num': 2.5518035837225304}}

`FillMissing`:

In [None]:
tfm_dicts['FillMissing']

{'fill_strategy': 'median',
 'add_col': True,
 'fill_vals': {'education-num': 0},
 'na_dict': {'education-num': 10.0}}

And `Categorify`:

In [None]:
tfm_dicts['Categorify']['classes'].keys()

dict_keys(['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'education-num_na'])

And finally `Categorize` (since we have a classification problem):

In [None]:
tfm_dicts['Categorize']

{'vocab': ['<50k', '>=50k'], 'add_na': False}

## Exporting

To export, a new `to_fastinference` method has been added to `Learner`.

In [None]:
#export
@patch
def to_fastinference(x:Learner, data_fname='data', model_fname='model', path=Path('.')):
    "Save `x.dls`' transform info to `{data_fname}.pkl` and `x.model` to `{model_fname}.pkl` under `path`"
    out_dir = path if isinstance(path, Path) else Path(path)
    data_file,model_file = out_dir/f'{data_fname}.pkl', out_dir/f'{model_fname}.pkl'
    # Extract first, so no file is opened if extraction fails
    tfm_info = get_information(x.dls)
    with open(data_file, 'wb') as f:
        pickle.dump(tfm_info, f, protocol=pickle.HIGHEST_PROTOCOL)
    # The model object itself is saved (not just its state dict)
    torch.save(x.model, model_file)

In [None]:
doc(Learner.to_fastinference)

Params:

* `data_fname`: Filename to save our extracted `DataLoader` information, default is `data`
* `model_fname`: Filename to save our current model, default is `model`
* `path`: Path to save our model and data to, default is `.`

Exported files will have the extension `.pkl`

In [None]:
learn = tabular_learner(dls, [200,100], metrics=[accuracy])
learn.to_fastinference(path='../../')

Simply call `learn.to_fastinference` and it will export everything needed for `fastinference_pytorch` or `fastinference_onnx`

In [None]:
learn.to_fastinference(data_fname = 'data', model_fname = 'model', path = Path('.'))

In [None]:
#hide
"""
# TODO: Text
Things to save:


* `data.vocab`
* `data.o2i`
* Tokenizer
* All the rules in `text.core`:
[<function fastai2.text.core.fix_html>,
 <function fastai2.text.core.replace_rep>,
 <function fastai2.text.core.replace_wrep>,
 <function fastai2.text.core.spec_add_spaces>,
 <function fastai2.text.core.rm_useless_spaces>,
 <function fastai2.text.core.replace_all_caps>,
 <function fastai2.text.core.replace_maj>,
 <function fastai2.text.core.lowercase>]

- Ensure that `L` is in the library
"""