In [None]:
#|default_exp models.utils

# Model utilities

> Utility functions used to build PyTorch time series models.

In [None]:
#|export
from tsai.imports import *
from copy import deepcopy
from fastai.layers import flatten_model, params, apply_init
from fastai.learner import Learner
from fastai.data.transforms import get_c
from fastai.tabular.model import *
from fastai.callback.schedule import *
from fastai.vision.models.xresnet import *
from tsai.models.layers import *

In [None]:
#|export
def apply_idxs(o, idxs):
    "Index `o` with `idxs`, using the access pattern required by zarr, dask or numpy-like arrays"
    # zarr arrays are indexed through their `oindex` accessor
    if is_zarr(o):
        return o.oindex[idxs]
    # dask arrays are lazy: index first, then materialize with `compute()`
    if is_dask(o):
        return o[idxs].compute()
    # anything else is indexed directly
    return o[idxs]

In [None]:
#|export
def SeqTokenizer(c_in, embed_dim, token_size=60, norm=False):
    """Generates non-overlapping tokens from sub-sequences within a sequence by applying a sliding window.

    The tokenizer is a `ConvBlock` whose kernel size and stride both equal `token_size`
    (no padding, no activation).

    Args:
        c_in: number of input channels.
        embed_dim: number of output channels produced for each token.
        token_size: kernel size and stride of the convolution.
        norm: if truthy, a 'Batch' norm is requested from `ConvBlock`.
    """
    # `norm` is a flag, so the previous `bias=norm is None` evaluated to False for both
    # `norm=True` and `norm=False`, leaving the un-normalized convolution without a bias.
    # A bias is only redundant when a norm layer follows the convolution.
    # NOTE(review): with the default `norm=False` this adds a bias parameter — checkpoints
    # saved with the old bias-free layer need non-strict loading; confirm before merging.
    return ConvBlock(c_in, embed_dim, token_size, stride=token_size, padding=0, act=None,
                     norm='Batch' if norm else None, bias=not norm)

SeqEmbed = SeqTokenizer

In [None]:
#|export
def get_embed_size(n_cat, rule='log2'):
    """Return an embedding size for a categorical variable with `n_cat` categories.

    Args:
        n_cat: number of categories.
        rule: 'log2' -> ceil(log2(n_cat)) (at least 1);
              'thumb' -> min(600, round(1.6 * n_cat**0.56)) (fastai's rule of thumb).

    Raises:
        ValueError: if `rule` is not one of the supported rules (previously the function
            silently returned None in that case).
    """
    if rule == 'log2':
        # log2(1) == 0 would give an embedding of width 0, so clamp to a minimum of 1
        return max(1, int(np.ceil(np.log2(n_cat))))
    if rule == 'thumb':
        return min(600, round(1.6 * n_cat**0.56)) # fastai's
    raise ValueError(f"rule must be 'log2' or 'thumb', got {rule!r}")

In [None]:
# Default 'log2' rule: ceil(log2(35)) == 6
test_eq(get_embed_size(35), 6)

In [None]:
#|export
def get_layers(model, cond=noop, full=True):
    """Return the modules of `model` for which at least one condition in `cond` is truthy.

    If `model` is a `Learner`, its `.model` is used. With `full=True` the candidates are
    the modules returned by `flatten_model(model)`; otherwise `model` is iterated directly.
    `cond` may be a single callable or a collection of callables.
    """
    if isinstance(model, Learner):
        model = model.model

    def _selected(m):
        # every condition is evaluated (list, not generator) before `any` is applied
        return any([c(m) for c in L(cond)])

    candidates = flatten_model(model) if full else model
    return [m for m in candidates if _selected(m)]

def is_layer(*args):
    "Build a predicate that checks whether an object is an instance of any of the types in `args`"
    def _isinstance_of(l, cond=args):
        return isinstance(l, cond)
    # the types are bound through `partial`, so the returned predicate takes only the layer
    predicate = partial(_isinstance_of, cond=args)
    return predicate

def is_linear(l):
    "True if `l` is an `nn.Linear` instance"
    linear_type = nn.Linear
    return isinstance(l, linear_type)

def is_bn(l):
    "True if `l` is a 1d, 2d or 3d batch norm layer"
    return isinstance(l, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d))

def is_conv_linear(l):
    "True if `l` is a 1d/2d/3d convolution or a linear layer"
    return isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.Linear))

def is_affine_layer(l):
    "True if `l` passes `has_bias` or, failing that, `has_weight`"
    if has_bias(l):
        return True
    return has_weight(l)

def is_conv(l):
    "True if `l` is a 1d, 2d or 3d convolution layer"
    return isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d))

def has_bias(l):
    "True if `l` has a `bias` attribute that is not None"
    return getattr(l, 'bias', None) is not None

def has_weight(l):
    "True if `l` has a `weight` attribute that is not None"
    # Layers such as `nn.BatchNorm1d(affine=False)` expose `weight` as None; treat them as
    # weight-less, in line with `has_bias` and with the `is not None` test in `check_weight`.
    return getattr(l, 'weight', None) is not None

def has_weight_or_bias(l):
    "True if `l` has a non-None `weight` or a non-None `bias`"
    return any((has_weight(l), has_bias(l)))

In [None]:
#|export
def _check_param(m, attr, cond=noop, verbose=False):
    "Collect the mean and std of the parameter called `attr` for every selected layer of `m` that has one"
    mean, std = [], []
    for i,l in enumerate(get_layers(m, cond=cond)):
        p = getattr(l, attr, None)
        if p is None: continue
        t = p.data
        # `.item()` yields Python floats, so building the numpy arrays below also works
        # when the parameters live on a non-CPU device; each statistic is computed once.
        t_mean, t_std = t.mean().item(), t.std().item()
        mean.append(t_mean)
        std.append(t_std)
        pv(f'{i:3} {l.__class__.__name__:15} shape: {str(list(t.shape)):15}  mean: {t_mean:+6.3f}  std: {t_std:+6.3f}', verbose)
    return np.array(mean), np.array(std)

def check_bias(m, cond=noop, verbose=False):
    """Return two numpy arrays with the mean and std of each layer's bias.

    Layers are selected with `get_layers(m, cond=cond)`; layers without a (non-None) `bias`
    are skipped. With `verbose=True` a summary line per layer is emitted through `pv`.
    """
    return _check_param(m, 'bias', cond=cond, verbose=verbose)

def check_weight(m, cond=noop, verbose=False):
    """Return two numpy arrays with the mean and std of each layer's weight.

    Layers are selected with `get_layers(m, cond=cond)`; layers without a (non-None) `weight`
    are skipped. With `verbose=True` a summary line per layer is emitted through `pv`.
    """
    return _check_param(m, 'weight', cond=cond, verbose=verbose)

In [None]:
#|export
def get_nf(m):
    "Return `in_features` of the first linear layer found in the last element of `m` (the head)"
    head = m[-1]
    first_linear = get_layers(head, is_linear)[0]
    return first_linear.in_features

In [None]:
#|export
def ts_splitter(m):
    "Split a model into two parameter groups: those of `m.backbone` and those of `m.head`"
    groups = L(m.backbone, m.head)
    return groups.map(params)

In [None]:
#|export
def transfer_weights(model, weights_path:Path, device:torch.device=None, exclude_head:bool=True):
    """Copy into `model` the tensors stored at `weights_path` whose names and shapes match.

    Taken from the great self-supervised repository created by Kerem Turgutlu.
    https://github.com/KeremTurgutlu/self_supervised/blob/d87ebd9b4961c7da0efd6073c42782bbc61aaa2e/self_supervised/utils.py

    Args:
        model: module whose `state_dict()` tensors are overwritten in place.
        weights_path: file read with `torch.load`.
        device: `map_location` for the load; falls back to `default_device()`.
        exclude_head: when True, entries whose name contains 'head' are skipped.

    Raises:
        Exception: if none of the considered names exists in the loaded weights.
    """
    device = ifnone(device, default_device())
    target_sd = model.state_dict()
    # NOTE(review): `torch.load` unpickles the file — only use it on trusted checkpoints.
    source_sd = torch.load(weights_path, map_location=device)
    n_matched = 0
    unmatched_layers = []
    for name, param in target_sd.items():
        if exclude_head and 'head' in name:
            continue
        if name not in source_sd:
            # these are weights that weren't in the original model, such as a new head
            unmatched_layers.append(name)
            continue
        # a shared name counts as matched even when the shapes turn out to differ
        n_matched += 1
        source_param = source_sd[name]
        if source_param.shape == param.shape:
            param.copy_(source_param)
        else:
            unmatched_layers.append(name)
    if n_matched == 0:
        raise Exception("No shared weight names were found between the models")
    if len(unmatched_layers) > 0:
        print(f'check unmatched_layers: {unmatched_layers}')
    else:
        print(f"weights from {weights_path} successfully transferred!\n")

In [None]:
#|export
def build_ts_model(arch, c_in=None, c_out=None, seq_len=None, d=None, dls=None, device=None, verbose=False,
                   pretrained=False, weights_path=None, exclude_head=True, cut=-1, init=None, arch_config={}, **kwargs):
    """Instantiate `arch`, move it to `device`, and optionally transfer weights and apply `init`.

    The constructor call used for `arch` is chosen from substrings of `arch.__name__`.

    Args:
        arch: model class/factory exposing `__name__`.
        c_in, c_out, seq_len, d: model dimensions. Any left as None is read from `dls`
            (`dls.vars`, `dls.c`, `dls.len`, `dls.d`) when `dls` is given.
        dls: optional dataloaders used to infer the dimensions above.
        device: target device; falls back to `default_device()`.
        verbose: when True, the constructor call is reported through `pv`.
        pretrained: when True (and `arch` is not a non-1d xresnet), weights are loaded from
            `weights_path` with `transfer_weights`.
        weights_path: path of the weights file; required when weights are transferred.
        exclude_head: forwarded to `transfer_weights`.
        cut: index at which a non-subscriptable 'Plus' model, once converted to
            `nn.Sequential`, is split into `backbone` (`model[:cut]`) and `head` (`model[cut:]`).
        init: optional init function given to `apply_init` (applied to `model[1]` when
            `pretrained`, to the whole model otherwise).
        arch_config: extra keyword arguments for `arch`. It is only unpacked, never mutated.
        **kwargs: further keyword arguments for `arch`. When `d` is truthy and `arch` is not
            a PatchTST, a `custom_head` entry is added, or wrapped with `d=d` if it is not
            already an `nn.Module`.

    Returns:
        The model, with `head_nf` and `__name__` attributes set.
    """

    device = ifnone(device, default_device())
    arch_name = arch.__name__
    arch_name_lower = arch_name.lower()
    is_patchtst = 'patchtst' in arch_name_lower
    is_xresnet_2d = 'xresnet' in arch_name and '1d' not in arch_name
    # architectures called as arch(c_in, c_out, seq_len=...)
    seq_len_archs = ('RNN_FCN', 'LSTM_FCN', 'RNNPlus', 'LSTMPlus', 'GRUPlus', 'InceptionTime', 'TSiT', 'Sequencer',
                     'GRU_FCN', 'OmniScaleCNN', 'mWDN', 'TST', 'XCM', 'MLP', 'MiniRocket', 'InceptionRocket')

    if dls is not None:
        c_in = ifnone(c_in, dls.vars)
        c_out = ifnone(c_out, dls.c)
        seq_len = ifnone(seq_len, dls.len)
        d = ifnone(d, dls.d)
    if d and not is_patchtst:
        if 'custom_head' not in kwargs:
            kwargs['custom_head'] = partial(lin_nd_head, d=d)
        elif not isinstance(kwargs['custom_head'], nn.Module):
            kwargs['custom_head'] = partial(kwargs['custom_head'], d=d)
    if 'ltsf_' in arch_name_lower or is_patchtst:
        pv(f'arch: {arch.__name__}(c_in={c_in} c_out={c_out} seq_len={seq_len} pred_dim={d} arch_config={arch_config}, kwargs={kwargs})', verbose)
        model = (arch(c_in=c_in, c_out=c_out, seq_len=seq_len, pred_dim=d, **arch_config, **kwargs)).to(device=device)
    elif any(v in arch_name for v in seq_len_archs):
        pv(f'arch: {arch.__name__}(c_in={c_in} c_out={c_out} seq_len={seq_len} arch_config={arch_config} kwargs={kwargs})', verbose)
        model = arch(c_in, c_out, seq_len=seq_len, **arch_config, **kwargs).to(device=device)
    elif is_xresnet_2d:
        pv(f'arch: {arch.__name__}(c_in={c_in} c_out={c_out} arch_config={arch_config} kwargs={kwargs})', verbose)
        model = (arch(c_in=c_in, n_out=c_out, **arch_config, **kwargs)).to(device=device)
    elif 'minirockethead' in arch_name_lower:
        pv(f'arch: {arch.__name__}(c_in={c_in} seq_len={seq_len} arch_config={arch_config} kwargs={kwargs})', verbose)
        model = (arch(c_in, c_out, seq_len=1, **arch_config, **kwargs)).to(device=device)
    elif 'rocket' in arch_name_lower:
        pv(f'arch: {arch.__name__}(c_in={c_in} seq_len={seq_len} arch_config={arch_config} kwargs={kwargs})', verbose)
        model = (arch(c_in=c_in, seq_len=seq_len, **arch_config, **kwargs)).to(device=device)
    else:
        pv(f'arch: {arch.__name__}(c_in={c_in} c_out={c_out} arch_config={arch_config} kwargs={kwargs})', verbose)
        model = arch(c_in, c_out, **arch_config, **kwargs).to(device=device)

    # Probe whether the model can be indexed like a Sequential with at least two elements.
    # `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
    try:
        model[0], model[1]
        subscriptable = True
    except Exception:
        subscriptable = False
    if hasattr(model, "head_nf"):  head_nf = model.head_nf
    else:
        # best effort: models without a linear layer in their last element get head_nf=None
        try: head_nf = get_nf(model)
        except Exception: head_nf = None

    if not subscriptable and 'Plus' in arch_name:
        model = nn.Sequential(*model.children())
        model.backbone = model[:cut]
        model.head = model[cut:]

    if pretrained and not is_xresnet_2d:
        assert weights_path is not None, "you need to pass a valid weights_path to use a pre-trained model"
        transfer_weights(model, weights_path, exclude_head=exclude_head, device=device)

    if init is not None:
        apply_init(model[1] if pretrained else model, init)

    setattr(model, "head_nf", head_nf)
    setattr(model, "__name__", arch.__name__)

    return model

build_model = build_ts_model
create_model = build_ts_model

In [None]:
#|export
def count_parameters(model, trainable=True):
    "Total number of elements in `model`'s parameters; with `trainable=True` only those with `requires_grad` are counted"
    total = 0
    for p in model.parameters():
        if trainable and not p.requires_grad:
            continue
        total += p.numel()
    return total

In [None]:
#|export
# @delegates(XResNet.__init__)
def build_tsimage_model(arch, c_in=None, c_out=None, dls=None, pretrained=False, device=None, verbose=False, init=None, arch_config={}, **kwargs):
    """Instantiate `arch` with `pretrained`, `c_in` and `n_out=c_out`, and move it to `device`.

    `c_in` / `c_out` left as None are read from `dls.vars` / `dls.c` when `dls` is given.
    `device` falls back to `default_device()`. If `init` is given it is applied with
    `apply_init` (to `model[1]` when `pretrained`, to the whole model otherwise).
    `verbose` is accepted but not used in this function. The returned model has a
    `__name__` attribute set to `arch.__name__`.
    """
    device = ifnone(device, default_device())
    if dls is not None:
        c_in, c_out = ifnone(c_in, dls.vars), ifnone(c_out, dls.c)

    net = arch(pretrained=pretrained, c_in=c_in, n_out=c_out, **arch_config, **kwargs)
    model = net.to(device=device)
    model.__name__ = arch.__name__
    if init is not None:
        init_target = model[1] if pretrained else model
        apply_init(init_target, init)
    return model

In [None]:
#|export
# @delegates(TabularModel.__init__)
def build_tabular_model(arch, dls, layers=None, emb_szs=None, n_out=None, y_range=None, device=None, arch_config={}, **kwargs):
    """Instantiate a tabular `arch` from `dls` and move it to `device`.

    Args:
        arch: model class called as
            `arch(emb_szs, n_cont, n_out, layers, y_range=..., **arch_config, **kwargs)`.
        dls: dataloaders; `dls.train_ds` and `dls.cont_names` are read.
        layers: hidden layer sizes, `[200, 100]` when None.
        emb_szs: optional embedding-size overrides passed to `get_emb_sz`.
        n_out: number of outputs; inferred with `get_c(dls)` when None.
        y_range: forwarded to `arch`.
        device: target device; falls back to `default_device()`.
        arch_config: extra keyword arguments for `arch`. It is only unpacked, never mutated.
        **kwargs: further keyword arguments for `arch`.

    Returns:
        The model with `__name__` set and, when it could be determined, `head_nf`.
    """
    if device is None: device = default_device()
    if layers is None: layers = [200,100]
    emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    # `y_range` is a named parameter, so it can never show up in `kwargs`; the former
    # `'y_range' in kwargs` fallback was unreachable and has been dropped.
    model = arch(emb_szs, len(dls.cont_names), n_out, layers, y_range=y_range, **arch_config, **kwargs).to(device=device)

    if hasattr(model, "head_nf"):  head_nf = model.head_nf
    else:
        # Best effort, as in `build_ts_model`: a model that is not subscriptable or has no
        # linear layer in its last element simply gets no `head_nf` instead of crashing.
        try: head_nf = get_nf(model)
        except Exception: head_nf = None
    setattr(model, "__name__", arch.__name__)
    if head_nf is not None: setattr(model, "head_nf", head_nf)
    return model

create_tabular_model = build_tabular_model

In [None]:
from tsai.data.external import get_UCR_data
from tsai.data.core import TSCategorize, get_ts_dls
from tsai.data.preprocessing import TSStandardize
from tsai.models.InceptionTime import *

In [None]:
# Build an InceptionTime model from dataloaders created on the NATOPS dataset and
# check its number of trainable parameters.
X, y, splits = get_UCR_data('NATOPS', split_data=False)
tfms = [None, TSCategorize()]
batch_tfms = TSStandardize()
dls = get_ts_dls(X, y, splits, tfms=tfms, batch_tfms=batch_tfms)
# c_in, c_out and seq_len are not passed, so they are taken from `dls`
model = build_ts_model(InceptionTime, dls=dls)
test_eq(count_parameters(model), 460038)

In [None]:
#|export
def get_clones(module, N):
    "Return an `nn.ModuleList` holding `N` independent deep copies of `module`"
    clones = nn.ModuleList()
    for _ in range(N):
        clones.append(deepcopy(module))
    return clones

In [None]:
# Three deep copies of the same Conv1d layer, wrapped in a ModuleList
m = nn.Conv1d(3,4,3)
get_clones(m, 3)

ModuleList(
  (0): Conv1d(3, 4, kernel_size=(3,), stride=(1,))
  (1): Conv1d(3, 4, kernel_size=(3,), stride=(1,))
  (2): Conv1d(3, 4, kernel_size=(3,), stride=(1,))
)

In [None]:
#|export
def split_model(m):
    "Return the `backbone` and `head` attributes of `m` as a tuple"
    backbone, head = m.backbone, m.head
    return backbone, head

In [None]:
#|export
@torch.no_grad()
def output_size_calculator(mod, c_in, seq_len=None):
    """Return the `(c_out, q_len)` a module produces for a random `(1, c_in, seq_len)` input.

    The input is created on the device and with the dtype of the module's first parameter
    when possible, otherwise with torch defaults. The forward pass runs in eval mode; the
    training flag of every submodule is restored afterwards, even if the forward pass raises.

    Args:
        mod: an `nn.Module` whose output has exactly two dimensions after the batch one.
        c_in: number of input channels.
        seq_len: input length. When None, a length of 50 is used for the probe and the
            returned `q_len` is None.

    Returns:
        `(c_out, q_len)`, or `(c_out, None)` when `seq_len` was not given.
    """
    assert isinstance(mod, nn.Module)
    return_q_len = seq_len is not None
    if seq_len is None:
        seq_len = 50  # arbitrary probe length; only c_out is reported in this case
    try:
        params_0 = next(mod.parameters())
        xb = torch.rand(1, c_in, seq_len, device=params_0.device, dtype=params_0.dtype)
    except Exception:
        # no parameters, or a dtype `torch.rand` cannot generate: use default device/dtype
        xb = torch.rand(1, c_in, seq_len)
    # `eval()` flips the flag of every submodule, so every flag has to be restored:
    # resetting only `mod.training` would leave all children in eval mode.
    modes = [(m, m.training) for m in mod.modules()]
    mod.eval()
    try:
        c_out, q_len = mod(xb).shape[1:]
    finally:
        for m, was_training in modes:
            m.training = was_training
    if return_q_len:
        return c_out, q_len
    else:
        return c_out, None

In [None]:
# Conv1d with kernel_size=3 and stride=2 on a length-30 input: floor((30 - 3) / 2) + 1 = 14
c_in = 3
seq_len = 30
m = nn.Conv1d(3, 12, kernel_size=3, stride=2)
new_c_in, new_seq_len = output_size_calculator(m, c_in, seq_len)
test_eq((new_c_in, new_seq_len), (12, 14))

In [None]:
#|export
def change_model_head(model, custom_head, **kwargs):
    r"""Swap `model.head` for `custom_head(model.head_nf, model.c_out, model.seq_len, **kwargs)`.

    The model must expose `head_nf`, `c_out` and `seq_len` attributes. It is modified in
    place and returned.
    """
    new_head = custom_head(model.head_nf, model.c_out, model.seq_len, **kwargs)
    model.head = new_head
    return model

In [None]:
#|export
def naive_forecaster(o, split, horizon=1):
    """Return the values of `o` located `horizon` positions before each index in `split`.

    If `horizon` is list-like, one result per horizon is computed and they are stacked
    along a new first axis with `np.stack`.
    """
    idxs = np.asarray(split)
    if not is_listy(horizon):
        return o[idxs - horizon]
    return np.stack([o[idxs - h] for h in horizon])

def true_forecaster(o, split, horizon=1):
    """Return `o[split]`, i.e. the actual values at the `split` indices.

    If `horizon` is list-like, the result gets a new leading axis and is repeated
    `len(horizon)` times along it.
    """
    o_true = o[split]
    if not is_listy(horizon):
        return o_true
    return o_true[np.newaxis].repeat(len(horizon), 0)

In [None]:
# With horizon=1 the naive forecast for index i is the value at i-1, so it is the
# true series shifted by one step.
# NOTE: `a` is random and unseeded, so the displayed values change on every run.
a = np.random.rand(20).cumsum()
split = np.arange(10, 20)
a, naive_forecaster(a, split, 1), true_forecaster(a, split, 1)

(array([ 0.39797843,  1.27891504,  1.94817929,  2.21751897,  2.4325006 ,
         2.54909223,  3.38126806,  3.88615614,  4.451216  ,  4.89321995,
         5.22069681,  6.18352585,  6.96211212,  7.67760784,  7.9064437 ,
         8.88161211,  9.45559596, 10.3985896 , 10.95580727, 11.85862006]),
 array([ 4.89321995,  5.22069681,  6.18352585,  6.96211212,  7.67760784,
         7.9064437 ,  8.88161211,  9.45559596, 10.3985896 , 10.95580727]),
 array([ 5.22069681,  6.18352585,  6.96211212,  7.67760784,  7.9064437 ,
         8.88161211,  9.45559596, 10.3985896 , 10.95580727, 11.85862006]))

In [None]:
#|eval: false
#|hide
# Look up this notebook's name and run tsai's notebook-to-script conversion for it.
from tsai.export import get_nb_name; nb_name = get_nb_name(locals())
from tsai.imports import create_scripts; create_scripts(nb_name)

<IPython.core.display.Javascript object>

/Users/nacho/notebooks/tsai/nbs/030_models.utils.ipynb saved at 2023-02-01 20:49:54
Correct notebook to script conversion! 😃
Wednesday 01/02/23 20:49:57 CET
