# Losses

In [None]:
#| default_exp losses

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| export
import functools, torch, torch.nn as nn, torch.nn.functional as F
from typing import MutableSequence, Union
import torch.autograd.profiler as profiler

from fastcore.utils import *
from fastcore.meta import *

from xcai.torch_core import *

In [None]:
#| hide
from nbdev.showdoc import *
import nbdev; nbdev.nbdev_export()

## Setup

In [None]:
from xcai.block import *
from xcai.models.MMM00X import *

In [None]:
block = XCBlock.from_cfg('train', tfm='ng', tokz='bert-base-uncased')

In [None]:
m = BT0001.from_pretrained('bert-base-uncased')

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


In [None]:
# Draw one training batch and run the model once so the loss cells further down have tensors to work on.
bsz = 64
batch = block.train.one_batch(bsz)
# `m_args` presumably asks `prepare_batch` to forward the sampled label indices to the model — TODO confirm.
b = prepare_batch(m,batch, m_args=['lbl2data_idx_smp'])
# m,b = m.to('cuda'),b.to('cuda')
# Seven values are unpacked; `kwargs` is indexed with 'lbl2data_idx_smp' when the Triplet loss is called later.
data_logits, lbl2data_input_ids, lbl2data_data2ptr, lbl2data_idx, data_repr, lbl2data_repr, kwargs = m(**b)

In [None]:
# NOTE(review): `data_logits.shape` appears twice; the second entry was probably meant to be
# `lbl2data_input_ids.shape`. Left as is because the recorded output reflects the duplicate — confirm and re-run.
data_logits.shape, data_logits.shape, lbl2data_data2ptr.shape, lbl2data_idx.shape, data_repr.shape, lbl2data_repr.shape

(torch.Size([64, 13, 30522]),
 torch.Size([64, 13, 30522]),
 torch.Size([64]),
 torch.Size([141]),
 torch.Size([64, 768]),
 torch.Size([64, 768]))

## BaseLoss

In [None]:
#| export
class BaseLoss(nn.Module):
    """Shared base of the losses in this module; it only stores the reduction mode.

    The mode lives in the `reduce` attribute and is mirrored by the `reduction`
    property, so it can be read or assigned under either name.
    """

    def __init__(self, reduce: Optional[str] = None, **kwargs):
        # Any extra keyword arguments are accepted and ignored.
        super().__init__()
        self.reduce = reduce

    @property
    def reduction(self) -> str:
        "Reduction style currently held in `reduce`"
        return self.reduce

    @reduction.setter
    def reduction(self, value: str):
        "Store `value` as the reduction style"
        self.reduce = value
        

## MultiCrossEntropy

In [None]:
#| export
class MultiCrossEntropy(BaseLoss):
    """Cross-entropy loss object holding an optional pre-allocated tensor of ones.

    tn_targ: length of the cached int64 ones tensor `o`; `None` disables the cache
             (`o` is then `None`).
    ig_tok:  token id stored for the patched `__call__`, which excludes it from the loss.
    Remaining keyword arguments (e.g. `reduce`) are forwarded to `BaseLoss`.
    """

    def __init__(self,
                 tn_targ:Optional[int]=None, 
                 ig_tok:Optional[int]=0,
                 **kwargs):
        super().__init__(**kwargs)
        self.tn_targ, self.ig_tok = tn_targ, ig_tok
        # `o` is a buffer, not a parameter: it still follows `.to()`/`.cuda()` and keeps its
        # state_dict key, but no longer shows up in `parameters()` and the module's parameter
        # registry is left intact (it used to be overwritten with a plain dict).
        self.register_buffer('o', torch.ones(tn_targ, dtype=torch.int64) if tn_targ is not None else None)
        

In [None]:
# Larger batch, moved to the GPU together with the model, for profiling the MultiCrossEntropy variants below.
bsz = 512
batch = block.train.one_batch(bsz)
b = prepare_batch(m,batch)
m,b = m.to('cuda'),b.to('cuda')
# NOTE(review): five outputs are unpacked here but seven in the Setup cell; presumably the cells were
# run against different model versions — confirm before re-running the notebook top to bottom.
data_logits, lbl2data_input_ids, lbl2data_data2ptr, data_repr, lbl2data_repr = m(**b)

In [None]:
mce_fn = MultiCrossEntropy(10_000, reduce='mean').to('cuda')

In [None]:
#| export
@patch
def __call__(cls:MultiCrossEntropy,
             inp:torch.FloatTensor,
             targ:torch.LongTensor,
             n_inp2targ:torch.LongTensor, 
             **kwargs):
    """Vectorised token-level negative log-likelihood when each input owns several target rows.

    `cls` is the `MultiCrossEntropy` instance (the function is attached with `@patch`).

    inp:        logits; softmax is taken over dim 2 and dim 1 is cropped to the common length.
    targ:       2-D tensor of token ids, one row per target sequence. Rows are assumed to be
                grouped by input, in input order — TODO confirm against the data pipeline.
    n_inp2targ: number of target rows per input. Assumes every count is >= 1 (a zero count would
                give a -1 or repeated scatter index) — TODO confirm.

    Returns a scalar: with 'mean' the summed loss divided by the number of non-ignored tokens in
    `targ`; with 'sum' the summed loss. Raises `ValueError` for any other reduction.
    """
    tn_targ, targ_len = targ.shape
    bsz, inp_len, mn_targ = inp.shape[0], inp.shape[1], n_inp2targ.max()
    seq_len = min(targ_len, inp_len)
    # After the transpose `inp` is indexed [input, vocab, position], ready for a gather over the vocab dim.
    inp, targ = -F.log_softmax(inp, dim=2)[:, :seq_len].transpose(1,2), targ[:, :seq_len]
    
    # Index of the last target row of every input.
    inp2targ_ptr = n_inp2targ.cumsum(dim=0)-1
    # How often that last row has to be repeated so every input ends up with `mn_targ` rows.
    xn_inp2targ = mn_targ-n_inp2targ+1
    # Per-row repeat count: 1 everywhere except the padded last rows. The cached ones tensor `cls.o`
    # is used when it is long enough; otherwise a fresh one is allocated on `inp`'s device.
    r_targ = (
        torch.ones(tn_targ, dtype=torch.int64, device=inp.device).scatter(0, inp2targ_ptr, xn_inp2targ)
        if cls.tn_targ is None or tn_targ > cls.tn_targ else
        cls.o[:tn_targ].scatter(0, inp2targ_ptr, xn_inp2targ)
    )
    # Targets padded to a rectangular layout of `mn_targ` rows per input.
    xtarg = targ.repeat_interleave(r_targ, dim=0)

    # Pick the negative log-probability of every (padded) target token.
    s = inp.gather(1, xtarg.view(bsz, -1, seq_len)).view(-1, seq_len)
    # Each duplicated row is divided by its repeat count so the copies together weigh as one row.
    s /= r_targ.repeat_interleave(r_targ, dim=0).view(-1, 1)
    # Drop positions holding the ignore token.
    idx = torch.where(xtarg != cls.ig_tok)
    loss = s[idx[0], idx[1]]
    
    # The mean normalises by the non-ignored token count of the unpadded targets.
    if cls.reduction == 'mean': return (loss/len(torch.where(targ != cls.ig_tok)[0])).sum()
    elif cls.reduction == 'sum': return loss.sum()
    else: raise ValueError(f'`reduction` cannot be `{cls.reduction}`')


In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = mce_fn(data_logits, lbl2data_input_ids, lbl2data_data2ptr)

STAGE:2024-04-14 01:41:23 9626:9626 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-04-14 01:41:24 9626:9626 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-14 01:41:24 9626:9626 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [None]:
print(prof)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                              aten::max         4.32%     561.000us        27.11%       3.523ms       3.523ms           0 b           0 b         512 b           0 b             1  
                                            aten::empty         0.27%      35.000us         0.27%      35.000us      35.000us           0 b           0 b         512 b         512 b             1  
         

In [None]:
@patch
def __call__(cls:MultiCrossEntropy, 
             inp:torch.FloatTensor, 
             targ:torch.LongTensor, 
             n_inp2targ:torch.LongTensor, 
             **kwargs):
    """Experimental variant: replicate each input's log-probabilities once per target row.

    `cls` is the `MultiCrossEntropy` instance. The full vocabulary distribution is copied
    `n_inp2targ[i]` times for input `i` before gathering; the recorded notebook run of this
    variant ended in a CUDA out-of-memory error.
    """
    seq_len = min(inp.shape[1], targ.shape[1])
    nll = -F.log_softmax(inp, dim=2)[:, :seq_len]
    tok = targ[:, :seq_len].unsqueeze(2)

    # One copy of the per-position distribution for every target row.
    nll = nll.repeat_interleave(n_inp2targ, dim=0)
    picked = nll.gather(2, tok)

    # Keep only positions whose target is not the ignore token.
    keep = torch.where(tok != cls.ig_tok)
    loss = picked[keep[0], keep[1]]

    mode = cls.reduction
    if mode == 'mean':
        return loss.mean()
    if mode == 'sum':
        return loss.sum()
    raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
        

In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = mce_fn(data_logits, lbl2data_input_ids, lbl2data_data2ptr)

STAGE:2024-04-14 01:41:33 9626:9626 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-04-14 01:41:33 9626:9626 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-14 01:41:33 9626:9626 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.19 GiB. GPU 0 has a total capacity of 31.74 GiB of which 580.88 MiB is free. Process 37907 has 1.90 GiB memory in use. Process 37366 has 4.15 GiB memory in use. Including non-PyTorch memory, this process has 25.12 GiB memory in use. Of the allocated memory 24.63 GiB is allocated by PyTorch, and 114.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
print(prof)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                      aten::log_softmax         0.08%       8.000us         5.52%     526.000us     526.000us           0 b           0 b     454.00 Mb           0 b             1  
                                     aten::_log_softmax         5.14%     490.000us         5.43%     518.000us     518.000us           0 b           0 b     454.00 Mb     454.00 Mb             1  
         

In [None]:
@patch
def __call__(cls:MultiCrossEntropy, 
             inp:torch.FloatTensor, 
             targ:torch.LongTensor, 
             n_inp2targ:torch.LongTensor, 
             **kwargs):
    """Experimental loop variant: gather target-token losses one target row at a time.

    `cls` is the `MultiCrossEntropy` instance. Target rows are consumed in order, the first
    `n_inp2targ[0]` rows being scored against input 0, the next `n_inp2targ[1]` against input 1, etc.
    """
    seq_len = min(inp.shape[1], targ.shape[1])
    nll = -F.log_softmax(inp, dim=2)[:, :seq_len]
    targ = targ[:, :seq_len]

    cursor, picked = 0, []
    for nll_i, n in zip(nll, n_inp2targ):
        for _ in range(n):
            tok = targ[cursor].view(-1, 1)
            picked.append(nll_i.gather(1, tok).view(1, -1))
            cursor += 1
    per_tok = torch.vstack(picked)

    # Keep only positions whose target is not the ignore token.
    keep = torch.where(targ != cls.ig_tok)
    loss = per_tok[keep[0], keep[1]]

    mode = cls.reduction
    if mode == 'mean':
        return loss.mean()
    if mode == 'sum':
        return loss.sum()
    raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
    

In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = mce_fn(data_logits, lbl2data_input_ids, lbl2data_data2ptr)

STAGE:2024-04-14 01:43:47 11200:11200 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-04-14 01:43:48 11200:11200 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-14 01:43:48 11200:11200 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [None]:
print(prof)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                      aten::log_softmax         0.04%      20.000us         2.59%       1.400ms       1.400ms           0 b           0 b    1014.00 Mb           0 b             1  
                                     aten::_log_softmax         2.47%       1.334ms         2.56%       1.380ms       1.380ms           0 b           0 b    1014.00 Mb    1014.00 Mb             1  
         

## MultiTriplet

In [None]:
#| export
class MultiTriplet(BaseLoss):
    """Margin-based loss object holding two optional pre-allocated index tensors.

    bsz:     length of the cached `arange` tensor `u`; `None` disables that cache.
    tn_targ: length of the cached int64 ones tensor `v`; `None` disables that cache.
    margin:  default margin stored on the instance.
    Remaining keyword arguments (e.g. `reduce`) are forwarded to `BaseLoss`.
    """

    def __init__(self,
                 bsz:Optional[int]=None, 
                 tn_targ:Optional[int]=None,
                 margin:Optional[float]=0.8,
                 **kwargs):
        super().__init__(**kwargs)
        self.bsz, self.tn_targ, self.margin = bsz, tn_targ, margin
        # `u` and `v` are buffers, not parameters: they still follow `.to()`/`.cuda()` and keep
        # their state_dict keys, but no longer show up in `parameters()` and the module's
        # parameter registry is left intact (it used to be overwritten with a plain dict).
        self.register_buffer('u', torch.arange(bsz, dtype=torch.int64) if bsz is not None else None)
        self.register_buffer('v', torch.ones(tn_targ, dtype=torch.int64) if tn_targ is not None else None)
        

In [None]:
# Small batch for exercising MultiTriplet.
bsz = 10
batch = block.train.one_batch(bsz)
# NOTE(review): unlike the MultiCrossEntropy setup cell, the batch is passed to the model without
# `prepare_batch` or an explicit device move — confirm this is intended.
data_logits, lbl2data_input_ids, lbl2data_data2ptr, data_repr, lbl2data_repr = m(**batch)

In [None]:
mtl_fn = MultiTriplet(bsz, 10_000, 0.8, reduce='mean').to('cuda')

In [None]:
#| export
@patch
def __call__(cls:MultiTriplet, 
             inp:torch.FloatTensor, 
             targ:torch.LongTensor, 
             n_inp2targ:torch.LongTensor,
             margin:Optional[float]=None, 
             **kwargs):
    """Vectorised margin loss between every positive target of an input and all targets of other inputs.

    `cls` is the `MultiTriplet` instance (the function is attached with `@patch`).

    inp:        one representation row per input.
    targ:       one representation row per target; it is multiplied with `inp.T`, so despite the
                `LongTensor` annotation it has to share `inp`'s dtype. Rows are assumed to be
                grouped by input, in input order — TODO confirm.
    n_inp2targ: number of target rows per input. Assumes every count is >= 1 — TODO confirm.
    margin:     optional override. NOTE(review): the override is written to `cls.margin`, so it
                also applies to later calls that pass no margin.

    Returns a scalar: with 'mean' the summed hinge terms divided by
    `n_inp2targ.sum()**2 - (n_inp2targ**2).sum()`; with 'sum' the summed hinge terms.
    Raises `ValueError` for any other reduction.
    """
    cls.margin = cls.margin if margin is None else margin
    bsz, tn_targ, mn_targ = inp.shape[0], targ.shape[0], n_inp2targ.max()
    # Use the cached index tensors when they are long enough, otherwise allocate fresh ones.
    u = torch.arange(bsz, dtype=torch.int64, device=inp.device) if cls.u is None or cls.bsz < bsz else cls.u[:bsz]
    v = (
        torch.ones(tn_targ, dtype=torch.int64, device=targ.device)
        if cls.tn_targ is None or tn_targ > cls.tn_targ else cls.v[:tn_targ]
    )
    # For every target row, the index of the input it belongs to.
    targ2inp_ptr = u.repeat_interleave(n_inp2targ)
    # Similarity of every target (rows) with every input (columns).
    s = targ@inp.T
    # Score of every target with its own input, as a column vector.
    ps = s.gather(1, targ2inp_ptr.view(-1,1))
    
    # Index of the last target row of every input, and how often it must be repeated so that
    # every input ends up with `mn_targ` rows.
    inp2targ_ptr = n_inp2targ.cumsum(dim=0)-1
    xn_inp2targ = mn_targ-n_inp2targ+1
    
    # Per-row repeat count: 1 everywhere except the padded last rows.
    r_targ = v.scatter(0, inp2targ_ptr, xn_inp2targ)
    
    targ2inp_ptrx = targ2inp_ptr.repeat_interleave(r_targ)
    mask, maskx = F.one_hot(targ2inp_ptr), F.one_hot(targ2inp_ptrx)
    # Non-zero where a padded positive row and a target column belong to the same input.
    # `CUDALongTensor` comes from the project's torch_core; presumably an integer matmul wrapper — TODO confirm.
    fmask = CUDALongTensor.matmul(maskx,mask.T)
    # Positive scores laid out as [input, padded positive, 1] and all scores as [input, 1, target]
    # so the subtraction broadcasts to every (positive, target) pair of an input.
    psx = ps.repeat_interleave(r_targ).view(bsz, -1, 1)
    s = s.T.view(bsz, 1, -1)
    fs = (s - psx + cls.margin).view(-1, tn_targ)
    # Each duplicated positive row is divided by its repeat count so the copies together weigh as one row.
    fs /= r_targ.repeat_interleave(r_targ).view(-1, 1)
    
    # Keep only pairs whose target belongs to a different input.
    idx = torch.where(fmask == 0)
    loss = fs[idx[0], idx[1]]
    # Hinge, plus the pair count used by the mean: all ordered target pairs minus same-input pairs.
    loss, n = torch.where(loss > 0, loss, 0), (n_inp2targ.sum())**2 - (n_inp2targ**2).sum()
    if cls.reduction == 'mean': return (loss/n).sum()
    elif cls.reduction == 'sum': return loss.sum()
    else: raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
        

In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = mtl_fn(data_repr, lbl2data_repr, lbl2data_data2ptr)
    print(loss)

STAGE:2024-04-19 01:54:04 13866:13866 ActivityProfilerController.cpp:314] Completed Stage: Warm Up


tensor(1.4957, device='cuda:0', grad_fn=<SumBackward0>)


STAGE:2024-04-19 01:54:05 13866:13866 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-19 01:54:05 13866:13866 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [None]:
print(prof)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                              aten::max         0.95%     519.000us         1.11%     610.000us     610.000us           0 b           0 b         512 b           0 b             1  
                                            aten::empty         0.08%      43.000us         0.08%      43.000us      43.000us           0 b           0 b         512 b         512 b             1  
         

In [None]:
@patch
def __call__(cls:MultiTriplet, 
             inp:torch.FloatTensor, 
             targ:torch.LongTensor, 
             n_inp2targ:torch.LongTensor, 
             margin:Optional[float]=None, 
             **kwargs):
    """Experimental loop variant of the multi-positive margin loss.

    `cls` is the `MultiTriplet` instance. For each input, every one of its own targets is compared
    with all targets outside its own block. A `margin` passed here is stored on the instance and
    therefore stays in effect for later calls.
    """
    if margin is not None:
        cls.margin = margin
    sim = inp @ targ.T

    start, terms = 0, []
    for i, n in enumerate(n_inp2targ):
        row = sim[i]
        # Scores of this input's own targets, as a column for broadcasting.
        own = row[start:start+n].view(-1, 1)
        # Roll so the input's own targets sit at the front, then cut them off.
        gap = (row - own + cls.margin).roll(-start, 1)
        terms.append(gap[:, n:].flatten())
        start += n.item()

    loss = torch.hstack(terms)
    loss = torch.where(loss > 0, loss, 0)

    mode = cls.reduction
    if mode == 'mean':
        return loss.mean()
    if mode == 'sum':
        return loss.sum()
    raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
             

In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = mtl_fn(data_repr, lbl2data_repr, lbl2data_data2ptr)
    print(loss)

STAGE:2024-04-19 01:54:10 13866:13866 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-04-19 01:54:10 13866:13866 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-19 01:54:10 13866:13866 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


tensor(1.4957, device='cuda:0', grad_fn=<MeanBackward0>)


In [None]:
print(prof)

-------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                    aten::numpy_T         0.00%       5.000us         0.03%      53.000us      53.000us           0 b           0 b           0 b           0 b             1  
                                    aten::permute         0.03%      38.000us         0.03%      48.000us      48.000us           0 b           0 b           0 b           0 b             1  
                                 aten::

## Triplet

In [None]:
#| export
class Triplet(BaseLoss):
    """Margin-based loss object; it only stores the default margin.

    Remaining keyword arguments (e.g. `reduce`) are forwarded to `BaseLoss`.
    """

    def __init__(self, margin: Optional[float] = 0.8, **kwargs):
        super().__init__(**kwargs)
        self.margin = margin


In [None]:
#| export
@patch
def __call__(cls:Triplet, 
             inp:torch.FloatTensor, 
             targ:torch.LongTensor, 
             n_inp2targ:torch.LongTensor,
             inp2targ_idx:torch.LongTensor,
             inp2targ_idx_smp:torch.LongTensor,
             margin:Optional[float]=None, 
             **kwargs):
    """Margin loss over an input-by-target score matrix, masking out known positives.

    `cls` is the `Triplet` instance (the function is attached with `@patch`).

    inp, targ:        representation matrices; `inp @ targ.T` is scored and its diagonal is used as
                      the positive score, so row `i` of `targ` is presumably the sampled positive
                      of input `i` — TODO confirm.
    n_inp2targ:       number of entries of `inp2targ_idx` belonging to each input.
    inp2targ_idx:     target ids of all inputs, concatenated in input order.
    inp2targ_idx_smp: target ids that the columns of the score matrix correspond to.
    margin:           optional override; it is stored on the instance and stays in effect afterwards.

    Returns a scalar: with 'mean' the summed hinge terms divided by the number of negative pairs;
    with 'sum' the summed hinge terms. Raises `ValueError` for any other reduction.
    """
    if margin is not None:
        cls.margin = margin

    # Map the target ids onto a compact 0..k-1 range shared by both id tensors.
    n_smp = len(inp2targ_idx_smp)
    _, compact = torch.unique(torch.cat([inp2targ_idx_smp, inp2targ_idx]), return_inverse=True)
    smp_cols, pos_cols = compact[:n_smp], compact[n_smp:]
    pos_vals = torch.ones_like(inp2targ_idx)

    # CSR row pointer: a leading zero followed by the running total of per-input counts.
    dev = n_inp2targ.device
    row_ptr = torch.cat([torch.zeros(1, device=dev, dtype=n_inp2targ.dtype), n_inp2targ.cumsum(0)])

    # Dense input-by-target indicator of positives, restricted to the sampled columns and inverted.
    is_pos = torch.sparse_csr_tensor(row_ptr, pos_cols, pos_vals, device=dev).to_dense()
    ne = 1 - is_pos[:, smp_cols]

    sc = inp @ targ.T
    loss = torch.clamp((sc - sc.diagonal().unsqueeze(1) + cls.margin) * ne, 0)

    mode = cls.reduction
    if mode == 'mean':
        return loss.sum()/ne.sum()
    if mode == 'sum':
        return loss.sum()
    raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
        

In [None]:
tl_fn = Triplet(0.8, reduce='mean')

In [None]:
loss = tl_fn(data_repr, lbl2data_repr, lbl2data_data2ptr, lbl2data_idx, kwargs['lbl2data_idx_smp'])

In [None]:
loss

tensor(0.6015, grad_fn=<DivBackward0>)

## SoupCon

In [None]:
#| export
class SoupCon(BaseLoss):
    """Contrastive loss object holding an optional pre-allocated index tensor.

    bsz: length of the cached `arange` tensor `t`; `None` disables the cache (`t` is then `None`).
    Remaining keyword arguments (e.g. `reduce`) are forwarded to `BaseLoss`.
    """

    @delegates(BaseLoss.__init__)
    def __init__(self,
                 bsz:Optional[int]=None, 
                 **kwargs):
        super().__init__(**kwargs)
        self.bsz = bsz
        # `t` is a buffer, not a parameter: it still follows `.to()`/`.cuda()` and keeps its
        # state_dict key, but no longer shows up in `parameters()` and the module's parameter
        # registry is left intact (it used to be overwritten with a plain dict).
        self.register_buffer('t', torch.arange(bsz, dtype=torch.int64) if bsz is not None else None)
        

In [None]:
# Small batch for exercising SoupCon.
bsz = 10
batch = block.train.one_batch(bsz)
# The second output is bound to `lbl2data_input_ids`, as in the MultiTriplet setup cell; it was
# previously unpacked into a second `data_logits`, which overwrote the logits.
data_logits, lbl2data_input_ids, lbl2data_data2ptr, data_repr, lbl2data_repr = m(**batch)

In [None]:
scn_fn = SoupCon(bsz, reduce='mean').to('cuda')

In [None]:
#| export
@patch
def __call__(cls:SoupCon,
             inp:torch.FloatTensor,
             targ:torch.LongTensor,
             n_inp2targ:torch.LongTensor, 
             **kwargs):
    """Vectorised contrastive loss: negative log-softmax over targets, read off at each target's own input.

    `cls` is the `SoupCon` instance (the function is attached with `@patch`).

    inp:        one representation row per input.
    targ:       one representation row per target; it is multiplied with `inp.T`, so despite the
                `LongTensor` annotation it has to share `inp`'s dtype. Rows are assumed to be
                grouped by input, in input order — TODO confirm.
    n_inp2targ: number of target rows per input.

    Returns a scalar: with 'mean' each term is divided by its input's target count and by the
    number of inputs before summing; with 'sum' the plain sum. Raises `ValueError` otherwise.
    """
    n_inp = inp.shape[0]
    # Use the cached index tensor when it is long enough, otherwise allocate a fresh one.
    if cls.t is None or cls.bsz < n_inp:
        inp_ids = torch.arange(n_inp, dtype=torch.int64, device=inp.device)
    else:
        inp_ids = cls.t[:n_inp]

    # For every target row, the index of the input it belongs to.
    owner = inp_ids.repeat_interleave(n_inp2targ)
    # Softmax runs over the target dimension, separately for every input column.
    nll = -F.log_softmax(targ @ inp.T, dim=0)
    pos = nll.gather(1, owner.unsqueeze(1)).squeeze(1)

    if cls.reduce == 'mean':
        pos /= n_inp2targ.repeat_interleave(n_inp2targ)
        pos /= n_inp
        return pos.sum()
    if cls.reduce == 'sum':
        return pos.sum()
    raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
        

In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = scn_fn(data_repr, lbl2data_repr, lbl2data_data2ptr)
    print(loss)

tensor(13.0039, device='cuda:0', grad_fn=<SumBackward0>)


STAGE:2024-04-14 01:53:44 12684:12684 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-04-14 01:53:44 12684:12684 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-14 01:53:44 12684:12684 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [None]:
print(prof)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::slice         0.90%      22.000us         1.18%      29.000us      29.000us           0 b           0 b           0 b           0 b             1  
                                       aten::as_strided         0.29%       7.000us         0.29%       7.000us       7.000us           0 b           0 b           0 b           0 b             1  
         

In [None]:
@patch
def __call__(cls:SoupCon, 
             inp:torch.FloatTensor, 
             targ:torch.LongTensor, 
             n_inp2targ:torch.LongTensor, 
             **kwargs):
    """Experimental loop variant of the contrastive loss.

    `cls` is the `SoupCon` instance. For each input, the negative log-softmax over all targets is
    sliced at the block of targets that belongs to that input.
    """
    n_inp = inp.shape[0]
    nll = -F.log_softmax(inp @ targ.T, dim=1)
    averaging = cls.reduce == 'mean'

    start, chunks = 0, []
    for row, n in zip(nll, n_inp2targ):
        stop = start + n
        own = row[start:stop]
        # With 'mean', each input's terms are averaged over its own target count.
        chunks.append(own / n if averaging else own)
        start = stop
    flat = torch.hstack(chunks)

    if averaging:
        return (flat / n_inp).sum()
    if cls.reduce == 'sum':
        return flat.sum()
    raise ValueError(f'`reduction` cannot be `{cls.reduction}`')
        

In [None]:
with profiler.profile(with_stack=True, profile_memory=True) as prof:
    loss = scn_fn(data_repr, lbl2data_repr, lbl2data_data2ptr)
    print(loss)

STAGE:2024-04-14 01:54:12 12684:12684 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-04-14 01:54:12 12684:12684 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-04-14 01:54:12 12684:12684 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


tensor(13.0039, device='cuda:0', grad_fn=<SumBackward0>)


In [None]:
print(prof)

-----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                         Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-----------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                aten::numpy_T         0.02%      10.000us         0.11%      59.000us      59.000us           0 b           0 b           0 b           0 b             1  
                aten::permute         0.07%      40.000us         0.09%      49.000us      49.000us           0 b           0 b           0 b           0 b             1  
             aten::as_strided         0.02%       9.000us         0.02%       9.000us       9.000us           0 b           0 b           0