# Padding Time Trial

Compare the previous padding implementation (`providence_pad_sequence_old`, defined below) with the new `providence_pad_sequence` to see which is faster.


**Raytheon Technologies proprietary**

Export controlled - see license file

In [1]:
from providence.dataloaders import providence_pad_sequence, is_list_of

In [3]:
from typing import Union, Sequence, List, Tuple

from numpy import ndarray, random as np_random, shape
from torch import device, Tensor, zeros

def providence_pad_sequence_old(
    data: Union[Sequence[Tensor], List[ndarray]], target_device: device = device('cpu')
) -> Tuple[Tensor, List[int]]:
    """
    Padding function for variable length sequences (previous implementation,
    kept as the baseline for the time trials in this notebook).

    This function stacks a list of panels into one zero-padded tensor. The
    result will resemble something akin to the following:

    .. code-block::

            /     FEATURE2   /     FEATURE2   /     FEATURE2    /|
           /_______________ /________________/________________ / |
          /     FEATURE1   /     FEATURE1   /     FEATURE1    /| |
         /_______________ / _______________/_______________  / |/|
    T1   |   Subject1    |   Subject2    |   Subject3       | /| |
         |_______________|_______________|__________________|/ |/|
    T2   |   Subject1    |   Subject2    |   Subject3       | /| |
         |_______________|_______________|__________________|/ | |
         |               |               |                  |  | |
         ...

    Every sequence is copied into its own SUBJECT slot; time steps past a
    sequence's own length stay zero.

    :param data: a list of NxM matrices (numpy arrays or Tensors) or a single
        Tensor whose first axis enumerates the sequences. All sequences must
        share the trailing (feature) dimensions of ``data[0]``.
    :param target_device: device on which the padded tensor is allocated
    :return: (Tensor, List[int]) the padded TIME x SUBJECT x FEATURES tensor
        and the original length of every sequence, in input order
    :raises ValueError: if ``data`` contains no sequences
    """
    # ``len`` rather than truthiness: ``bool()`` of a multi-element Tensor raises.
    if len(data) == 0:
        raise ValueError("data must contain at least one sequence to pad")

    lengths = [len(x) for x in data]
    num_features = data[0].shape[1:]
    dims = (max(lengths), len(data)) + num_features  # The resulting tensor will be TIME x SUBJECT x FEATURES
    padded = zeros(*dims, device=target_device)  # zero tensor with dimensions *dims

    # NOTE: the per-branch copies below are deliberately left as they were so
    # this function stays a faithful baseline for the timings recorded below.
    # Indexing with ``[:length, i]`` (no trailing ``:``) also supports
    # sequences that have no feature axis.
    if isinstance(data, Tensor):
        data = data.clone().detach()
        for i, sequence in enumerate(data):
            padded[:lengths[i], i] = sequence
    elif is_list_of(data, Tensor):
        for i, sequence in enumerate(data):
            padded[:lengths[i], i] = sequence.clone().detach()
    else:
        for i, sequence in enumerate(data):
            # The legacy ``Tensor(...)`` constructor only accepts a CPU device,
            # so build on the CPU first and move to the target afterwards.
            padded[:lengths[i], i] = Tensor(sequence).to(target_device)
    return padded, lengths

## Numpy Arrays

In [4]:
from pandas import DataFrame
from string import ascii_letters
from typing import List

In [5]:
def list_of_numpy_arrays(n_devices: int = 5, n_features: int = 4, sequence_bounds = (10, 20)) -> List[ndarray]:
    """Create ``n_devices`` random sequences with ``n_features`` columns each.

    Each array has shape ``(length, n_features)``, where ``length`` is drawn
    with ``np_random.randint(sequence_bounds[0], sequence_bounds[1])`` (upper
    bound exclusive) and the values come from ``np_random.random``.

    The arrays are generated directly rather than through an intermediate
    DataFrame: the letter-based column labels that required capped
    ``n_features`` at ``len(ascii_letters)`` (52) and were discarded anyway.
    Random numbers are still drawn in the same order (length, then values).
    """
    return [
        np_random.random(size=(np_random.randint(sequence_bounds[0], sequence_bounds[1]), n_features))
        for _ in range(n_devices)
    ]

[shape(array) for array in list_of_numpy_arrays()]

[(16, 4), (13, 4), (15, 4), (19, 4), (11, 4)]

In [6]:
[shape(array) for array in list_of_numpy_arrays(n_devices=10, n_features=20)]

[(11, 20),
 (10, 20),
 (12, 20),
 (15, 20),
 (10, 20),
 (18, 20),
 (19, 20),
 (15, 20),
 (11, 20),
 (10, 20)]

### Functional invariant holds: both implementations produce the same padded shape

In [7]:
test_sequence_np = list_of_numpy_arrays()
print([shape(array) for array in test_sequence_np])
max_length = max(arr.shape[0] for arr in test_sequence_np)
print(f"{max_length = }")

[(14, 4), (15, 4), (12, 4), (17, 4), (19, 4)]
max_length = 19


In [8]:
padded, lengths = providence_pad_sequence_old(test_sequence_np)
assert padded.shape == (max_length, len(test_sequence_np), test_sequence_np[0].shape[-1])

In [9]:
padded, lengths = providence_pad_sequence(test_sequence_np)
assert padded.shape == (max_length, len(test_sequence_np), test_sequence_np[0].shape[-1])

### (Actual) time trial: small data

In [10]:
%timeit padded, lengths = providence_pad_sequence_old(test_sequence_np)

84 µs ± 6.93 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [11]:
%timeit padded, lengths = providence_pad_sequence(test_sequence_np)

58.5 µs ± 420 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### time trial: realistic data - Backblaze

In [12]:
test_sequence_np_realistic = list_of_numpy_arrays(n_devices=700, n_features=20, sequence_bounds=(30, 100))

In [13]:
%timeit padded, lengths = providence_pad_sequence_old(test_sequence_np_realistic)

12.9 ms ± 1.15 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
%timeit padded, lengths = providence_pad_sequence(test_sequence_np_realistic)

9.3 ms ± 58.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### time trial: realistic data - NASA

In [15]:
test_sequence_np_realistic = list_of_numpy_arrays(n_devices=100, n_features=26, sequence_bounds=(200, 300))

In [16]:
%timeit padded, lengths = providence_pad_sequence_old(test_sequence_np_realistic)

3.19 ms ± 18.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [17]:
%timeit padded, lengths = providence_pad_sequence(test_sequence_np_realistic)

2.91 ms ± 66.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### time trial: big data

In [18]:
test_sequence_np_big = list_of_numpy_arrays(n_devices=200, n_features=52, sequence_bounds=(100, 500))

In [19]:
%timeit padded, lengths = providence_pad_sequence_old(test_sequence_np_big)

12.7 ms ± 266 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
%timeit padded, lengths = providence_pad_sequence(test_sequence_np_big)

19.7 ms ± 267 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## PyTorch Tensors

In [21]:
import torch as pt

In [22]:
def list_of_tensors(n_devices: int = 5, n_features: int = 4, sequence_bounds = (10, 20)) -> List[pt.Tensor]:
    """Create ``n_devices`` random tensors of shape ``(length, n_features)``.

    ``length`` is drawn per tensor with
    ``np_random.randint(sequence_bounds[0], sequence_bounds[1])`` (upper bound
    exclusive); the values are filled by ``pt.rand``.
    """
    sequences = []
    for _ in range(n_devices):
        n_steps = np_random.randint(sequence_bounds[0], sequence_bounds[1])
        sequences.append(pt.rand(n_steps, n_features))
    return sequences

[tens.size() for tens in list_of_tensors()]

[torch.Size([16, 4]),
 torch.Size([16, 4]),
 torch.Size([11, 4]),
 torch.Size([10, 4]),
 torch.Size([17, 4])]

### Time trial: small data

In [23]:
test_sequence_pt = list_of_tensors()

In [24]:
%timeit padded, lengths = providence_pad_sequence_old(test_sequence_pt)

61.5 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [25]:
%timeit padded, lengths = providence_pad_sequence(test_sequence_pt)

27.7 µs ± 392 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Time trial: realistic data - Backblaze

In [26]:
test_sequence_pt_realistic = list_of_tensors(n_devices=700, n_features=20, sequence_bounds=(30, 100))

In [27]:
%timeit padded, lengths = providence_pad_sequence_old(test_sequence_pt_realistic)

9.49 ms ± 97 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [28]:
%timeit padded, lengths = providence_pad_sequence(test_sequence_pt_realistic)

4.26 ms ± 94.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Time trial: realistic data - NASA

In [29]:
test_sequence_pt_realistic = list_of_tensors(n_devices=100, n_features=26, sequence_bounds=(200, 300))

In [30]:
%timeit -n 1 -r 1000 padded, lengths = providence_pad_sequence_old(test_sequence_pt_realistic)

The slowest run took 7.47 times longer than the fastest. This could mean that an intermediate result is being cached.
2.46 ms ± 764 µs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [31]:
%timeit -n 1 -r 1000 padded, lengths = providence_pad_sequence(test_sequence_pt_realistic)

1.34 ms ± 285 µs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


### Time trial: realistic data - NASA AGG

In [32]:
test_sequence_pt_realistic = list_of_tensors(n_devices=700, n_features=26, sequence_bounds=(200, 300))

In [33]:
%timeit -n 1 -r 1000 padded, lengths = providence_pad_sequence_old(test_sequence_pt_realistic)

17 ms ± 1.02 ms per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [34]:
%timeit -n 1 -r 1000 padded, lengths = providence_pad_sequence(test_sequence_pt_realistic)

10.4 ms ± 978 µs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


### Time trial: big data

In [35]:
test_sequence_pt_big = list_of_tensors(n_devices=700, n_features=50, sequence_bounds=(300, 700))

In [36]:
%timeit -n 1 -r 1000 padded, lengths = providence_pad_sequence_old(test_sequence_pt_big)

45.2 ms ± 3.83 ms per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [37]:
%timeit -n 1 -r 1000 padded, lengths = providence_pad_sequence(test_sequence_pt_big)

30.7 ms ± 2.17 ms per loop (mean ± std. dev. of 1000 runs, 1 loop each)


### Time trial: ridiculous data?

In [38]:
test_sequence_pt_big = list_of_tensors(n_devices=1000, n_features=52, sequence_bounds=(4000, 7000))

In [39]:
%timeit -n 1 -r 100 padded, lengths = providence_pad_sequence_old(test_sequence_pt_big)

The slowest run took 5.70 times longer than the fastest. This could mean that an intermediate result is being cached.
699 ms ± 286 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)


In [40]:
%timeit -n 1 -r 100 padded, lengths = providence_pad_sequence(test_sequence_pt_big)

490 ms ± 27.4 ms per loop (mean ± std. dev. of 100 runs, 1 loop each)
