In [None]:
#default_exp tests

```c
#define RUN 255
#define ESC 254
/*
 * Expand a run-length encoded byte stream.
 *
 * Stream format, as consumed below:
 *   RUN, n   -> n copies of `thresh` are written to dest
 *   ESC, v   -> v is written verbatim (lets 254/255 appear as data)
 *   other b  -> b is written verbatim
 *
 * dest   : output buffer; the caller must make it large enough, since the
 *          signature carries no capacity and writes are not bounds-checked.
 * src    : encoded input.
 * nsrc   : number of valid bytes in src.
 * thresh : value substituted for every sample inside a run (stored as a byte).
 *
 * Returns the number of bytes written to dest.
 *
 * A RUN or ESC marker that is the very last byte of src has no operand;
 * decoding stops there instead of reading past the end of the input.
 */
int run_length_decode(unsigned char *dest, unsigned char *src, int nsrc, int thresh) {
    int si = 0, di = 0, nrun;
    unsigned char ib;
    while (si < nsrc) {
        ib = src[si++];
        if (ib == RUN) {
            if (si >= nsrc) {
                break; /* truncated stream: run marker without its count */
            }
            nrun = src[si++];
            while (nrun-- > 0) {
                dest[di++] = (unsigned char)thresh;
            }
        } else if (ib == ESC) {
            if (si >= nsrc) {
                break; /* truncated stream: escape marker without its literal */
            }
            /* next value is literal */
            dest[di++] = src[si++];
        } else {
            /* value */
            dest[di++] = ib;
        }
    }
    return di;
}

/*
 * Run-length encode nsrc bytes of src into dest.
 *
 * Consecutive bytes below `thresh` are counted instead of stored and are
 * flushed as the pair RUN, count. A run stops growing when the byte is not
 * below thresh, when the last input byte is reached, or when 250 bytes have
 * already been counted; the byte that stops the run is always emitted itself.
 * Data bytes equal to RUN or ESC are prefixed with ESC.
 *
 * Returns the number of bytes written to dest. The caller must size dest;
 * no capacity is passed in and writes are not bounds-checked.
 */
int run_length_encode(unsigned char *dest, unsigned char *src, int nsrc, int thresh) {
    int out = 0;
    int pending = 0; /* below-threshold bytes counted but not yet flushed */
    int pos;

    for (pos = 0; pos < nsrc; ++pos) {
        const unsigned char value = src[pos];
        const int is_last = (pos >= nsrc - 1);

        if (value < thresh && !is_last && pending < 250) {
            pending++;
            continue;
        }

        /* Flush the run that ended just before this byte. */
        if (pending > 0) {
            dest[out++] = RUN;
            dest[out++] = pending;
            pending = 0;
        }

        /* Marker values occurring as data need an escape prefix. */
        if (value == RUN || value == ESC) {
            dest[out++] = ESC;
        }
        dest[out++] = value;
    }
    return out;
}
```

In [None]:
#hide
%load_ext autoreload
%autoreload 2 

In [None]:
#export
import sys, os
#import pyximport 
from tqdm.notebook import tqdm
from pathlib import Path
# Insert in Path Project Directory
sys.path.insert(0, str(Path().cwd().parent))
from multiprocessing import set_start_method, Pool
try:
    set_start_method("spawn")
except RuntimeError:
    pass
import gc
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
from pprint import pprint as pp
from datetime import datetime as dt
from typing import *
import itertools
import rfpy
from rfpy.constants import *
from rfpy.parser import *
from rfpy.utils import *
from rfpy.blocks import *
from rfpy.main import export_metadata
from rfpy.parser import _extract_level
from rfpy.cyparser import cy_decode_blocks
from nbdev.showdoc import *
from fastcore.xtras import Path
from fastcore.foundation import L
from fastcore.test import *
from fastcore.parallel import parallel
import numpy as np
#pyximport.install(setup_args={"include_dirs":np.get_include()})
import pandas as pd

In [None]:
#export
def return_blocks(path='binfiles', i=0):
    """Parse one file found under `path` and return the parser's result.

    `get_files` is called on `Path(path)`; the entry at index `i` of what it
    returns is handed to `parse_bin`, whose return value is passed through.
    """
    candidates = get_files(Path(path))
    return parse_bin(candidates[i])

### Original Algorithm

In [None]:
#slow
def test_decompress_orig(blocks):
    RUN = 255
    ESC = 254
    offset = blocks[0].offset
    MIN = offset - 127.5
    decoded = []

    for block in tqdm(blocks):
        src = block.data[block.start:block.stop]
        nsrc = len(src) #block.stop - block.start
        thresh = block.thresh
        i = 0
        j = 0
        dest = []
        while i < nsrc:
            ib = src[i] 
            i+=1
            if ib == RUN:
                nrun = src[i] 
                i+=1
                while nrun > 0:
                    dest.append(MIN + thresh/2)
                    j+=1
                    nrun-=1
            elif ib == ESC:
                # next value is literal
                dest.append(MIN + src[i]/2)
                i+=1 ; j+=1
            else:
                # value
                dest.append(MIN + ib/2)
                j+=1
    return dest

### Internal `while` loop eliminated

In [None]:
#slow
def test_decompress_nowhile(blocks):
    decoded = []
    offset = blocks[0].offset
    MIN = offset - 127.5

    for block in tqdm(blocks):
        src = block.data[block.start:block.stop]
        nsrc = len(src) #block.stop - block.start
        thresh = block.thresh
        i = 0
        j = 0
        dest = []
        while i < nsrc:
            ib = src[i] 
            i+=1
            if ib == RUN:
                nrun = src[i] 
                i+=1
                j+=nrun
                dest.extend([MIN + thresh/2.]*nrun)                
            elif ib == ESC:
                # next value is literal
                dest.append(MIN+src[i]/2.)
                i+=1 ; j+=1
            else:
                # value
                dest.append(MIN+ib/2.)
                j+=1
        decoded.extend(dest)
    return decoded

### Switch from lists to preallocated arrays
row-wise allocation

In [None]:
#slow
def test_prealloc_np(blocks):
    decoded = np.empty((len(blocks), blocks[0].norig), dtype=np.float16)
    decoded.fill(MIN)

    for b, block in enumerate(tqdm(blocks)):
        src = block.data[block.start:block.stop]
        nsrc = len(src)
        thresh = block.thresh
        dest = np.empty(block.norig, dtype=np.float16)
        dest.fill(MIN)
        i = 0
        j = 0
        while i < nsrc:
            ib = src[i] 
            i+=1
            if ib == RUN:
                nrun = src[i] 
                i+=1
                dest[j:j+nrun] = MIN + thresh/2.
                j+=nrun
            elif ib == ESC:
                # next value is literal
                dest[j] = MIN + src[i]/2.
                i+=1 ; j+=1
            else:
                # value
                dest[j] = MIN + ib/2.
                j+=1
        decoded[b] = dest
    return decoded

Make the memory layout explicitly column-major ("column-wise"): one column per block.

In [None]:
#slow
def test_prealloc_np_col(blocks):    
    decoded = np.empty((blocks[0].norig, len(blocks)), dtype=np.float16, order='F')
    decoded.fill(MIN)

    for b, block in enumerate(tqdm(blocks)):
        src = block.data[block.start:block.stop]
        nsrc = len(src)
        thresh = block.thresh
        dest = np.empty(block.norig, dtype=np.float16, order='F')
        dest.fill(MIN)
        i = 0
        j = 0
        while i < nsrc:
            ib = src[i] 
            i+=1
            if ib == RUN:
                nrun = src[i] 
                i+=1
                dest[j:j+nrun] = MIN + thresh / 2.
                j+=nrun
            elif ib == ESC:
                # next value is literal
                dest[j] = MIN + src[i] / 2.
                i+=1 ; j+=1
            else:
                # value
                dest[j] = MIN + ib / 2.
                j+=1
        decoded[:, b] = dest
        #decoded = np.clip(decoded, MIN, offset) # Eric is using????
    return decoded

## Vectorize Offset

In [None]:
#slow
def test_prealloc_np_offset(blocks):
    decoded = np.empty((len(blocks), blocks[0].norig), dtype=np.float16)
    decoded.fill(MIN)

    for b, block in enumerate(tqdm(blocks)):
        src = block.data[block.start:block.stop]
        nsrc = len(src)
        thresh = block.thresh
        dest = np.empty(block.norig, dtype=np.float16)
        dest.fill(MIN)
        i = 0
        j = 0
        while i < nsrc:
            ib = src[i] 
            i+=1
            if ib == RUN:
                nrun = src[i] 
                i+=1
                dest[j:j+nrun] = thresh
                j+=nrun
            elif ib == ESC:
                # next value is literal
                dest[j] = src[i]
                i+=1 ; j+=1
            else:
                # value
                dest[j] = ib
                j+=1
        decoded[b] = dest
    decoded = decoded/2. + MIN
    return decoded

### Create pre-filled array

In [None]:
#slow
def test_prealloc_fill(blocks):
    decoded = np.full((len(blocks), blocks[0].norig), MIN, dtype=np.float16)

    for b, block in enumerate(tqdm(blocks)):
        src = block.data[block.start:block.stop]
        nsrc = len(src)
        thresh = block.thresh
        dest = np.full(block.norig, MIN, dtype=np.float16)
        i = 0
        j = 0
        while i < nsrc:
            ib = src[i] 
            i+=1
            if ib == RUN:
                nrun = src[i] 
                i+=1
                dest[j:j+nrun] = MIN + thresh/2.
                j+=nrun
            elif ib == ESC:
                # next value is literal
                dest[j] = MIN + src[i]/2.
                i+=1 ; j+=1
            else:
                # value
                dest[j] = MIN + ib/2.
                j+=1
        decoded[b] = dest
    return decoded

## Functional Programming - Much Worse
Removing the explicit loops doesn't necessarily eliminate the iteration: `filter` and `map` still run along the array.

In [None]:
#slow
def test_prealloc_functional(blocks):
    """Decode blocks with a split/filter/map pipeline instead of a byte loop.

    For each block the payload is split on byte 0xff, empty chunks are
    dropped, and every remaining chunk is turned into: `chunk[0]` repetitions
    of the block threshold, followed by the rest of the chunk with all 0xfe
    bytes removed. The concatenated result is written to the start of the
    block's row, truncated to `blocks[0].norig` values.

    Returns a `(len(blocks), blocks[0].norig)` float16 array pre-filled with
    `MIN`.

    NOTE(review): `MIN` is not assigned in this function; it must already
    exist at module scope.
    NOTE(review): unlike the loop-based variants, no `MIN + value/2`
    conversion is applied here, so decoded cells hold raw values while
    padding holds `MIN` -- confirm this is intended.
    NOTE(review): splitting on 0xff treats the first chunk as a run chunk even
    when the payload does not begin with 0xff, and does not tell an escaped
    0xff literal from a run marker -- confirm the inputs make this safe.
    """
    decoded = np.full((len(blocks), blocks[0].norig), MIN, dtype=np.float16)
    # Row width; longer decoded rows are cut to this length below.
    MAX = blocks[0].norig
    for b, block in enumerate(tqdm(blocks)):
        src = block.data[block.start:block.stop]
        threshold = block.thresh
        # NOTE(review): `count` is the chunk length *before* 0xfe bytes are
        # stripped; presumably np.fromiter fails when the stripped iterable is
        # shorter than `count` -- verify with a payload containing 0xfe.
        dest = np.concatenate(L(src.split(b'\xff')).filter(lambda o: o != b'').map(lambda o: np.concatenate([np.repeat(threshold,o[0]),  np.fromiter(o[1:].replace(b'\xfe', b''), dtype=np.float16, count=len(o[1:]))])))
        decoded[b][:dest.shape[0]] = dest[:MAX]
    return decoded

## Multiprocessing with preallocated array and maximum elimination of operations - Best Results
May not be feasible given the potential deadlocks

In [None]:
#slow
def test_prealloc_mp(blocks):
    decoded = np.full((len(blocks), blocks[0].norig), MIN, dtype=np.float16)
    block_array = [(a,b) for a,b in zip(decoded, blocks)]
    with Pool(processes=os.cpu_count()) as pool:
        pool.map(decode_blocks, block_array)
    return decoded

In [None]:
# %%time
# decoded = test_prealloc_mp(compressed_blocks)

In [None]:
%%time
blocks = parse_bin('binfiles/rfeye002092_210223_T163131_MaskBroken.bin') ; blocks

Wall time: 10.4 s


{'file_version': 23,
 'string': 'CRFS DATA FILE V023',
 'blocks': defaultdict(fastcore.foundation.L,
             {(21,
               0): (#1) [<rfpy.blocks.DType21 object at 0x00000231ABE6E548>],
              (42,
               0): (#2) [<rfpy.blocks.DType42 object at 0x00000231AEB995C8>,<rfpy.blocks.DType42 object at 0x00000231AEC47548>],
              (42,
               301): (#1) [<rfpy.blocks.DType42 object at 0x00000231ABE59B48>],
              (42,
               311): (#1) [<rfpy.blocks.DType42 object at 0x00000231AEC52688>],
              (42,
               321): (#1) [<rfpy.blocks.DType42 object at 0x00000231AC0074C8>],
              (42,
               331): (#1) [<rfpy.blocks.DType42 object at 0x00000231AEC6BA48>],
              (68,
               331): (#734) [<rfpy.blocks.DType68 object at 0x00000231AEC6BB88>,<rfpy.blocks.DType68 object at 0x00000231AEC95308>,<rfpy.blocks.DType68 object at 0x00000231AE900D08>,<rfpy.blocks.DType68 object at 0x00000231C4F75308>,<rfpy.

In [None]:
# Pick the block list stored under key (68, 301) of the parsed file's 'blocks' mapping.
# NOTE(review): the (truncated) parse output shown above lists DType68 blocks under
# (68, 331); the mapping is a defaultdict, so a missing key yields an empty `L`
# instead of raising -- confirm that (68, 301) is the intended key.
compressed_blocks = blocks['blocks'][(68,301)]

In [None]:
#%load_ext Cython

In [None]:
# %%cython

# cimport cython

# import numpy as np
# cimport numpy as np
# #from cython.parallel import prange

# #ctypedef np.double_t DTYPE_t

# @cython.boundscheck(False)
# @cython.wraparound(False)
# cpdef object cy_decode_block(block):
#     cdef float MIN = block.offset - 127.5
#     cdef int RUN = 255
#     cdef int ESC = 254
#     cdef const unsigned char[:] src = block.data[block.start:block.stop]
#     src = block.data[block.start:block.stop]
#     cdef int nsrc = len(src)
#     cdef int thresh = block.thresh
#     cdef np.ndarray dest =  np.full(block.norig, MIN, dtype=np.float16)
#     cdef int i = 0
#     cdef int j = 0
#     while i < nsrc:
#         ib = src[i] 
#         i+=1
#         if ib == RUN:
#             nrun = src[i] 
#             i+=1
#             dest[j:j+nrun] = MIN + thresh/2.
#             j+=nrun
#         elif ib == ESC:
#             # next value is literal
#             dest[j] = MIN + src[i]/2.
#             i+=1 ; j+=1
#         else:
#             # value
#             dest[j] = MIN + ib/2.
#             j+=1
#     return dest

In [None]:
# %%cython --annotate

# cimport cython

# import numpy as np
# cimport numpy as np
# #from cython.parallel import prange

# #ctypedef np.double_t DTYPE_t

# @cython.boundscheck(False)
# @cython.wraparound(False)
# cpdef object cy_decode_blocks(list blocks):
#     cdef float offset = blocks[0].offset
#     cdef float MIN = offset - 127.5
#     cdef int rows = len(blocks)
#     cdef int columns = blocks[0].norig
#     cdef np.ndarray decoded = np.full((rows, columns), MIN, dtype=np.float16)
#     cdef int RUN = 255
#     cdef int ESC = 254
#     cdef object block
#     cdef int row
#     cdef const unsigned char[:] src
#     cdef int nsrc
#     cdef int thresh = blocks[0].thresh
#     #cdef np.ndarray dest
#     cdef int i
#     cdef int j
#     for row, block in enumerate(blocks):
#         src = block.data[block.start:block.stop]
#         nsrc = len(src)
#         i = 0
#         j = 0
#         while i < nsrc:
#             ib = src[i] 
#             i+=1
#             if ib == RUN:
#                 nrun = src[i] 
#                 i+=1
#                 decoded[row, j:j+nrun] = MIN + thresh/2.
#                 j+=nrun
#             elif ib == ESC:
#                 # next value is literal
#                 decoded[row, j] = MIN + src[i]/2.
#                 i+=1 ; j+=1
#             else:
#                 # value
#                 decoded[row, j] = MIN + ib/2.
#                 j+=1
#     return decoded

In [None]:
# %%cython --annotate

# cimport cython

# import numpy as np
# cimport numpy as np
# #from cython.parallel import prange

# #ctypedef np.double_t DTYPE_t

# @cython.boundscheck(False)
# @cython.wraparound(False)
# cpdef object cy_decode_blocks(list blocks):
#     cdef float offset = blocks[0].offset
#     cdef float MIN = offset - 127.5
#     cdef int rows = len(blocks)
#     cdef int columns = blocks[0].norig
#     cdef np.ndarray decoded = np.full((rows, columns), MIN, dtype=np.float16)
#     cdef list data = [block.data[block.start:block.stop] for block in blocks]
#     cdef int RUN = 255
#     cdef int ESC = 254
#     cdef int row
#     cdef const unsigned char[:] src
#     cdef int nsrc
#     cdef int thresh = blocks[0].thresh
#     cdef int i
#     cdef int j
#     for row in range(rows):
#         src = data[row]
#         nsrc = len(src)
#         i = 0
#         j = 0
#         while i < nsrc:
#             ib = src[i] 
#             i+=1
#             if ib == RUN:
#                 nrun = src[i] 
#                 i+=1
#                 decoded[row, j:j+nrun] = MIN + thresh/2.
#                 j+=nrun
#             elif ib == ESC:
#                 # next value is literal
#                 decoded[row, j] = MIN + src[i]/2.
#                 i+=1 ; j+=1
#             else:
#                 # value
#                 decoded[row, j] = MIN + ib/2.
#                 j+=1
#     return decoded

In [None]:
# %%cython --annotate

# cimport cython

# import numpy as np
# cimport numpy as np
# #from cython.parallel import prange

# #ctypedef np.float_t DTYPE_t

# @cython.boundscheck(False)
# @cython.wraparound(False)
# cpdef object cy_decode_blocks(list blocks):
#     cdef float offset = blocks[0].offset
#     cdef float MIN = offset - 127.5
#     cdef int rows = len(blocks)
#     cdef int columns = blocks[0].norig
#     cdef np.ndarray decoded = np.full((rows, columns), MIN, dtype=np.float16)
#     #cdef DTYPE_t [:, :] decoded_v = decoded
#     cdef list data = [b.data[b.start:b.stop] for b in blocks]
#     cdef int RUN = 255
#     cdef int ESC = 254
#     cdef const unsigned char[:] src
#     cdef int NRSC
#     cdef float thresh = blocks[0].thresh
#     cdef int i
#     cdef int j
#     cdef int ib
#     cdef int nrun
#     cdef Py_ssize_t row   
#     for row in range(rows):
#         src = data[row]
#         nsrc = len(src)
#         i = 0
#         j = 0
#         while i < nsrc:
#             ib = src[i] 
#             i+=1
#             if ib == RUN:
#                 nrun = src[i] 
#                 i+=1
#                 decoded[row, j:j+nrun] = MIN + thresh/2.
#                 j+=nrun
#             elif ib == ESC:
#                 # next value is literal
#                 decoded[row, j] = MIN + src[i]/2.
#                 i+=1 ; j+=1
#             else:
#                 # value
#                 decoded[row, j] = MIN + ib/2.
#                 j+=1
#     return decoded

In [None]:
def test_cython(blocks):
    return cy_decode_blocks(list(blocks))

In [None]:
def test_mp_cy(blocks):
    MIN = blocks[0].offset - 127.5
    decoded = np.full((len(blocks), blocks[0].norig), MIN, dtype=np.float16)
    block_array = [(a,b) for a,b in zip(decoded, blocks)]
    with Pool(processes=os.cpu_count()) as pool:
        pool.map(decode_blocks, block_array)
    return decoded

In [None]:
%%time
decoded = cy_decode_blocks(list(compressed_blocks))

Wall time: 13.5 s


In [None]:
# Drop the notebook's reference to the decoded array, then run a
# garbage-collection pass (the number it returns is shown as the cell output).
del decoded
gc.collect()

154