In [1]:
from tensorflow import io, audio, keras
import pandas
import numpy
import os
import gzip
import zlib
from Brett import db, data_util
from functools import reduce, partial

In [2]:
def compose(*F):
    """Compose functions right-to-left.

    ``compose(f, g, h)(x)`` evaluates ``f(g(h(x)))``: the last function given
    is applied first.

    Args:
        *F: Single-argument callables.

    Returns:
        A single-argument callable. With exactly one function, that function
        itself is returned. With no functions, the identity function is
        returned (previously this case raised ``TypeError`` because ``reduce``
        was called on an empty sequence without an initial value).
    """
    if not F:
        return lambda x: x

    def chain(outer, inner):
        # Build one link of the chain: run `inner` first, then `outer`.
        return lambda x: outer(inner(x))

    return reduce(chain, F)

In [3]:
ROOT = '/scr-ssd/mimic/'
def get_rec_path(recID, part):
    """Build the path of a record under ``ROOT``.

    Args:
        recID: Indexable pair; element 0 and element 1 are each converted with
            ``str`` and element 1 is zero-padded to at least 4 characters.
        part: Sub-directory name under ``ROOT`` (e.g. ``'train'``).

    Returns:
        ``ROOT + part + '/' + '<recID[0]>_<recID[1] zero-padded>'``.
    """
    record_part = str(recID[0])
    segment_part = str(recID[1]).zfill(4)
    rec_name = '_'.join((record_part, segment_part))
    return ROOT + part + '/' + rec_name

def read_rec(recID, part='train'):
    """Read one record via ``db.read_record``.

    Args:
        recID: Record identifier, passed on to ``get_rec_path``.
        part: Sub-directory under ``ROOT``; defaults to ``'train'``.

    Returns:
        Whatever ``db.read_record`` returns for the resolved path.
    """
    rec_path = get_rec_path(recID, part)
    return db.read_record(rec_path)

In [4]:
def encoding_path(rec, ext):
    """Return the scratch-file path for an encoded record.

    Args:
        rec: Object with a ``record_name`` string attribute.
        ext: File extension, without the leading dot.

    Returns:
        ``'/scr-ssd/tmp/<record_name>.<ext>'``.
    """
    file_name = rec.record_name + '.' + ext
    return '/scr-ssd/tmp/' + file_name

def encode_recs(encode, recs, ext):
    """Encode every record's ``d_signal`` and write it to its scratch file.

    Args:
        encode: Callable applied to ``rec.d_signal``; its result is handed to
            ``io.write_file`` as the file contents.
        recs: Iterable of records exposing ``record_name`` and ``d_signal``.
        ext: Extension used by ``encoding_path`` to name the output file.
    """
    for rec in recs:
        target = encoding_path(rec, ext)
        payload = encode(rec.d_signal)
        io.write_file(target, payload)

def decode_recs(decode, recs, ext):
    """Read each record's encoded file back and decode it.

    For every record the file at ``encoding_path(rec, ext)`` is read with
    ``io.read_file``, passed through ``decode``, and converted with
    ``.numpy()``.

    Args:
        decode: Callable taking the file contents and returning an object
            with a ``.numpy()`` method.
        recs: Iterable of records exposing ``record_name``.
        ext: Extension the files were written with.

    Returns:
        List of decoded values, in the same order as ``recs``.
    """
    decoded = []
    for rec in recs:
        raw = io.read_file(encoding_path(rec, ext))
        decoded.append(decode(raw).numpy())
    return decoded

def get_size_factor(recs, ext):
    """Mean ratio of encoded file size to the reference ``_x.flac`` file size.

    For each record, the size of ``encoding_path(rec, ext)`` is divided by the
    size of ``ROOT + 'train/' + rec.record_name + '_x.flac'``.

    Args:
        recs: Iterable of records exposing ``record_name``.
        ext: Extension of the encoded files to measure.

    Returns:
        The mean of the per-record ratios, rounded to 2 decimals.
    """
    def ratio(rec):
        # Reference file is stat'ed first, then the encoded file.
        reference_size = os.path.getsize(ROOT + 'train/' + rec.record_name + '_x.flac')
        encoded_size = os.path.getsize(encoding_path(rec, ext))
        return encoded_size / reference_size

    ratios = [ratio(rec) for rec in recs]
    return numpy.round(numpy.mean(ratios), 2)

def distance_decoded(decode, recs, ext):
    """Count elements that differ between original and decoded signals.

    Each record's ``d_signal`` is compared element-wise (``!=``) with the
    corresponding output of ``decode_recs``; the mismatches are summed over
    all records.

    Args:
        decode: Decoder forwarded to ``decode_recs``.
        recs: Iterable of records exposing ``d_signal`` and ``record_name``.
        ext: Extension the encoded files were written with.

    Returns:
        Total number of mismatching elements (0 means a lossless round trip).
    """
    originals = [rec.d_signal for rec in recs]
    restored = decode_recs(decode, recs, ext)
    total = 0
    for original, roundtrip in zip(originals, restored):
        total = total + numpy.abs(original != roundtrip).sum()
    return total

def test_codec(recs, encode, decode, ext):
    print('Encoding Time')
    %time encode_recs(encode, recs, ext)
    print('\n' 'Size Factor:', get_size_factor(recs, ext))
    print('\n' 'Decoding Time')
    %time decode_recs(decode, recs, ext)
    print('\n' 'D(decoded, original):', distance_decoded(decode, recs, ext))

In [6]:
# Settings dictionary handed to data_util.load_metadata_and_pressures.
# NOTE(review): the meaning of individual keys is defined by the project's
# data_util / model code, which is not visible here.
H = dict(
    input_sigs=['II', 'V', 'PLETH', 'RESP'],
    # presumably 24 hours of samples at 125 samples/second — TODO confirm
    max_sig_len=125 * 60 * 60 * 24,
    resp_scale=5,
    layer_count_a=3,
    layer_count_b=5,
    window_size=512,
    activation='relu',
    dropout=0.2,
    filter_count=128,
    dense_units=64,
    kernel_size_a=16,
    stride_a=4,
    kernel_size_b=4,
    batch_size=8,
    batch_buffer_size=2,
    windows_per_record=10,
    optimizer='adam',
    learning_rate=3e-4,
    pressure_smoothing_window=300,
)

# Load the metadata table and pressures using the settings in H.
# The index of `metadata` is what the next cell samples record IDs from.
metadata, pressures = data_util.load_metadata_and_pressures(H)

In [8]:
# Shuffle all metadata rows with a fixed seed (random_state=7) and keep the
# first 200 index entries as the record IDs used for the benchmark.
recIDs = metadata.sample(frac=1, random_state=7).index[:200]
display(recIDs[:5])
# Read the selected records (read_rec defaults to the 'train' part); %time reports the cost.
%time recs = [read_rec(i) for i in recIDs]
# Overwrite each record's d_signal with db.to_digital(p_signal, rec); the codec
# helpers (encode_recs, distance_decoded) read rec.d_signal.
for rec in recs:
    rec.d_signal = db.to_digital(rec.p_signal, rec)

MultiIndex([(3189000,   10),
            (3987729,   11),
            (3334855,  805),
            (3357886,  729),
            (3472281, 1342)],
           names=['record_id', 'segment'])

CPU times: user 29.4 s, sys: 7.24 s, total: 36.7 s
Wall time: 36.7 s


### Serial

In [13]:
def encode_serial(x):
    """Serialize ``x`` with ``io.serialize_tensor`` and return its ``.numpy()`` value."""
    serialized = io.serialize_tensor(x)
    return serialized.numpy()


def decode_serial(z):
    """Parse serialized tensor data ``z`` back into a tensor with ``out_type='int16'``."""
    return io.parse_tensor(z, out_type='int16')


# Baseline: uncompressed serialized tensors.
test_codec(recs, encode_serial, decode_serial, ext='serial')

Encoding Time
CPU times: user 2.78 s, sys: 5.66 s, total: 8.44 s
Wall time: 8.47 s

Size Factor: 3.46

Decoding Time
CPU times: user 2.47 s, sys: 3.66 s, total: 6.13 s
Wall time: 6.13 s

D(decoded, original): 0


### WAV

In [None]:
def encode_wav(x):
    """WAV-encode ``x`` after dividing by 2**15, using a sample rate of 125."""
    scaled = x / 2**15
    return audio.encode_wav(scaled, 125)


def decode_wav(z):
    """Decode WAV data ``z`` and undo the scaling: multiply by 2**15, then round."""
    samples = audio.decode_wav(z).audio
    return keras.backend.round(samples * 2**15)


test_codec(recs, encode_wav, decode_wav, ext='wav')

Encoding Time
CPU times: user 12.8 s, sys: 7.81 s, total: 20.6 s
Wall time: 20.6 s

Size Factor: 3.46

Decoding Time


## Compressed

In [10]:
def encode_serial(x):
    """Serialize ``x`` with ``io.serialize_tensor`` and return its ``.numpy()`` value."""
    serialized = io.serialize_tensor(x)
    return serialized.numpy()


def decode_serial(z):
    """Parse serialized tensor data ``z`` back into a tensor with ``out_type='int16'``."""
    return io.parse_tensor(z, out_type='int16')


def unzip(t):
    """Return a decompressor for compression type ``t``.

    The returned callable takes compressed data ``z`` and calls
    ``io.decode_compressed(z, compression_type=t)``.
    """
    def decompress(z):
        return io.decode_compressed(z, compression_type=t)
    return decompress

### GZIP Serial

In [12]:
def compress_gzip(x):
    """Compress ``x`` with ``gzip.compress`` at ``compresslevel=6``."""
    return gzip.compress(x, compresslevel=6)


# compose applies right-to-left: serialize first, then gzip-compress.
encode_gzip = compose(compress_gzip, encode_serial)
# Inverse pipeline: decompress first, then parse the serialized tensor.
decode_gzip = compose(decode_serial, unzip('GZIP'))

test_codec(recs, encode_gzip, decode_gzip, ext='gzip')

Encoding Time
CPU times: user 2min 31s, sys: 5.6 s, total: 2min 36s
Wall time: 2min 36s

Size Factor: 2.23

Decoding Time
CPU times: user 21.6 s, sys: 4.7 s, total: 26.3 s
Wall time: 26.3 s

D(decoded, original): 0


### ZLIB Serial

In [11]:
def compress_zlib(x):
    """Compress ``x`` with ``zlib.compress`` at ``level=6``."""
    return zlib.compress(x, level=6)


# compose applies right-to-left: serialize first, then zlib-compress.
encode_zlib = compose(compress_zlib, encode_serial)
# Inverse pipeline: decompress first, then parse the serialized tensor.
decode_zlib = compose(decode_serial, unzip('ZLIB'))

test_codec(recs, encode_zlib, decode_zlib, ext='zlib')

Encoding Time
CPU times: user 2min 29s, sys: 4.8 s, total: 2min 33s
Wall time: 2min 33s

Size Factor: 2.23

Decoding Time
CPU times: user 20.3 s, sys: 4.57 s, total: 24.8 s
Wall time: 24.8 s

D(decoded, original): 0
