# Direct chunk writing with LZF

In [1]:
import h5py
import numpy
import lzf

In [2]:
# Target HDF5 file for the demo; mode "w" creates it (truncating any existing file).
filename = 'test_direct_chunk_write_LZF.hdf5'
filehandle = h5py.File(filename, mode="w")

In [3]:
# LZF-compressed float32 dataset laid out as one chunk per (100, 100) slice
# along the first axis; maxshape=None leaves that first axis resizable.
dataset = filehandle.create_dataset(
    "data",
    shape=(100, 100, 100),
    dtype='float32',
    chunks=(1, 100, 100),
    maxshape=(None, 100, 100),
    compression="lzf",
)

In [4]:
# Payload for a single chunk: a 100x100 block of uniform random numbers,
# cast to float32 so it matches the dtype the dataset was created with.
array = numpy.random.rand(100, 100).astype('float32')

In [5]:
# Preview the float32 payload (numpy abbreviates the middle of large arrays).
array

array([[ 0.72920966,  0.48528302,  0.47450361, ...,  0.3657198 ,
         0.4099254 ,  0.33775026],
       [ 0.79744917,  0.68799019,  0.32388309, ...,  0.7443561 ,
         0.00936193,  0.31286716],
       [ 0.33367997,  0.40615857,  0.76277214, ...,  0.73828369,
         0.92175829,  0.59224135],
       ..., 
       [ 0.28518656,  0.9538936 ,  0.33366209, ...,  0.09870639,
         0.22760418,  0.60984403],
       [ 0.481316  ,  0.10145833,  0.40116927, ...,  0.27736571,
         0.62882042,  0.58079082],
       [ 0.6796186 ,  0.17192715,  0.89119798, ...,  0.01405114,
         0.64849418,  0.21691272]], dtype=float32)

In [6]:
# Compress the chunk's raw bytes with LZF.
# The second argument is the maximum size allowed for the compressed output.
# python-lzf sizes its output buffer from this value, so derive it from the
# input size instead of requesting an arbitrary, enormous limit. LZF can
# slightly expand incompressible input (liblzf documents less than 104% of
# the original size), so leave some headroom above array.nbytes.
max_compressed_size = array.nbytes + array.nbytes // 16 + 64
compressed = lzf.compress(array, max_compressed_size)

In [7]:
# Size in bytes of the LZF-compressed payload. In the recorded run this is
# larger than array.nbytes: the random float data did not shrink under LZF.
len(compressed)

40970

In [8]:
# Uncompressed size of the array in bytes, for comparison with len(compressed).
array.nbytes

40000

In [9]:
# Re-wrap the compressed byte string as a 1-D array of unsigned bytes
# (no copy: frombuffer creates a view over the existing buffer).
compressed = numpy.frombuffer(compressed, dtype=numpy.uint8)

In [10]:
# Direct chunk write: hand the already-compressed bytes to HDF5 as the chunk
# at offset (index, 0, 0), bypassing the library's own filter pipeline.
filter_number = 32000  # registered HDF5 filter ID of LZF; identifies the filter, it is NOT a mask
# filter_mask is a bit field over the dataset's filter pipeline: a bit set to 1
# means the filter at that pipeline position was NOT applied to this chunk.
# The payload was compressed with the dataset's only filter (LZF), so no bit
# is set. Passing the filter ID here would be a category error.
filter_mask = 0
index = 0
dataset.id.write_direct_chunk((index, 0, 0), compressed.tobytes(), filter_mask=filter_mask)

In [11]:
# Read the first chunk back through the still-open dataset handle.
# NOTE(review): in the recorded run this shows all zeros rather than the
# written values; the data only reads back correctly after the file has been
# closed and reopened. The cause is not visible here — confirm against the
# HDF5/h5py versions in use.
dataset[0]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [12]:
# Same read-back, this time looking the dataset up by name on the file handle.
# NOTE(review): also all zeros in the recorded run, while the file is still open.
filehandle['data'][0]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [13]:
# Close the write handle; in the recorded run the chunk written above only
# reads back correctly after this close and the subsequent reopen.
filehandle.close()

In [14]:
# Reopen the same file read-only to check what was actually stored on disk.
filehandle = h5py.File(filename, mode="r")

In [15]:
# After reopening, the first chunk reads back as the original random values
# (compare with the array preview earlier in the notebook).
filehandle['data'][0]

array([[ 0.72920966,  0.48528302,  0.47450361, ...,  0.3657198 ,
         0.4099254 ,  0.33775026],
       [ 0.79744917,  0.68799019,  0.32388309, ...,  0.7443561 ,
         0.00936193,  0.31286716],
       [ 0.33367997,  0.40615857,  0.76277214, ...,  0.73828369,
         0.92175829,  0.59224135],
       ..., 
       [ 0.28518656,  0.9538936 ,  0.33366209, ...,  0.09870639,
         0.22760418,  0.60984403],
       [ 0.481316  ,  0.10145833,  0.40116927, ...,  0.27736571,
         0.62882042,  0.58079082],
       [ 0.6796186 ,  0.17192715,  0.89119798, ...,  0.01405114,
         0.64849418,  0.21691272]], dtype=float32)

In [16]:
# Inspect the dataset's filter pipeline: only LZF is configured.
# NOTE(review): `_filters` is a private h5py attribute; the public
# `compression` property is presumably the supported way to query this — confirm.
filehandle['data']._filters

{'lzf': None}

In [17]:
# Confirm the shuffle filter is not part of the dataset's pipeline.
filehandle['data'].shuffle

False

In [18]:
# Verify the round trip: the chunk decoded by HDF5 must equal the array that
# was compressed by hand. Fail loudly on a mismatch instead of printing
# nothing, so a broken write cannot be mistaken for a cell with no output.
roundtrip = filehandle['data'][0]
assert numpy.array_equal(array, roundtrip), \
    "chunk read back from the file differs from the original array"
print('same')

same
