In [5]:
from pyfastpfor import *
import numpy as np
# Get the list of all codecs. Because the call is the last expression in the
# cell, its return value (a list of codec-name strings) is shown as the cell
# output; names such as 'simdbinarypacking' are what getCodec() is given later.
getCodecList()

['BP32',
 'copy',
 'fastbinarypacking16',
 'fastbinarypacking32',
 'fastbinarypacking8',
 'fastpfor128',
 'fastpfor256',
 'maskedvbyte',
 'newpfor',
 'optpfor',
 'pfor',
 'pfor2008',
 'simdbinarypacking',
 'simdfastpfor128',
 'simdfastpfor256',
 'simdgroupsimple',
 'simdgroupsimple_ringbuf',
 'simdnewpfor',
 'simdoptpfor',
 'simdpfor',
 'simdsimplepfor',
 'simple16',
 'simple8b',
 'simple8b_rle',
 'simple9',
 'simple9_rle',
 'simplepfor',
 'streamvbyte',
 'varint',
 'varintg8iu',
 'varintgb',
 'vbyte',
 'vsencoding']

In [6]:
arrSize = 128 * 32
maxVal = 2048
# 1. Example without data differencing

# All arrays the library uses must be contiguous-memory C-style numpy arrays
inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')
# Destination buffer for the decompressed values (same length as the input)
inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')

# To be on the safe side, let's reserve plenty of additional memory:
# sometimes the size of compressed data is not smaller than the size
# of the original one
inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')

# Obtain a codec by name
codec = getCodec('simdbinarypacking')

# Compress data; compSize is the compressed length that decodeArray takes back below
compSize = codec.encodeArray(inp, arrSize, inpComp, len(inpComp))

print('Compression ratio: %g' % (float(compSize)/arrSize))

# Decompress data. The call is deliberately kept outside of the assert:
# assert statements are stripped when Python runs with -O / PYTHONOPTIMIZE,
# which would silently skip the decoding itself.
decompSize = codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize)
assert decompSize == arrSize, 'decoded %d values, expected %d' % (decompSize, arrSize)
# The round trip must reproduce the input exactly
assert np.array_equal(inpCompDecomp, inp), 'decompressed data differs from the input'

Compression ratio: 0.34668


In [7]:
arrSize = 128 * 32
maxVal = 1024 * 1024 * 1024 * 2

# 2. Example with slower data differencing

# All arrays the library uses must be contiguous-memory C-style numpy arrays
inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')
# Destination buffer for the decompressed values (same length as the input)
inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')

# Differencing is only useful on sorted input; keep `inp` as the sorted
# reference and apply the differencing to a separate copy
inp.sort()
inpCopy = np.array(inp, copy = True, dtype = np.uint32, order = 'C')

# To be on the safe side, let's reserve plenty of additional memory:
# sometimes the size of compressed data is not smaller than the size
# of the original one
inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')

# Carry out data differencing to convert a sorted sequence of large numbers
# into a sequence of small numbers (differences between adjacent numbers)
delta1(inpCopy, arrSize)


# Obtain a codec by name
codec = getCodec('simdbinarypacking')

# Compress data; compSize is the compressed length that decodeArray takes back below
compSize = codec.encodeArray(inpCopy, arrSize, inpComp, len(inpComp))

print('Compression ratio: %g' % (float(compSize)/arrSize))

# Decompress data. The call is deliberately kept outside of the assert:
# assert statements are stripped when Python runs with -O / PYTHONOPTIMIZE,
# which would silently skip the decoding itself.
decompSize = codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize)
assert decompSize == arrSize, 'decoded %d values, expected %d' % (decompSize, arrSize)
# Reverse differencing by computing the prefix sum
prefixSum1(inpCompDecomp, arrSize)

# The round trip must reproduce the sorted input exactly
assert np.array_equal(inpCompDecomp, inp), 'decompressed data differs from the input'

Compression ratio: 0.688477


In [8]:
arrSize = 128 * 32
maxVal = 1024 * 1024 * 1024 * 2

# 3. Example with faster but coarser data differencing

# All arrays the library uses must be contiguous-memory C-style numpy arrays
inp = np.array(np.random.randint(0, maxVal, arrSize), dtype = np.uint32, order = 'C')
# Destination buffer for the decompressed values (same length as the input)
inpCompDecomp = np.zeros(arrSize, dtype = np.uint32, order = 'C')

# Differencing is only useful on sorted input; keep `inp` as the sorted
# reference and apply the differencing to a separate copy
inp.sort()
inpCopy = np.array(inp, copy = True, dtype = np.uint32, order = 'C')

# To be on the safe side, let's reserve plenty of additional memory:
# sometimes the size of compressed data is not smaller than the size
# of the original one
inpComp = np.zeros(arrSize + 1024, dtype = np.uint32, order = 'C')

# Carry out data differencing to convert a sorted sequence of large numbers
# into a sequence of small numbers (differences between numbers that are 4 indices apart)
delta4(inpCopy, arrSize)


# Obtain a codec by name
codec = getCodec('simdbinarypacking')

# Compress data; compSize is the compressed length that decodeArray takes back below
compSize = codec.encodeArray(inpCopy, arrSize, inpComp, len(inpComp))

print('Compression ratio: %g' % (float(compSize)/arrSize))

# Decompress data. The call is deliberately kept outside of the assert:
# assert statements are stripped when Python runs with -O / PYTHONOPTIMIZE,
# which would silently skip the decoding itself.
decompSize = codec.decodeArray(inpComp, compSize, inpCompDecomp, arrSize)
assert decompSize == arrSize, 'decoded %d values, expected %d' % (decompSize, arrSize)
# Reverse differencing by computing the prefix sum
prefixSum4(inpCompDecomp, arrSize)

# The round trip must reproduce the sorted input exactly
assert np.array_equal(inpCompDecomp, inp), 'decompressed data differs from the input'

Compression ratio: 0.717773
