In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Append the parent directory to the module search path — presumably so the
# local `keras_hrp` package can be imported without installing it
# (NOTE(review): confirm against the repository layout).
sys.path.append('..')

In [3]:
import keras_hrp as khrp
import tensorflow as tf
import numpy as np

## Prototyping with numpy
Given a random hyperplane matrix $W_{hyper} \in \mathbb{R}^{n \times m}$ and an input vector $x \in \mathbb{R}^{n}$, 
the activation function $h(\cdot)$ is applied to the projection $x \cdot W_{hyper}$ to compute the hash values, i.e. the binary representation $b \in \mathbb{B}^{m}$.

$$
h(z) = 
\begin{cases}
  \text{True}  & \text{if } z > 0 \\
  \text{False} & \text{else}
\end{cases}
\\
b = h\left(x \cdot W_{hyper} \right)
$$


In [4]:
# Problem dimensions: input length n and number of hash bits m
NUM_FEATURES = 64
OUTPUT_SIZE = 1024

# Fix the global NumPy seed so that both draws below are reproducible
np.random.seed(42)

# input vector x (drawn first, which fixes the order of the random stream)
x = np.random.randn(NUM_FEATURES)

# random hyperplane matrix with one column per hash bit
hyperplane = np.random.randn(NUM_FEATURES, OUTPUT_SIZE)
print(f"hyperplane: {hyperplane.shape}")

# project the input onto every hyperplane, i.e. 'x*hyperplane'
projection = x.dot(hyperplane)

# hash function h(z): 1 where the projection is strictly positive, else 0
# (the second heaviside argument is the value returned at exactly z == 0)
hashvalues = np.heaviside(projection, 0).astype(int)
print(hashvalues)

hyperplane: (64, 1024)
[1 0 1 ... 1 0 0]


## The `HashedRandomProjection` class
The above code is equivalent to a non-trainable `Dense` layer and a heaviside `Lambda` layer in Keras, e.g.

```py
import tensorflow as tf
model = tf.keras.Sequential([
    # frozen projection x * W_hyper; no bias term, matching the formula above
    tf.keras.layers.Dense(OUTPUT_SIZE, use_bias=False, trainable=False),
    # heaviside needs two arguments; the second is the value at z == 0 (0 = False)
    tf.keras.layers.Lambda(lambda x: tf.experimental.numpy.heaviside(x, 0.0))
])
```

We implemented this as the Keras layer `HashedRandomProjection`.

In [5]:
# same input as in previous chapter
x2 = tf.convert_to_tensor([x])

# same hyperplane as specified in previous chapter
layer = khrp.HashedRandomProjection(hyperplane=hyperplane)
hashvalues2 = layer(x2)

# is the result the same?
(hashvalues2[0].numpy() == hashvalues).all()

2022-06-30 12:29:45.943295: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


True

## Serialize Boolean List to Integer List
In some cases it is better to store 1x `int8` instead of 8x `bool`.
For example, a `bool` in a NumPy array occupies a full byte (8 bits) even though it carries only 1 bit of information.

In [6]:
# cast the layer output to booleans, then pack them into int8 values
hash_bits = hashvalues2.numpy().astype(bool)
serialized = khrp.bool_to_int8(hash_bits)

print(f"number of integer numbers: {len(serialized)}")
serialized

number of integer numbers: 128


array([ -88, -117,   15,  -79,   42,  -84,  -49,  -99,   81, -118,  -57,
        -47,   -8,  126,  -79,  -10, -117,  -16,  -42,   24, -118,  121,
        -24,  124,   -4,   12,   30,    0, -112,  100,   48,   14,   38,
        -54,  -19,   13,   17,  106,   12,   53,   29,   97,   97,   30,
        -28, -105,  -30,  -74,  -59,  115,  -12,  -31,   -7,   19,  -52,
         76,   78,  -94,  -15,   23,   58, -104,  116,  -29,   96,  121,
        -78,   46,   39,   92,  -92, -122, -114, -115, -106, -124,   24,
        -73,  -73,  -83,  -23,   43,    6,  -33,  -37,    2,   88,    0,
         61,  -72,   62,  -77,   12,  -61,  -46,   99,   64, -105,   82,
         31,   46,   56,  -47,   55,  -95,  -58,   98,  102,  -76,    4,
         92,   42,  -83,   29,   94,  -66,   23,   64,  109,   -6,  -43,
        -40,   51,  -89,   99,  120,  -81, -124], dtype=int8)

And transforming it back:

In [7]:
# decode the int8 values back into the boolean hash
hashvalues3 = khrp.int8_to_bool(serialized)

# compare the decoded values element-wise with the layer output
roundtrip_ok = hashvalues2[0].numpy() == hashvalues3
roundtrip_ok.all()

True