In [2]:
import tensorflow as tf
import numpy as np

2023-11-28 15:33:07.521897: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-28 15:33:07.576478: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-28 15:33:07.577045: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [25]:
class AmsSketch:
    """
    AMS sketch for approximate second-moment (squared Euclidean norm) estimation.

    A vector of length `d` is folded into a float32 matrix of shape
    (`depth`, `width`): for every sketch row, each coordinate is multiplied by a
    hashed +1/-1 sign and added into one hashed bucket (column) of that row.

    The per-length lookup tables are cached in `precomputed_dict` under the keys
    ('four', d) (signs) and ('indices', d) (scatter indices).
    """

    def __init__(self, depth=5, width=250):
        """ Create a sketcher with freshly drawn random hash parameters.

        Args:
        - depth (int): Number of sketch rows.
        - width (int): Number of buckets (columns) per sketch row.
        """
        self.depth = tf.constant(depth)
        self.width = tf.constant(width)
        # Six int32 parameter rows, one column per sketch row: rows 0-1 feed the
        # bucket hash in `precompute`, rows 2-5 feed the sign hash in
        # `tensor_fourwise` / `fourwise`.
        self.F = tf.random.uniform(shape=(6, depth), minval=0, maxval=(1 << 31) - 1, dtype=tf.int32)
        # Empty sketch that `_sketch_for_vector` scatter-adds into.
        self.zeros_sketch = tf.zeros(shape=(self.depth, self.width), dtype=tf.float32)

        # Maps ('four', d) / ('indices', d) -> lookup tensor for vectors of length d.
        self.precomputed_dict = {}

    def precompute(self, d):
        """ Build and cache the lookup tables for vectors of length `d`.

        Stores two entries in `self.precomputed_dict`:
        - ('four', d): float32 signs (+1/-1). Shape=(d, `depth`).
        - ('indices', d): int32 [row, bucket] pairs. Shape=(d, `depth`, 2).

        Calling it again for the same `d` recomputes and overwrites both entries.

        Args:
        - d (int): Length of the vectors that will be sketched.
        """
        # Bucket (column) chosen for every coordinate in every sketch row.
        pos_tensor = self.tensor_hash31(tf.range(d), self.F[0], self.F[1]) % self.width  # shape=(d, depth)

        self.precomputed_dict[('four', d)] = tf.cast(self.tensor_fourwise(tf.range(d)),
                                                     dtype=tf.float32)  # shape=(d, depth)

        range_tensor = tf.range(self.depth)  # shape=(depth,)

        # Expand dimensions to create a 2D tensor with shape (1, `self.depth`)
        range_tensor_expanded = tf.expand_dims(range_tensor, 0)  # shape=(1, depth)

        # Use tf.tile to repeat the row indices `d` times
        repeated_range_tensor = tf.tile(range_tensor_expanded, [d, 1])  # shape=(d, depth)

        # Pair every row index with its bucket so the result can be used directly
        # as scatter indices into a (`depth`, `width`) sketch.
        self.precomputed_dict[('indices', d)] = tf.stack([repeated_range_tensor, pos_tensor],
                                                         axis=-1)  # shape=(d, depth, 2)

    @staticmethod
    def hash31(x, a, b):
        """ Element-wise hash `a * x + b`, folded and masked to 31 bits.

        NOTE(review): with int32 operands the product presumably wraps around on
        overflow -- confirm that this is the intended hash family.
        """
        r = a * x + b
        # Fold the bits above position 31 back into the value, then keep the low 31 bits.
        fold = tf.bitwise.bitwise_xor(tf.bitwise.right_shift(r, 31), r)
        return tf.bitwise.bitwise_and(fold, 2147483647)

    @staticmethod
    def tensor_hash31(x, a, b):
        """ Vectorised `hash31`: hashes every element of `x` against every column of `a`, `b`.

        Assumed that x is tensor shaped (d,), i.e., a vector (for example,
        indices, i.e., tf.range(d)).

        Args:
        - x (tf.Tensor): Values to hash. Shape=(d,).
        - a (tf.Tensor): Multipliers, broadcast against `x[:, None]`.
        - b (tf.Tensor): Offsets, broadcast against `a * x[:, None]`.

        Returns:
        - tf.Tensor: Hash values; shape=(d, `depth`) when `a` and `b` are rows of `F`.
        """

        # Add a trailing axis so x broadcasts against the per-row parameters: shape=(d, 1)
        x_reshaped = tf.expand_dims(x, axis=-1)

        r = tf.multiply(a, x_reshaped) + b

        fold = tf.bitwise.bitwise_xor(tf.bitwise.right_shift(r, 31), r)

        return tf.bitwise.bitwise_and(fold, 2147483647)

    def tensor_fourwise(self, x):
        """ Sign hash for a whole vector of indices.

        Assumed that x is tensor shaped (d,), i.e., a vector (for example,
        indices, i.e., tf.range(d)).

        Chains three `tensor_hash31` calls (parameters `F[2]` .. `F[5]`) and maps
        bit 15 of the result to -1 / +1.

        Returns:
        - tf.Tensor: Integer signs in {-1, +1}. Shape=(d, `depth`).
        """

        # 1st use of the tensor hash31
        in1 = self.tensor_hash31(x, self.F[2], self.F[3])  # shape=(d, depth)

        # 2nd use: the previous hash acts as the multiplier
        in2 = self.tensor_hash31(x, in1, self.F[4])  # shape=(d, depth)

        # 3rd use
        in3 = self.tensor_hash31(x, in2, self.F[5])  # shape=(d, depth)

        # Isolate bit 15 (value 32768) ...
        in4 = tf.bitwise.bitwise_and(in3, 32768)  # shape=(d, depth)

        # ... and map 0 -> -1, 1 -> +1
        return 2 * (tf.bitwise.right_shift(in4, 15)) - 1  # shape=(d, depth)

    def fourwise(self, x):
        """ Sign hash for a single value `x`; same chain as `tensor_fourwise` without the extra axis.

        Returns:
        - tf.Tensor: Integer signs in {-1, +1}, one per column of `F`.
        """
        h1 = self.hash31(x, self.F[2], self.F[3])
        # From here on the previous hash is the hashed value and x is the multiplier.
        h2 = self.hash31(h1, x, self.F[4])
        h3 = self.hash31(h2, x, self.F[5])
        bit15 = tf.bitwise.right_shift(tf.bitwise.bitwise_and(h3, 32768), 15)
        return 2 * bit15 - 1

    def sketch_for_vector(self, v):
        """ Compute the sketch of a vector using only tensor operations.

        The lookup tables for the vector's length are built on first use and
        reused afterwards.

        Args:
        - v (tf.Tensor): Vector to sketch. Shape=(d,). Anything `tf.cast` accepts
          is allowed; non-float32 input is cast to float32.

        Returns:
        - tf.Tensor: An AMS - Sketch. Shape=(`depth`, `width`).

        Raises:
        - ValueError: If `v` is not one-dimensional.
        """

        # The cached sign table is float32, so the input must be float32 as well.
        v = tf.cast(v, tf.float32)

        if v.shape.rank != 1:
            raise ValueError(f"Expected a vector of shape (d,), got shape {v.shape}")

        d = v.shape[0]

        # Check both tables: the cache may have been assigned from outside and
        # hold only one of them.
        if ('four', d) not in self.precomputed_dict or ('indices', d) not in self.precomputed_dict:
            self.precompute(d)

        return self._sketch_for_vector(v, self.precomputed_dict[('four', d)], self.precomputed_dict[('indices', d)])

    @tf.function
    def _sketch_for_vector(self, v, four, indices):
        """ Graph-compiled core of `sketch_for_vector`.

        Args:
        - v (tf.Tensor): Vector to sketch. Shape=(d,).
        - four (tf.Tensor): Signs from `precompute`. Shape=(d, `depth`).
        - indices (tf.Tensor): Scatter indices from `precompute`. Shape=(d, `depth`, 2).

        Returns:
        - tf.Tensor: The sketch. Shape=(`depth`, `width`).
        """
        v_expand = tf.expand_dims(v, axis=-1)  # shape=(d, 1)

        # shape=(d, depth): +- v_i for each value v_i , i = 1, ..., d
        deltas_tensor = tf.multiply(four, v_expand)

        # Accumulate every signed value into its [row, bucket] cell.
        sketch = tf.tensor_scatter_nd_add(self.zeros_sketch, indices, deltas_tensor)  # shape=(depth, width)

        return sketch

    @staticmethod
    def estimate_euc_norm_squared(sketch):
        """ Estimate the Euclidean norm squared of a vector using its AMS sketch.

        Each row's sum of squares is one estimate; the median over rows is returned.

        Args:
        - sketch (tf.Tensor): AMS sketch of a vector. Shape=(`depth`, `width`).

        Returns:
        - NumPy scalar: Estimated squared Euclidean norm (the result of
          `np.median`, not a tf.Tensor).

        NOTE(review): `np.median` needs concrete values, so this presumably only
        works in eager mode -- confirm before calling it from a `tf.function`.
        """

        norm_sq_rows = tf.reduce_sum(tf.square(sketch), axis=1)
        return np.median(norm_sq_rows)

In [26]:
# Vector length passed to AmsSketch.precompute() below;
# presumably the parameter count of the larger ("adv") model -- TODO confirm.
adv_n = 2_592_202

In [27]:
# Vector length passed to AmsSketch.precompute() below;
# presumably the parameter count of a LeNet model -- TODO confirm.
lenet_n = 61_706

In [28]:
# Sketcher with the class defaults spelled out; the hash parameters are drawn
# at random on construction.
ams_sketch = AmsSketch(depth=5, width=250)

In [29]:
# Build and cache the ('four', adv_n) sign table and the ('indices', adv_n)
# scatter-index table. The int32 index table alone is adv_n * 5 * 2 * 4 bytes
# (~104 MB), which is what triggers the allocator warning below.
ams_sketch.precompute(d=adv_n)

2023-11-28 15:44:18.590965: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 103688080 exceeds 10% of free system memory.


In [30]:
# Build and cache the ('four', lenet_n) and ('indices', lenet_n) lookup tables.
ams_sketch.precompute(d=lenet_n)

In [31]:
# Inspect the cache: one ('four', d) sign table and one ('indices', d)
# [row, bucket] table per precomputed length d.
ams_sketch.precomputed_dict

{('four',
  2592202): <tf.Tensor: shape=(2592202, 5), dtype=float32, numpy=
 array([[-1., -1., -1.,  1.,  1.],
        [ 1., -1.,  1., -1.,  1.],
        [-1., -1.,  1., -1.,  1.],
        ...,
        [ 1.,  1., -1., -1., -1.],
        [ 1.,  1.,  1.,  1., -1.],
        [ 1.,  1., -1.,  1., -1.]], dtype=float32)>,
 ('indices',
  2592202): <tf.Tensor: shape=(2592202, 5, 2), dtype=int32, numpy=
 array([[[  0, 235],
         [  1, 206],
         [  2,  26],
         [  3, 166],
         [  4,  46]],
 
        [[  0, 138],
         [  1, 235],
         [  2, 204],
         [  3, 195],
         [  4,  55]],
 
        [[  0,  33],
         [  1, 131],
         [  2, 132],
         [  3, 224],
         [  4, 231]],
 
        ...,
 
        [[  0,  28],
         [  1, 165],
         [  2, 196],
         [  3, 155],
         [  4, 234]],
 
        [[  0, 143],
         [  1,  61],
         [  2, 171],
         [  3, 184],
         [  4,  24]],
 
        [[  0, 230],
         [  1,  42],
      

In [20]:
import pickle

In [32]:
# Persist only the lookup tables, not the AmsSketch instance itself; the
# instance's hash parameters `F` are therefore not part of this file.
with open('ams_sketch_precomputed_dict.pkl', 'wb') as file:
    pickle.dump(ams_sketch.precomputed_dict, file)

In [3]:
import pickle

In [22]:
# NOTE(review): no cell visible in this notebook writes 'ams_sketch.pkl' (only
# 'ams_sketch_precomputed_dict.pkl' is written), so this cell presumably relies
# on a file left over from an earlier session and may fail on a fresh run.
# pickle.load can execute arbitrary code -- only load files you created yourself.
with open('ams_sketch.pkl', 'rb') as file:
    ams_sketch = pickle.load(file)

2023-11-28 15:34:44.618218: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 103688080 exceeds 10% of free system memory.


In [23]:
# Smoke test: sketch a random normal vector of length 1,000,000; the result is
# a (depth, width) float32 tensor.
ams_sketch.sketch_for_vector(tf.random.normal([1000000,]))

<tf.Tensor: shape=(5, 250), dtype=float32, numpy=
array([[-120.02566 ,   17.28204 ,    0.857486, ...,   24.700958,
         -24.750376,   -7.923587],
       [ -41.559643,   58.537617,   -9.951746, ...,   69.4604  ,
         106.95741 ,   -6.74096 ],
       [-113.09492 ,  -51.7586  , -159.95277 , ...,   77.29765 ,
          46.447197,   64.9099  ],
       [ -43.806896,  -10.961367,  -13.029714, ...,   13.099599,
         -33.556877,  142.75073 ],
       [  17.091013,  -60.21259 ,   19.222637, ...,  -67.353386,
         -26.197866,   16.172062]], dtype=float32)>

In [16]:
# Inspect the scatter indices cached for d = 1,000,000: one [row, bucket] pair
# per coordinate and sketch row.
ams_sketch.precomputed_dict['indices', 1000000]

<tf.Tensor: shape=(1000000, 5, 2), dtype=int32, numpy=
array([[[  0,  77],
        [  1,  49],
        [  2, 241],
        [  3,  60],
        [  4,  67]],

       [[  0,  26],
        [  1, 241],
        [  2, 141],
        [  3,  91],
        [  4,  82]],

       [[  0, 165],
        [  1, 183],
        [  2,  21],
        [  3, 173],
        [  4, 198]],

       ...,

       [[  0, 156],
        [  1, 169],
        [  2, 140],
        [  3,  64],
        [  4,  54]],

       [[  0,  35],
        [  1, 111],
        [  2, 242],
        [  3, 216],
        [  4, 226]],

       [[  0,  68],
        [  1,  53],
        [  2, 170],
        [  3,  48],
        [  4, 211]]], dtype=int32)>

In [15]:
# Scatter indices cached for d = 1,000,000 ([row, bucket] pairs).
# NOTE(review): same expression and output as the In [16] cell; one of the two
# can presumably be removed.
ams_sketch.precomputed_dict['indices', 1000000]

<tf.Tensor: shape=(1000000, 5, 2), dtype=int32, numpy=
array([[[  0,  77],
        [  1,  49],
        [  2, 241],
        [  3,  60],
        [  4,  67]],

       [[  0,  26],
        [  1, 241],
        [  2, 141],
        [  3,  91],
        [  4,  82]],

       [[  0, 165],
        [  1, 183],
        [  2,  21],
        [  3, 173],
        [  4, 198]],

       ...,

       [[  0, 156],
        [  1, 169],
        [  2, 140],
        [  3,  64],
        [  4,  54]],

       [[  0,  35],
        [  1, 111],
        [  2, 242],
        [  3, 216],
        [  4, 226]],

       [[  0,  68],
        [  1,  53],
        [  2, 170],
        [  3,  48],
        [  4, 211]]], dtype=int32)>

In [24]:
import dill

ModuleNotFoundError: No module named 'dill'

In [33]:
# Read back the lookup tables saved from `ams_sketch.precomputed_dict`.
# pickle.load can execute arbitrary code -- only load files you created yourself.
with open('ams_sketch_precomputed_dict.pkl', 'rb') as file:
    ams_sketch_precomputed_dict = pickle.load(file)

2023-11-28 15:45:13.296495: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 103688080 exceeds 10% of free system memory.


In [34]:
# Check that the round trip through pickle preserved the keys, shapes and dtypes.
ams_sketch_precomputed_dict

{('four',
  2592202): <tf.Tensor: shape=(2592202, 5), dtype=float32, numpy=
 array([[-1., -1., -1.,  1.,  1.],
        [ 1., -1.,  1., -1.,  1.],
        [-1., -1.,  1., -1.,  1.],
        ...,
        [ 1.,  1., -1., -1., -1.],
        [ 1.,  1.,  1.,  1., -1.],
        [ 1.,  1., -1.,  1., -1.]], dtype=float32)>,
 ('indices',
  2592202): <tf.Tensor: shape=(2592202, 5, 2), dtype=int32, numpy=
 array([[[  0, 235],
         [  1, 206],
         [  2,  26],
         [  3, 166],
         [  4,  46]],
 
        [[  0, 138],
         [  1, 235],
         [  2, 204],
         [  3, 195],
         [  4,  55]],
 
        [[  0,  33],
         [  1, 131],
         [  2, 132],
         [  3, 224],
         [  4, 231]],
 
        ...,
 
        [[  0,  28],
         [  1, 165],
         [  2, 196],
         [  3, 155],
         [  4, 234]],
 
        [[  0, 143],
         [  1,  61],
         [  2, 171],
         [  3, 184],
         [  4,  24]],
 
        [[  0, 230],
         [  1,  42],
      

In [35]:
# Fresh sketcher (class defaults spelled out). Its randomly drawn hash
# parameters differ from the ones that produced the saved tables.
am = AmsSketch(depth=5, width=250)

In [36]:
# Reuse the saved tables instead of recomputing them. sketch_for_vector() only
# reads the cache for lengths that are already present, so `am` reproduces the
# original sketcher for those lengths; any other length would be precomputed
# from `am`'s own hash parameters.
# NOTE(review): the tables must have been built with the same depth/width as `am`.
am.precomputed_dict = ams_sketch_precomputed_dict

In [37]:
# Confirm the new instance now exposes the loaded tables.
am.precomputed_dict

{('four',
  2592202): <tf.Tensor: shape=(2592202, 5), dtype=float32, numpy=
 array([[-1., -1., -1.,  1.,  1.],
        [ 1., -1.,  1., -1.,  1.],
        [-1., -1.,  1., -1.,  1.],
        ...,
        [ 1.,  1., -1., -1., -1.],
        [ 1.,  1.,  1.,  1., -1.],
        [ 1.,  1., -1.,  1., -1.]], dtype=float32)>,
 ('indices',
  2592202): <tf.Tensor: shape=(2592202, 5, 2), dtype=int32, numpy=
 array([[[  0, 235],
         [  1, 206],
         [  2,  26],
         [  3, 166],
         [  4,  46]],
 
        [[  0, 138],
         [  1, 235],
         [  2, 204],
         [  3, 195],
         [  4,  55]],
 
        [[  0,  33],
         [  1, 131],
         [  2, 132],
         [  3, 224],
         [  4, 231]],
 
        ...,
 
        [[  0,  28],
         [  1, 165],
         [  2, 196],
         [  3, 155],
         [  4, 234]],
 
        [[  0, 143],
         [  1,  61],
         [  2, 171],
         [  3, 184],
         [  4,  24]],
 
        [[  0, 230],
         [  1,  42],
      

In [38]:
# Sketch a random normal vector whose length matches the cached `lenet_n`
# tables, so the loaded tables are used and nothing is recomputed.
am.sketch_for_vector(tf.random.normal([lenet_n]))

<tf.Tensor: shape=(5, 250), dtype=float32, numpy=
array([[  2.1336153 ,  -8.10824   ,  24.406     , ...,  -1.7841159 ,
         -6.759145  , -21.470701  ],
       [  8.225737  , -27.808575  ,  15.916177  , ...,  -2.4987457 ,
         11.953768  ,  -4.5171733 ],
       [  4.877473  , -27.646666  ,   4.278938  , ...,  -7.4662967 ,
        -17.915205  , -14.032585  ],
       [  2.4250946 ,  -4.3206472 ,  20.050192  , ...,  23.823198  ,
        -15.164493  , -16.39989   ],
       [ -0.82484436,  24.153524  ,  -8.369446  , ...,  -8.719382  ,
        -26.020844  ,   5.892068  ]], dtype=float32)>

In [39]:
# Still 4 entries (two lengths x two tables each): the sketch call above hit
# the cache and added nothing.
len(am.precomputed_dict)

4