UPDATE ON 2023/04/20

1. This notebook aims to verify the concept of spatial pyramid pooling (SPP).
2. TensorFlow Addons already provides an API for [spatial pyramid pooling](https://www.tensorflow.org/addons/api_docs/python/tfa/layers/SpatialPyramidPooling2D).

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
class Spatial_Pyramid_Pooling(tf.keras.layers.Layer):
    """Spatial pyramid pooling (SPP) over a channels-last feature map.

    For every pyramid level ``p`` in ``pool_list`` the spatial plane is cut
    into a ``p x p`` grid and each grid cell is max-pooled.  The length of
    the result therefore depends only on the number of channels and on
    ``pool_list`` -- not on the height/width of the input.

    Args:
        pool_list: Grid sizes of the pyramid levels (positive integers).
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).

    Raises:
        ValueError: If any pyramid level is smaller than 1.
    """

    def __init__(self, pool_list=(1, 2, 4), **kwargs):
        super().__init__(**kwargs)
        # Copy into a fresh list so that neither the default value nor a
        # caller-owned sequence is shared between layer instances.
        self.pool_list = list(pool_list)
        if any(p_size < 1 for p_size in self.pool_list):
            raise ValueError(
                "pool_list must contain positive integers, got %r"
                % (self.pool_list,)
            )
        # Number of pooled values produced per channel.
        self.out_len = sum(p_size * p_size for p_size in self.pool_list)

    def get_config(self):
        """Return the layer configuration, including ``pool_list``."""
        config = super().get_config()
        config["pool_list"] = list(self.pool_list)
        return config

    @staticmethod
    def _cell_bounds(index, cell_size, axis_len):
        """Return the int32 ``[start, stop)`` slice of grid cell ``index``.

        ``cell_size`` is the (fractional) float32 extent of one cell and
        ``axis_len`` the int32 length of the axis being partitioned.
        """
        start = index * cell_size
        stop = start + cell_size
        # tf.round rounds halves to the nearest even integer.
        start = tf.cast(tf.round(start), tf.int32)
        stop = tf.cast(tf.round(stop), tf.int32)
        # When the axis is shorter than the grid, rounding can yield an
        # empty slice; force every cell to cover at least one element so
        # the max-pool never reduces over zero elements.
        start = tf.minimum(start, axis_len - 1)
        stop = tf.maximum(stop, start + 1)
        return start, stop

    def call(self, x):
        """Max-pool ``x`` over every cell of every pyramid level.

        Args:
            x: Channels-last input of shape
                ``[batch, height, width, channels]``.

        Returns:
            Tensor of shape ``[batch, channels * out_len]``.  The values of
            one channel are stored contiguously; within a channel the cells
            are ordered by pyramid level, then column index, then row index.
        """
        x = tf.convert_to_tensor(x)

        # Use the dynamic shape so that unknown (None) batch/height/width
        # dimensions are supported as well as fully static ones.
        dyn_shape = tf.shape(x)
        n_rows, n_cols = dyn_shape[1], dyn_shape[2]
        n_ch = x.shape[-1]
        if n_ch is None:
            n_ch = dyn_shape[-1]

        pooled = []
        for p_size in self.pool_list:
            # Fractional number of rows/cols covered by one grid cell.
            rows_per_cell = tf.cast(n_rows, tf.float32) / p_size
            cols_per_cell = tf.cast(n_cols, tf.float32) / p_size
            for j in range(p_size):
                c0, c1 = self._cell_bounds(j, cols_per_cell, n_cols)
                for k in range(p_size):
                    r0, r1 = self._cell_bounds(k, rows_per_cell, n_rows)
                    cell = x[:, r0:r1, c0:c1, :]
                    pooled.append(tf.math.reduce_max(cell, axis=(1, 2)))

        # [batch, channels] * out_len  ->  [batch, channels, out_len]
        stacked = tf.stack(pooled, axis=-1)
        # -1 keeps the batch dimension free; it may be unknown statically.
        return tf.reshape(stacked, [-1, n_ch * self.out_len])
                

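If the height and width of conv_out are changed before it is fed into spp(), the shape of spp_out stays the same: each pyramid level p contributes p×p max-pooled values per channel, so the output length is n_channels × (1 + 4 + 16) = 256 × 21 = 5376 regardless of the input size. This verifies that SPP can handle feature maps (and hence images) of various shapes.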

In [3]:
# Two random feature maps laid out as [N_batch, Height, Width, N_channel];
# they share batch size and channel count and differ only in height/width.
conv_out1, conv_out2 = (
    np.random.rand(*dims)
    for dims in ((16, 20, 20, 256), (16, 5, 5, 256))
)
print('conv_out1.shape: ', np.shape(conv_out1))
print('conv_out2.shape: ', np.shape(conv_out2))

conv_out1.shape:  (16, 20, 20, 256)
conv_out2.shape:  (16, 5, 5, 256)


In [4]:
# Build one SPP layer with the default pyramid levels; the same instance
# is applied to both feature maps below.
spp = Spatial_Pyramid_Pooling()

In [5]:
# Run the same layer on both feature maps, which differ in height/width.
spp_out1 = spp(conv_out1)
spp_out2 = spp(conv_out2)

Metal device set to: Apple M1


2023-04-20 01:00:28.706616: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-04-20 01:00:28.709369: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
# Show both output shapes and check that they match even though the two
# inputs had different spatial sizes.
print('spp_out1.shape', spp_out1.shape)
print('spp_out2.shape', spp_out2.shape)
assert spp_out1.shape == spp_out2.shape

spp_out1.shape (16, 5376)
spp_out2.shape (16, 5376)
