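Fixed-sparsity ReLU on CIFAR-10: a small convolutional classifier in which each ReLU is preceded by a non-trainable per-channel bias. During training that bias is periodically nudged (as an exponential moving average) toward the value that would zero out a target fraction (here 80%) of each channel's activations.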

In [3]:
import numpy as np

import tensorflow as tf
import tensorflow.keras as keras


In [4]:
import matplotlib.pyplot as plt

# Default every figure in this notebook to a 12 x 6 canvas.
plt.rcParams["figure.figsize"] = (12, 6)

In [5]:
from tensorflow.keras.layers import Layer

class FixedSparsityRelu(Layer):
    """ReLU preceded by a non-trainable, per-channel bias.

    The layer computes ``relu(x + bias)`` where ``bias`` has one entry per
    channel (last axis of the input), starts at zero and is excluded from
    gradient training.  The layer never changes ``bias`` itself and never
    reads ``sparsity`` or ``alpha``; they are stored so that external code
    (the training loop in this notebook) can overwrite ``bias`` with
    ``alpha * bias + (1 - alpha) * target``.

    Args:
        sparsity: target fraction of activations to zero out, in ``[0, 1]``.
        alpha: smoothing factor used by the external bias update
            (weight given to the current bias).
        **kwargs: forwarded to ``keras.layers.Layer``.

    Raises:
        ValueError: if ``sparsity`` is outside ``[0, 1]``.
    """

    def __init__(
        self,
        sparsity,
        alpha=0.95,
        **kwargs
    ):
        # Initialise the base Layer before attaching our own attributes.
        super(FixedSparsityRelu, self).__init__(**kwargs)
        # The training loop uses 100*sparsity as a percentile, so reject
        # out-of-range values here rather than deep inside np.percentile.
        if not 0.0 <= sparsity <= 1.0:
            raise ValueError(
                "sparsity must be in [0, 1], got {!r}".format(sparsity)
            )
        self.sparsity = sparsity
        self.alpha = alpha

    def build(self, input_shape):
        """Create the per-channel bias (one value per entry of the last axis)."""
        # Non-trainable on purpose: the bias is set from outside via
        # keras.backend.set_value, not by the optimizer.
        self.bias = self.add_weight(
            name="bias",
            shape=(input_shape[-1],),
            initializer="zeros",
            trainable=False,
        )
        super(FixedSparsityRelu, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        """Return ``relu(x + bias)`` with the bias broadcast over the last axis."""
        biased = tf.nn.bias_add(x, self.bias)
        return tf.nn.relu(biased)

    def compute_output_shape(self, input_shape):
        """The layer is element-wise, so the output shape equals the input shape."""
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised/cloned."""
        config = super(FixedSparsityRelu, self).get_config()
        config.update({"sparsity": self.sparsity, "alpha": self.alpha})
        return config


In [None]:
# TODO: streaming percentile estimator -- unfinished stub.
# The original cell held only the truncated bare name
# `streaming_percentile_estimat`, which is not defined anywhere in the visible
# notebook and would raise NameError under Restart & Run All.

In [6]:
# Load CIFAR-10, scale the images by 1/255 as float32 and one-hot encode the labels.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Class inventory is taken from the training labels before they are one-hot encoded.
classes = np.unique(y_train)
n_classes = len(classes)

x_train, x_test = (
    images.astype(np.float32)/255.0 for images in (x_train, x_test)
)
y_train, y_test = (
    keras.utils.to_categorical(labels, n_classes) for labels in (y_train, y_test)
)

In [8]:
import tensorflow.keras.layers as L

In [23]:
# Three Conv2D -> FixedSparsityRelu stages (32, 64, 128 filters) with 2x2 max
# pooling between stages, then global max pooling and a softmax classifier.
x_in = keras.layers.Input(x_train.shape[1:])

fsr_inputs = []  # tensor fed into each FixedSparsityRelu, in network order
fsr_layers = []  # the FixedSparsityRelu layers themselves, same order

conv_widths = (32, 64, 128)

x = x_in
for stage, n_filters in enumerate(conv_widths):
    # NOTE(review): the convolution already applies a ReLU, so the tensor
    # recorded in fsr_inputs is non-negative -- confirm this is intended.
    x = L.Conv2D(n_filters, 3, activation="relu", padding="same")(x)
    cfsr = FixedSparsityRelu(sparsity=0.8)
    fsr_layers.append(cfsr)
    fsr_inputs.append(x)
    x = cfsr(x)

    # Every stage except the last is followed by spatial down-sampling.
    if stage < len(conv_widths) - 1:
        x = L.MaxPooling2D(2)(x)

x = L.GlobalMaxPooling2D()(x)
x = L.Dense(n_classes, activation="softmax")(x)

# `model` is the classifier; `activations_model` shares its layers and returns
# the inputs of every FixedSparsityRelu layer.
model = keras.models.Model(x_in, x)
activations_model = keras.models.Model(x_in, fsr_inputs)


In [24]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the classifier.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
fixed_sparsity_relu_6 (Fixed (None, 32, 32, 32)        32        
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
fixed_sparsity_relu_7 (Fixed (None, 16, 16, 64)        64        
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 64)          0         
__________

In [25]:
# Mini-batch size used by the augmentation flow and the manual training loop.
batch_size = 128

In [26]:
# Augmentation settings: random horizontal/vertical shifts of up to 20% and
# random horizontal flips; no feature-wise normalisation.
augmentation_options = dict(
    featurewise_center=False,
    featurewise_std_normalization=False,
    #rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)
train_gen = keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
train_gen.fit(x_train)

# Iterator of augmented (images, labels) batches, consumed with next() in the training loop.
train_flow = train_gen.flow(x_train, y_train, batch_size=batch_size)

In [27]:
def melt_dimensions(arr):
    """Collapse every axis except the last into a single leading axis.

    An array of shape ``(d0, d1, ..., dk, c)`` is returned with shape
    ``(d0*d1*...*dk, c)``.  A 1-D array of shape ``(c,)`` becomes ``(1, c)``.

    Args:
        arr: NumPy array with at least one dimension.

    Returns:
        A 2-D reshaped array with the same elements as ``arr``.
    """
    # np.prod of an empty shape tuple is the float 1.0, which reshape rejects;
    # cast to int so 1-D inputs work as well.
    n_rows = int(np.prod(arr.shape[:-1]))
    return arr.reshape((n_rows, arr.shape[-1]))

In [28]:
# Optimizer choice. Alternatives that were tried and left disabled:
#   keras.optimizers.Adam(0.001)
#   keras.optimizers.SGD(0.01, momentum=0.9, nesterov=True)
optimizer = keras.optimizers.Adadelta(0.5)

# Multi-class classification on one-hot labels, tracking accuracy.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [29]:
# Manual training loop.  Every `bias_calc_period` batches the bias of each
# FixedSparsityRelu layer is moved toward the value that would zero out the
# layer's target fraction of the current batch's activations; then the model
# takes an ordinary gradient step on that batch.
training_metrics = []        # result of train_on_batch for every batch
pre_density_history = []     # per bias update: fraction of FSR inputs > 0, one value per layer
post_density_history = []    # per bias update: fraction of (FSR input + new bias) > 0
bias_history = []            # per bias update: mean bias per layer *before* the update

n_epochs = 51
bias_calc_period = 20
# Derived from the data instead of a hard-coded 50000 so the loop follows the
# dataset size.
steps_per_epoch = len(x_train)//batch_size

for epoch in range(n_epochs):
    print("\n")
    print("epoch", epoch+1)
    for batch_idx in range(steps_per_epoch):
        cx, cy = next(train_flow)

        # Refresh the biases on the last batch of every period.
        if batch_idx % bias_calc_period == bias_calc_period-1:
            activations = activations_model.predict(cx)
            cur_predense = []
            cur_postdense = []
            cavg_biases = []
            for act, fsr_layer in zip(activations, fsr_layers):
                # One row per position, one column per channel.
                melted = melt_dimensions(act)
                # Bias that would push the `sparsity` percentile of each
                # channel of this batch exactly to zero.
                batch_optimal_biases = -1.0*np.percentile(melted, 100*fsr_layer.sparsity, axis=0)
                current_bias = keras.backend.get_value(fsr_layer.bias)
                # Exponential moving average between the old bias and the batch optimum.
                new_bias = fsr_layer.alpha*current_bias + (1-fsr_layer.alpha)*batch_optimal_biases
                cpre_density = np.mean(melted > 0)
                cpost_density = np.mean(melted + new_bias > 0)
                cur_predense.append(cpre_density)
                cur_postdense.append(cpost_density)
                # Logged value is the bias in effect before this update.
                cavg_biases.append(np.mean(current_bias))
                keras.backend.set_value(fsr_layer.bias, new_bias)
            pre_density_history.append(cur_predense)
            post_density_history.append(cur_postdense)
            bias_history.append(cavg_biases)

        closs = model.train_on_batch(cx, cy)
        training_metrics.append(closs)
        if batch_idx % 100 == 100-1:
            # Running averages over the most recent 200 entries of each history
            # (the density/bias histories grow once per bias update, not per batch).
            print("metrics", np.mean(training_metrics[-200:], axis=0))
            print("pre density", np.mean(pre_density_history[-200:], axis=0))
            print("post density", np.mean(post_density_history[-200:], axis=0))
            print("avg bias value", np.mean(bias_history[-200:], axis=0))
    print("hold out evaluation")
    print(model.evaluate(x_test, y_test))



epoch 1
metrics [2.203306  0.1846875]
pre density [0.68804049 0.54264622 0.40980587]
post density [0.64210515 0.50310621 0.38137913]
avg bias value [-0.01716106 -0.01221351 -0.00673218]
metrics [2.1183894  0.22601563]
pre density [0.70214121 0.55237899 0.38942833]
post density [0.62162426 0.48261304 0.34541788]
avg bias value [-0.03923047 -0.03176828 -0.01817382]
metrics [1.9799986  0.28757814]
pre density [0.71690316 0.56005494 0.37476915]
post density [0.60602412 0.46160838 0.31737582]
avg bias value [-0.06308025 -0.05451123 -0.03044858]
hold out evaluation
[1.6755712629318238, 0.4163]


epoch 2
metrics [1.7894044 0.363125 ]
pre density [0.74099907 0.57528152 0.36015089]
post density [0.59086127 0.43625184 0.28836898]
avg bias value [-0.11130512 -0.09986942 -0.05192274]
metrics [1.7204853 0.3859375]
pre density [0.75430503 0.58592813 0.35680544]
post density [0.58706938 0.42645178 0.27860901]
avg bias value [-0.14180703 -0.127518   -0.06431618]
metrics [1.6757598 0.4008594]
pre den

KeyboardInterrupt: 

In [12]:
# Inspect the shape of `melted`, the variable left behind by the bias-update loop.
# NOTE(review): this cell's execution count is lower than the training cell's,
# so the recorded output presumably comes from an earlier run -- re-run to confirm.
melted.shape

(65536, 128)

In [13]:
# Fraction of strictly positive entries in `melted` (its activation density).
np.mean(melted > 0)

0.02852320671081543

In [14]:
# Fraction of strictly positive entries in each array of `activations`.
# NOTE(review): the recorded output lists six values while the model cell in
# this notebook collects three FSR inputs -- presumably stale output from an
# earlier model version; re-run to confirm.
[np.mean(act > 0) for act in activations]

[0.40134739875793457,
 0.02852320671081543,
 0.11204409599304199,
 0.05477714538574219,
 0.022968292236328125,
 0.09661865234375]