<a href="https://colab.research.google.com/github/sayakpaul/EvoNorms-in-TensorFlow-2/blob/master/Mini_Inception_EvoNorm_Sweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Which GPU? Print the GPU model plus driver and CUDA versions visible to this runtime
!nvidia-smi

Sun Apr 19 11:31:31 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   57C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
# TensorFlow imports
import tensorflow as tf
print(tf.__version__)

2.2.0-rc3


In [0]:
# Other imports
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
import matplotlib.pyplot as plt
import numpy as np

In [0]:
# Pin the global NumPy and TensorFlow seeds (same value for both)
# so that repeated runs draw the same random numbers.
np.random.seed(666)
tf.random.set_seed(666)

In [0]:
# Set up wandb for easy experiment tracking
# 1) install the client quietly (-q), 2) import it, 3) authenticate this session.
# The order matters: the import only succeeds once the install has finished.
!pip install wandb -q
import wandb
wandb.login()

In [8]:
# Fetch CIFAR-10 and rescale the images by 1/255; labels are left untouched here
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
X_train, X_test = X_train / 255., X_test / 255.
print(X_train.shape, X_test.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
(50000, 32, 32, 3) (10000, 32, 32, 3)


## `EvoNorm2dS0`

In [0]:
# Reference
# https://github.com/lonePatient/EvoNorms_PyTorch/blob/master/models/normalization.py

def group_std(x, groups=32, eps=1e-5):
	"""Per-group standard deviation of a 4-D tensor, broadcast back to x's shape.

	The last axis is split into `groups` contiguous groups and the variance is
	taken over axes 1 and 2 together with the channels inside each group.

	Args:
		x: 4-D tensor; the layer below feeds it (batch, height, width, channels).
		groups: number of channel groups; must divide the size of the last axis.
		eps: small constant added to the variance before the square root.

	Returns:
		A tensor shaped like `x` whose entries hold sqrt(var + eps) of the
		group that the corresponding element of `x` belongs to.
	"""
	# Index the dynamic shape rather than unpacking it: unpacking iterates over
	# the tensor, which is not reliably allowed on symbolic (graph) tensors.
	shape = tf.shape(x)
	N, H, W, C = shape[0], shape[1], shape[2], shape[3]
	grouped = tf.reshape(x, [N, H, W, groups, C // groups])
	_, var = tf.nn.moments(grouped, [1, 2, 4], keepdims=True)
	std = tf.sqrt(var + eps)
	# Broadcast against the dynamic shape: the static shape carries None for an
	# unknown batch size and cannot be used as a broadcast target.
	std = tf.broadcast_to(std, tf.shape(grouped))
	return tf.reshape(std, shape)

class EvoNorm2dS0(tf.keras.layers.Layer):
	"""EvoNorm-S0 style normalization-activation layer.

	With `nonlinear=True` the layer computes
	`x * sigmoid(v * x) / group_std(x) * gamma + beta`;
	otherwise it computes the affine map `x * gamma + beta`.
	`gamma`, `beta` and `v` are trainable and shaped (1, 1, 1, in_channels).

	Args:
		in_channels: size of the last axis of the inputs.
		groups: requested number of channel groups for `group_std`. When it
			does not divide `in_channels`, the largest divisor of `in_channels`
			that is <= `groups` is used instead (see `effective_groups`) and a
			warning is emitted.
		nonlinear: selects between the two formulas above.
		**kwargs: forwarded to `tf.keras.layers.Layer` (e.g. `name`).

	Raises:
		ValueError: if `in_channels` or `groups` is smaller than 1.
	"""

	def __init__(self, in_channels, groups=32, nonlinear=True, **kwargs):
		super(EvoNorm2dS0, self).__init__(**kwargs)
		if in_channels < 1 or groups < 1:
			raise ValueError(
				"in_channels and groups must be >= 1, got "
				"in_channels={} and groups={}".format(in_channels, groups))
		self.in_channels = in_channels
		self.nonlinear = nonlinear
		self.groups = groups

		# group_std reshapes channels into (groups, channels // groups), so the
		# group count has to divide the channel count exactly.
		effective = min(groups, in_channels)
		while in_channels % effective:
			effective -= 1
		if effective != groups:
			import warnings  # local import keeps this cell self-contained
			warnings.warn(
				"EvoNorm2dS0: groups={} does not divide in_channels={}; "
				"using groups={} instead.".format(groups, in_channels, effective))
		self.effective_groups = effective

	def build(self, input_shape):
		"""Create the trainable parameters (called by Keras with the input shape)."""
		param_shape = (1, 1, 1, self.in_channels)
		self.gamma = self.add_weight(name="gamma",
								shape=param_shape,
								initializer=tf.initializers.Ones())
		self.beta = self.add_weight(name="beta",
								shape=param_shape,
								initializer=tf.initializers.Zeros())
		if self.nonlinear:
			self.v = self.add_weight(name="v",
								shape=param_shape,
								initializer=tf.initializers.Ones())
		super(EvoNorm2dS0, self).build(input_shape)

	def call(self, x):
		"""Apply the layer to `x` and return a tensor of the same shape."""
		if self.nonlinear:
			num = x * tf.nn.sigmoid(self.v * x)
			std = group_std(x, groups=self.effective_groups)
			return num / std * self.gamma + self.beta
		return x * self.gamma + self.beta

	def get_config(self):
		"""Return the constructor arguments so the layer can be re-created."""
		config = super(EvoNorm2dS0, self).get_config()
		config.update({
			"in_channels": self.in_channels,
			"groups": self.groups,
			"nonlinear": self.nonlinear,
		})
		return config

## Mini Inception

In [0]:
# Implementation comes from http://pyimg.co/mac01
def minigooglenet_functional(width, height, depth, classes, norm=EvoNorm2dS0, groups=32):
	"""Build the Mini Inception (MiniGoogLeNet) classifier with a pluggable norm layer.

	Every convolution is followed by a layer created from `norm`.

	Args:
		width: input image width.
		height: input image height.
		depth: number of input channels.
		classes: number of output classes (size of the softmax).
		norm: layer class instantiated after each convolution. It is called as
			`norm(in_channels=K, groups=groups)` when it is `EvoNorm2dS0` or a
			subclass, and as `norm(in_channels=K)` otherwise.
		groups: group count forwarded to `EvoNorm2dS0`-style layers.

	Returns:
		A Keras `Model` named "minigooglenet" that outputs class probabilities.
	"""
	# `norm` is a class, not an instance, so the check must be issubclass;
	# isinstance(norm, EvoNorm2dS0) is always False and would drop `groups`.
	norm_takes_groups = isinstance(norm, type) and issubclass(norm, EvoNorm2dS0)

	def conv_module(x, K, kX, kY, stride, chanDim, padding="same"):
		# define a CONV => norm pattern
		x = Conv2D(K, (kX, kY), strides=stride, padding=padding)(x)

		if norm_takes_groups:
			layer = norm(in_channels=K, groups=groups)
		else:
			layer = norm(in_channels=K)

		x = layer(x)

		# return the block
		return x

	def inception_module(x, numK1x1, numK3x3, chanDim):
		# define two CONV modules, then concatenate across the
		# channel dimension
		conv_1x1 = conv_module(x, numK1x1, 1, 1, (1, 1), chanDim)
		conv_3x3 = conv_module(x, numK3x3, 3, 3, (1, 1), chanDim)
		x = concatenate([conv_1x1, conv_3x3], axis=chanDim)

		# return the block
		return x

	def downsample_module(x, K, chanDim):
		# define the CONV module and POOL, then concatenate
		# across the channel dimensions
		conv_3x3 = conv_module(x, K, 3, 3, (2, 2), chanDim,
			padding="valid")
		pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
		x = concatenate([conv_3x3, pool], axis=chanDim)

		# return the block
		return x

	# initialize the input shape to be "channels last" and the
	# channels dimension itself
	inputShape = (height, width, depth)
	chanDim = -1

	# define the model input and first CONV module
	inputs = Input(shape=inputShape)
	x = conv_module(inputs, 96, 3, 3, (1, 1), chanDim)

	# two Inception modules followed by a downsample module
	x = inception_module(x, 32, 32, chanDim)
	x = inception_module(x, 32, 48, chanDim)
	x = downsample_module(x, 80, chanDim)

	# four Inception modules followed by a downsample module
	x = inception_module(x, 112, 48, chanDim)
	x = inception_module(x, 96, 64, chanDim)
	x = inception_module(x, 80, 80, chanDim)
	x = inception_module(x, 48, 96, chanDim)
	x = downsample_module(x, 96, chanDim)

	# two Inception modules followed by global POOL and dropout
	x = inception_module(x, 176, 160, chanDim)
	x = inception_module(x, 176, 160, chanDim)
	x = AveragePooling2D((7, 7))(x)
	x = Dropout(0.5)(x)

	# softmax classifier
	x = Flatten()(x)
	x = Dense(classes)(x)
	x = Activation("softmax")(x)

	# create the model
	model = Model(inputs, x, name="minigooglenet")

	# return the constructed network architecture
	return model

In [0]:
# Turn the label arrays of both splits into one-hot matrices
y_train_ohe, y_test_ohe = (
    tf.keras.utils.to_categorical(labels) for labels in (y_train, y_test)
)

In [0]:
# Hyperparameters: mini-batch size and number of training epochs
BATCH_SIZE = 128
EPOCHS = 60

In [0]:
# Import wandb's Keras callback
from wandb.keras import WandbCallback

In [0]:
# Configure the sweep: the parameters to search through, the search strategy,
# the optimization metric, etc.
sweep_config = {
    "method": "random",  # "grid" or "random"
    "metric": {"name": "accuracy", "goal": "maximize"},
    "parameters": {
        "groups": {"values": [4, 8, 12, 16, 32]},
        "epochs": {"values": [10, 20, 30, 40, 50, 60]},
        "learning_rate": {"values": [1e-2, 1e-3, 1e-4, 3e-4, 3e-5, 1e-5]},
        "optimizer": {"values": ["adam", "sgd"]},
    },
}

In [15]:
# Register the sweep with wandb and keep the returned ID for the agent
sweep_id = wandb.sweep(
    sweep_config,
    entity="sayakpaul",
    project="EvoNorm-TensorFlow2",
)

Create sweep with ID: dwn5aefr
Sweep URL: https://app.wandb.ai/sayakpaul/EvoNorm-TensorFlow2/sweeps/dwn5aefr


In [0]:
# Define the training loop
def train():
    """Run one sweep trial: build, compile and fit the model from wandb.config.

    Reads `groups`, `optimizer`, `learning_rate` and `epochs` from the wandb
    run config; `batch_size` falls back to the default set below when the
    sweep does not provide it. Trains on the notebook-level `X_train` /
    `y_train_ohe` arrays and validates on `X_test` / `y_test_ohe`.

    Raises:
        ValueError: if `config.optimizer` is neither "sgd" nor "adam".
    """
    # Default values for hyper-parameters that the sweep may not provide
    config_defaults = {
        "batch_size": 128,
    }

    # Initialize a new wandb run
    wandb.init(config=config_defaults, project="EvoNorm-TensorFlow2")

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config

    # Call model with the current parameters
    model = minigooglenet_functional(32, 32, 3, 10, norm=EvoNorm2dS0, groups=config.groups)

    # Set up optimizer (`learning_rate` replaces the deprecated `lr` alias)
    if config.optimizer == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=config.learning_rate,
                                            momentum=0.9,
                                            decay=1e-2 / config.epochs)
    elif config.optimizer == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=config.learning_rate,
                                             beta_1=0.9, beta_2=0.999,
                                             clipnorm=1.0)
    else:
        # Fail loudly instead of hitting an unbound `optimizer` further down
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'sgd' or 'adam'".format(config.optimizer))

    # Compile model
    model.compile(loss="categorical_crossentropy", optimizer=optimizer,
                  metrics=["accuracy"])

    # Train model; the batch size comes from the run config so that a sweep
    # overriding it is honored
    model.fit(X_train, y_train_ohe,
              validation_data=(X_test, y_test_ohe),
              batch_size=config.batch_size,
              epochs=config.epochs,
              callbacks=[WandbCallback()])

In [0]:
# Launch a wandb agent for the sweep created above, passing `train` as the
# function the agent runs for the sweep's trials
wandb.agent(sweep_id, train)