In [1]:
# Mount Google Drive at /content/drive. Later cells read datasets and write results
# through relative ./drive/MyDrive/... paths (assumes the working directory is /content
# -- TODO confirm when running outside the default Colab setup).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# from clf_funcs import setup, PerfCounterCallback, env_builder

import pandas as pd
import numpy as np

import tensorflow as tf
from tqdm import trange

# Run index interpolated into the results CSV filename
# (tensorflow-batch-size-comp-{INDEX}.csv) when results are read back and saved.
INDEX = 3

In [3]:
from time import perf_counter_ns


def combine_model(inputs, predef_model, classifier, image_size=32):
	"""Wire a freshly initialised backbone and a classification head into one Keras model.

	Args:
		inputs: symbolic input tensor; it feeds the backbone and becomes the model input.
		predef_model: backbone constructor, invoked with `input_shape`,
			`include_top=False` and `weights=None` (no pretrained weights are loaded).
		classifier: callable mapping the backbone output to the final output tensor.
		image_size: side length used to build the `(image_size, image_size, 3)` input shape.

	Returns:
		A `tf.keras.Model` mapping `inputs` to the classifier output.
	"""
	backbone = predef_model(
		weights=None,
		include_top=False,
		input_shape=(image_size, image_size, 3),
	)
	head_output = classifier(backbone(inputs))
	return tf.keras.Model(inputs=inputs, outputs=head_output)


def classifier_overlay(inputs):
	"""Attach the classification head to a feature tensor.

	The head is global average pooling, a flatten step and a 10-unit softmax
	Dense layer named "classification", applied in that order.

	Args:
		inputs: feature tensor produced by the backbone.

	Returns:
		The output tensor of the final softmax layer.
	"""
	head = (
		tf.keras.layers.GlobalAveragePooling2D(),
		tf.keras.layers.Flatten(),
		tf.keras.layers.Dense(10, activation="softmax", name="classification"),
	)
	tensor = inputs
	for layer in head:
		tensor = layer(tensor)
	return tensor


class FullyConnectedNet(tf.keras.Model):
	"""Multi-layer perceptron: ReLU hidden Dense layers followed by a softmax output layer."""

	def __init__(self, hidden_layers=[800], num_classes=10):
		"""Create the layers.

		Args:
			hidden_layers: iterable of hidden layer widths, one Dense layer per entry.
				The default list is only iterated, never mutated.
			num_classes: number of units in the softmax output layer.
		"""
		super().__init__()
		dense_stack = []
		for width in hidden_layers:
			dense_stack.append(tf.keras.layers.Dense(width, activation=tf.nn.relu))
		self.hidden_layers = tf.keras.Sequential(dense_stack)
		self.output_layer = tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)

	def call(self, x):
		"""Run `x` through the hidden stack and then the output layer."""
		return self.output_layer(self.hidden_layers(x))


def SimpleConvNetBuilder(num_classes=10):
	"""Build a small sequential CNN for 28x28 single-channel images.

	Two Conv2D(5x5, same padding, ReLU) + 2x2 max-pooling stages with 16 and 32
	filters, followed by Flatten, a 512-unit ReLU Dense layer and a softmax
	Dense layer with `num_classes` units.

	Args:
		num_classes: number of units in the final softmax layer.

	Returns:
		The assembled `tf.keras.Sequential` model.
	"""
	stack = [tf.keras.Input(shape=(28, 28, 1))]

	# Convolutional stages differ only in the number of filters.
	for n_filters in (16, 32):
		stack.append(tf.keras.layers.Conv2D(n_filters, kernel_size=5, padding="same", activation='relu'))
		stack.append(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

	stack += [
		tf.keras.layers.Flatten(),
		tf.keras.layers.Dense(512, activation='relu'),
		tf.keras.layers.Dense(num_classes, activation='softmax'),
	]
	return tf.keras.Sequential(stack)


def get_cifar10_data(preprocess=None):
	"""Load CIFAR-10 via Keras and return its images as float32.

	Args:
		preprocess: optional callable applied to the train images and then to the
			test images after the float32 cast; skipped when `None`.

	Returns:
		`((x_train, y_train), (x_test, y_test))` with labels passed through unchanged.
	"""
	train_split, test_split = tf.keras.datasets.cifar10.load_data()

	images = [split[0].astype('float32') for split in (train_split, test_split)]
	if preprocess is not None:
		images = [preprocess(batch) for batch in images]

	x_train, x_test = images
	return (x_train, train_split[1]), (x_test, test_split[1])


def load_mnist_imgs_and_labels(imgs_path, labels_path) -> tuple[np.ndarray, np.ndarray]:
	"""Read an IDX image file and its IDX label file into NumPy arrays.

	Args:
		imgs_path: path to the image file (4-byte magic number, 4-byte big-endian
			image count, 8 further header bytes, then one byte per pixel).
		labels_path: path to the label file (4-byte magic number, 4 further header
			bytes, then one byte per label).

	Returns:
		`(imgs, labels)` where `imgs` is a float array of shape `(n_imgs, 784)` holding
		inverted intensities scaled to [0, 1] (`(255 - pixel) / 255`), and `labels`
		is a uint8 array of the label bytes.

	Raises:
		OSError: if either file cannot be opened.
		ValueError: if the pixel payload does not reshape to `(n_imgs, 784)`.
	"""
	# A single `with` closes both handles even when the second open or a read fails;
	# the previous explicit close() calls were skipped on any exception.
	with open(imgs_path, 'rb') as i_hand, open(labels_path, 'rb') as l_hand:
		i_hand.seek(4, 0)  # skipping "magic" numbers
		l_hand.seek(4, 0)

		n_imgs = int.from_bytes(i_hand.read(4), 'big')

		# The rest of the image file still starts with 8 header bytes
		# (rows and columns per the IDX layout), hence offset=8.
		imgs = np.frombuffer(i_hand.read(), np.uint8, offset=8)
		# The rest of the label file still starts with the 4-byte item count.
		labels = np.frombuffer(l_hand.read(), np.uint8, offset=4)

	imgs = (255 - imgs) / 255
	imgs = imgs.reshape(n_imgs, 28 * 28)

	return imgs, labels


def get_mnist_loaders(batch_size, test_batch_size=None, flatten=True, data_dir='./drive/MyDrive/colab/datasets/mnist-digits'):
	"""Build batched `tf.data` pipelines for the MNIST train and test splits.

	Args:
		batch_size: batch size of the training dataset.
		test_batch_size: batch size of the test dataset; any falsy value
			(None, 0) falls back to `batch_size * 2`.
		flatten: when True the images keep the flat layout returned by
			`load_mnist_imgs_and_labels`; when False they are reshaped to
			`(-1, 28, 28, 1)`.
		data_dir: directory holding the four IDX files
			(`train-images-idx3-ubyte`, `train-labels-idx1-ubyte`,
			`t10k-images-idx3-ubyte`, `t10k-labels-idx1-ubyte`). The default
			reproduces the previously hard-coded location.

	Returns:
		`(train_ds, test_ds)`: the training dataset is shuffled with a 1024-element
		buffer and then batched; the test dataset is only batched.
	"""
	if not test_batch_size:
		test_batch_size = batch_size * 2

	x_train, y_train = load_mnist_imgs_and_labels(
		f'{data_dir}/train-images-idx3-ubyte',
		f'{data_dir}/train-labels-idx1-ubyte'
	)

	x_test, y_test = load_mnist_imgs_and_labels(
		f'{data_dir}/t10k-images-idx3-ubyte',
		f'{data_dir}/t10k-labels-idx1-ubyte'
	)

	if not flatten:
		# Convolutional models need an explicit height/width/channel layout.
		x_train = x_train.reshape(-1, 28, 28, 1)
		x_test = x_test.reshape(-1, 28, 28, 1)

	train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
	train_ds = train_ds.shuffle(buffer_size=1024).batch(batch_size)

	test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
	test_ds = test_ds.batch(test_batch_size)

	return train_ds, test_ds


def env_builder(name, config):
	"""Create the model and the matching train/test data for a benchmark target.

	Args:
		name: one of 'FullyConnectedNet', 'SimpleConvNet', 'ResNet-50',
			'DenseNet-121', 'MobileNet-v2' or 'ConvNeXt-Tiny'.
		config: mapping read for 'batch_size' and 'test_batch_size' (MNIST models)
			or 'inputs' (CIFAR-10 backbones).

	Returns:
		`(model, train_ds, test_ds)`. The MNIST models get the two datasets from
		`get_mnist_loaders`; the CIFAR-10 backbones get the two `(images, labels)`
		pairs from `get_cifar10_data`.

	Raises:
		ValueError: if `name` is not a known model name.
	"""
	# model name -> (constructor attribute, preprocessing module attribute),
	# both looked up on tf.keras.applications only when that model is requested.
	cifar_backbones = {
		'ResNet-50': ('ResNet50', 'resnet50'),
		'DenseNet-121': ('DenseNet121', 'densenet'),
		'MobileNet-v2': ('MobileNetV2', 'mobilenet_v2'),
		'ConvNeXt-Tiny': ('ConvNeXtTiny', 'convnext'),
	}

	if name in cifar_backbones:
		constructor_attr, module_attr = cifar_backbones[name]
		applications = tf.keras.applications
		model = combine_model(config['inputs'], getattr(applications, constructor_attr), classifier_overlay)
		splits = get_cifar10_data(getattr(applications, module_attr).preprocess_input)
	elif name == 'SimpleConvNet':
		model = SimpleConvNetBuilder()
		splits = get_mnist_loaders(config['batch_size'], config['test_batch_size'], flatten=False)
	elif name == 'FullyConnectedNet':
		model = FullyConnectedNet()
		splits = get_mnist_loaders(config['batch_size'], config['test_batch_size'])
	else:
		raise ValueError('Invalid model name')

	train_ds, test_ds = splits
	return model, train_ds, test_ds


class PerfCounterCallback(tf.keras.callbacks.Callback):
	"""Keras callback that records wall-clock durations in nanoseconds.

	* Training: one duration per epoch, flushed to `telemetry_ref` when training ends.
	* Evaluation: one duration per evaluation run that happens outside training.
	* Prediction: one duration per predict run, appended to `latency_ref`.

	The predict hooks take no required arguments, so they can also be called by
	hand around code paths that do not accept callbacks.
	"""

	def __init__(self, telemetry_ref: dict, latency_ref: list[int]):
		"""Store the output containers.

		Args:
			telemetry_ref: dict with list values under 'epoch' and 'elapsed_time';
				only touched by the train/test hooks, so it may be None when the
				callback is used for prediction only.
			latency_ref: list that receives one duration per predict run.
		"""
		super().__init__()
		self.telemetry_ref = telemetry_ref
		self.latency_ref = latency_ref
		self.times = []
		self.eps = []
		self.training = False

	# for training
	def on_train_begin(self, logs=None):
		# Start every training run with empty buffers; otherwise a reused callback
		# would flush the epochs of earlier runs into telemetry_ref a second time.
		self.times = []
		self.eps = []
		self.training = True

	def on_epoch_begin(self, epoch, logs=None):
		self.ep_start = perf_counter_ns()

	def on_epoch_end(self, epoch, logs=None):
		self.times.append(perf_counter_ns() - self.ep_start)
		self.eps.append(epoch + 1)  # stored 1-based

	def on_train_end(self, logs=None):
		self.telemetry_ref['epoch'].extend(self.eps)
		self.telemetry_ref['elapsed_time'].extend(self.times)
		self.training = False

	# for evaluation
	def on_test_begin(self, logs=None):
		self.test_start = perf_counter_ns()

	def on_test_end(self, logs=None):
		# Evaluation runs that happen while training is flagged are not recorded
		# (presumably the validation passes inside fit()).
		if self.training: return
		self.telemetry_ref['elapsed_time'].append(perf_counter_ns() - self.test_start)
		self.telemetry_ref['epoch'].append(1)

	# for prediction
	def on_predict_begin(self, logs=None):
		self.pred_start = perf_counter_ns()

	def on_predict_end(self, logs=None):
		# Take the timestamp first so the bookkeeping below is not part of the measurement.
		pred_end = perf_counter_ns()
		self.latency_ref.append(pred_end - self.pred_start)

In [4]:
# Column-oriented store for the benchmark rows: one list per output column.
telemetry = {
	column: []
	for column in ('framework', 'model_name', 'rep', 'batch_size', 'elapsed_time')
}

# Untimed warm-up calls and timed repetitions per measured variant.
warmup_steps, repetitions = 100, 50

# Only the prediction hooks are used in this notebook, so no telemetry dict is passed.
perf_callback = PerfCounterCallback(telemetry_ref=None, latency_ref=[])

config = dict(
	batch_size=1,
	test_batch_size=1,
	inputs=tf.keras.layers.Input(shape=(32, 32, 3)),
)

In [5]:
# Set to True to benchmark only the remaining models and merge them with a saved first run.
second_run = False

if second_run:
	models = ['MobileNet-v2', 'ConvNeXt-Tiny']
else:
	models = ['FullyConnectedNet', 'SimpleConvNet', 'ResNet-50', 'DenseNet-121', 'MobileNet-v2', 'ConvNeXt-Tiny']


def _benchmark_variant(framework, label, model_name, batch_size, run_inference, use_callback):
	"""Warm up one inference variant, time it and append the rows to `telemetry`.

	Args:
		framework: value stored in the 'framework' telemetry column.
		label: prefix of the progress-bar description of the timed loop.
		model_name: value stored in the 'model_name' column and shown in progress bars.
		batch_size: value stored in the 'batch_size' column and shown in progress bars.
		run_inference: callable taking a `callbacks` argument (None or a list)
			that performs exactly one inference call.
		use_callback: True when the inference call reports timing through
			`perf_callback` itself; False when the predict hooks must be
			invoked by hand around the call.
	"""
	for _ in trange(warmup_steps, desc=f"Warmup for {model_name} (batch of {batch_size})"):
		run_inference(None)

	perf_callback.latency_ref.clear()
	for rep in trange(repetitions, desc=f"{label} for {model_name} (batch of {batch_size})"):
		if use_callback:
			run_inference([perf_callback])
		else:
			# These APIs accept no callbacks, so the hooks are driven manually.
			perf_callback.on_predict_begin()
			run_inference(None)
			perf_callback.on_predict_end()

		telemetry['framework'].append(framework)
		telemetry['model_name'].append(model_name)
		telemetry['rep'].append(rep)
		telemetry['batch_size'].append(batch_size)
	telemetry['elapsed_time'].extend(perf_callback.latency_ref)


for model_name in models:
	for batch_size in [1, 16, 32, 64, 96, 128, 192, 256]:
		config["batch_size"] = batch_size
		model, ds, _ = env_builder(model_name, config)

		if isinstance(ds, tuple):
			# (images, labels) arrays: take the first `batch_size` images.
			sample = ds[0][:batch_size]
			sample = tf.convert_to_tensor(sample, dtype=tf.float32)
		else:
			# Batched dataset: take the images of its first batch.
			sample = next(iter(ds))[0]

		# (framework column, progress label, timed via callback?, single inference call)
		variants = [
			(
				"TF (batch_size=None)", "Predict without bsize", True,
				lambda callbacks: model.predict(sample, verbose=0, callbacks=callbacks, batch_size=None),
			),
			(
				"TF (batch_size=N)", "Predict with bsize", True,
				lambda callbacks: model.predict(sample, verbose=0, callbacks=callbacks, batch_size=batch_size),
			),
			(
				"TF (predict_on_batch)", "Predict on batch", False,
				lambda callbacks: model.predict_on_batch(sample),
			),
			(
				"TF (__call__)", "__call__", False,
				lambda callbacks: model(sample, training=False),
			),
		]

		for framework, label, use_callback, run_inference in variants:
			_benchmark_variant(framework, label, model_name, batch_size, run_inference, use_callback)

		del model

Warmup for FullyConnectedNet (batch of 1): 100%|██████████| 100/100 [00:07<00:00, 13.55it/s]
Predict without bsize for FullyConnectedNet (batch of 1): 100%|██████████| 50/50 [00:02<00:00, 18.73it/s]
Warmup for FullyConnectedNet (batch of 1): 100%|██████████| 100/100 [00:04<00:00, 20.28it/s]
Predict with bsize for FullyConnectedNet (batch of 1): 100%|██████████| 50/50 [00:02<00:00, 20.18it/s]
Warmup for FullyConnectedNet (batch of 1): 100%|██████████| 100/100 [00:00<00:00, 366.44it/s]
Predict on batch for FullyConnectedNet (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 1220.92it/s]
Warmup for FullyConnectedNet (batch of 1): 100%|██████████| 100/100 [00:00<00:00, 196.78it/s]
__call__ for FullyConnectedNet (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 365.67it/s]
Warmup for FullyConnectedNet (batch of 16): 100%|██████████| 100/100 [00:06<00:00, 14.86it/s]
Predict without bsize for FullyConnectedNet (batch of 16): 100%|██████████| 50/50 [00:02<00:00, 17.92it/s]
Warmup for FullyCon

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 0us/step


Warmup for ResNet-50 (batch of 1): 100%|██████████| 100/100 [00:09<00:00, 10.59it/s]
Predict without bsize for ResNet-50 (batch of 1): 100%|██████████| 50/50 [00:02<00:00, 18.61it/s]
Warmup for ResNet-50 (batch of 1): 100%|██████████| 100/100 [00:04<00:00, 20.03it/s]
Predict with bsize for ResNet-50 (batch of 1): 100%|██████████| 50/50 [00:02<00:00, 17.04it/s]
Warmup for ResNet-50 (batch of 1): 100%|██████████| 100/100 [00:03<00:00, 29.28it/s]
Predict on batch for ResNet-50 (batch of 1): 100%|██████████| 50/50 [00:00<00:00, 231.42it/s]
Warmup for ResNet-50 (batch of 1): 100%|██████████| 100/100 [00:20<00:00,  4.84it/s]
__call__ for ResNet-50 (batch of 1): 100%|██████████| 50/50 [00:10<00:00,  4.95it/s]
Warmup for ResNet-50 (batch of 16): 100%|██████████| 100/100 [00:09<00:00, 10.01it/s]
Predict without bsize for ResNet-50 (batch of 16): 100%|██████████| 50/50 [00:02<00:00, 18.35it/s]
Warmup for ResNet-50 (batch of 16): 100%|██████████| 100/100 [00:04<00:00, 20.26it/s]
Predict with bsiz

In [6]:
# Location of this run's CSV; read back (second run only) and then overwritten.
results_csv = f"./drive/MyDrive/colab/results/tensorflow-batch-size-comp-{INDEX}.csv"

results = pd.DataFrame(telemetry)

if second_run:
	# Put the rows saved by the first run in front of the new measurements.
	first_part = pd.read_csv(results_csv)
	results = pd.concat([first_part, results])

results.to_csv(results_csv, index=False)

In [7]:
# The repetition counter is dropped so it is not averaged with the timings.
results = results.drop(columns=["rep"])
display(results.head())

# From here on `results` is the GroupBy object, not the DataFrame;
# re-running this cell without re-running the previous one will fail.
results = results.groupby(["framework", "model_name", "batch_size"])
results.mean().head(15).reset_index()

Unnamed: 0,framework,model_name,batch_size,elapsed_time
0,TF (batch_size=None),FullyConnectedNet,1,17255356
1,TF (batch_size=None),FullyConnectedNet,1,18254394
2,TF (batch_size=None),FullyConnectedNet,1,18136394
3,TF (batch_size=None),FullyConnectedNet,1,18133692
4,TF (batch_size=None),FullyConnectedNet,1,18454136


Unnamed: 0,framework,model_name,batch_size,elapsed_time
0,TF (__call__),ConvNeXt-Tiny,1,911683300.0
1,TF (__call__),ConvNeXt-Tiny,16,940779100.0
2,TF (__call__),ConvNeXt-Tiny,32,948665200.0
3,TF (__call__),ConvNeXt-Tiny,64,944601000.0
4,TF (__call__),ConvNeXt-Tiny,96,941918500.0
5,TF (__call__),ConvNeXt-Tiny,128,944919800.0
6,TF (__call__),ConvNeXt-Tiny,192,969745800.0
7,TF (__call__),ConvNeXt-Tiny,256,971407900.0
8,TF (__call__),DenseNet-121,1,416787800.0
9,TF (__call__),DenseNet-121,16,417098700.0


In [8]:
# Rebuild a model and its data outside the benchmark loop. `model_name` and
# `config` are whatever the last loop iteration left behind (the loop deletes
# `model` at the end of every iteration), so this depends on the loop cell having run.
model, ds, _ = env_builder(model_name, config)