In [None]:
import os
import time
import shutil
import time
import pandas as pd
import numpy as np
from functools import partial

# Both variables are set before `import tensorflow` below so that they are
# already in the environment when TensorFlow initialises.
# '3' is the most restrictive TensorFlow C++ log level (suppresses INFO,
# WARNING and ERROR messages from the native runtime).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# '-1' hides every CUDA device, so the benchmarks in this notebook run on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.applications.resnet import preprocess_input, ResNet50
tf.__version__

#### Set up the data loading pipeline

In [None]:
def deserialize_image_record(record):
    """Parse one serialized example into its encoded image and labels.

    Args:
        record: Scalar string tensor holding a single serialized example.

    Returns:
        A tuple ``(imgdata, label, label_text)``:
        the raw value stored under ``image/encoded``, the
        ``image/class/label`` value cast to ``int32`` (shape ``[1]``), and
        the ``image/class/text`` string.
    """
    # Features absent from a record fall back to the defaults given here
    # ('' for the string features, -1 for the label).
    features = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
    }
    parsed = tf.io.parse_single_example(serialized=record, features=features)

    label = tf.cast(parsed['image/class/label'], tf.int32)
    label_text = tf.cast(parsed['image/class/text'], tf.string)
    return parsed['image/encoded'], label, label_text

def val_preprocessing(record):
    """Turn one serialized validation record into a model-ready example.

    The JPEG is decoded to 3 channels, resized with bicubic interpolation so
    that its shorter side becomes 256 pixels (aspect ratio preserved),
    centrally cropped/padded to 224x224 and finally passed through the
    ResNet ``preprocess_input`` function.

    Args:
        record: Scalar string tensor holding a single serialized example.

    Returns:
        A tuple ``(image, label, label_text)`` where ``label`` is the stored
        label minus one.
    """
    encoded_jpeg, label, label_text = deserialize_image_record(record)
    # Shift the stored label down by one before comparing with predictions.
    label = label - 1

    image = tf.io.decode_jpeg(
        encoded_jpeg,
        channels=3,
        fancy_upscaling=False,
        dct_method='INTEGER_FAST',
    )

    dims = tf.shape(image)
    height = tf.cast(dims[0], tf.float32)
    width = tf.cast(dims[1], tf.float32)
    side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)

    # Scale factor that maps the shorter edge onto `side` pixels.
    scale = side / tf.minimum(height, width)
    target_size = [
        tf.cast(tf.math.rint(height * scale), tf.int32),
        tf.cast(tf.math.rint(width * scale), tf.int32),
    ]

    image = tf.image.resize(image, target_size, method='bicubic')
    image = tf.image.resize_with_crop_or_pad(image, 224, 224)

    return preprocess_input(image), label, label_text

def get_dataset(batch_size, use_cache=False,
                file_pattern='/home/ubuntu/data/users/shashank/imagenet-val-tfrecords/*'):
    """Build the batched, preprocessed validation dataset.

    Args:
        batch_size: Number of examples per batch.
        use_cache: When true, cache the batched dataset on disk under
            ``./tfdatacache``. Any existing cache directory is removed first.
        file_pattern: Glob pattern matching the TFRecord files to read.
            Defaults to the location this notebook was originally run with.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image, label, label_text)`` batches
        produced by ``val_preprocessing``.

    Raises:
        FileNotFoundError: If ``file_pattern`` matches no files. Without this
            check the dataset would be silently empty and the failure would
            only surface later in the metric computations.
    """
    files = tf.io.gfile.glob(file_pattern)
    if not files:
        raise FileNotFoundError(f'No TFRecord files match {file_pattern!r}')

    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(map_func=val_preprocessing,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size=batch_size)

    if use_cache:
        cache_dir = 'tfdatacache'
        shutil.rmtree(cache_dir, ignore_errors=True)
        # rmtree is best-effort, so tolerate a directory that survived it.
        os.makedirs(cache_dir, exist_ok=True)
        dataset = dataset.cache('./tfdatacache/imagenet_val')

    dataset = dataset.repeat(count=1)
    # Keep prefetch as the final transformation so it overlaps whatever comes
    # before it (including cache reads) with the consumer's work.
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return dataset

#### Load and save ResNet model

In [None]:
def load_save_model(keras_model, saved_model_dir):
    """Instantiate a model with ImageNet weights and export it to disk.

    Args:
        keras_model: Callable that accepts ``weights=`` and returns an object
            with a Keras-style ``save`` method (e.g. ``ResNet50``).
        saved_model_dir: Destination directory. Any existing directory at this
            path is removed before saving.
    """
    pretrained = keras_model(weights='imagenet')

    # Start from a clean export directory; a missing directory is not an error.
    shutil.rmtree(saved_model_dir, ignore_errors=True)

    export_options = {'include_optimizer': False, 'save_format': 'tf'}
    pretrained.save(saved_model_dir, **export_options)

# Export ResNet50 once; the benchmark cells load the model from this directory.
saved_model_dir = 'resnet50_saved_model'
load_save_model(keras_model=ResNet50, saved_model_dir=saved_model_dir)

#### Calculate model accuracy using TensorFlow on ImageNet validation set

In [None]:
# Benchmark the exported SavedModel with TensorFlow/Keras: run every
# validation batch through the model, time each call, and summarise accuracy,
# throughput and latency in `keras_results`.
model = tf.keras.models.load_model(saved_model_dir)
display_every = 500                 # print progress roughly every N images
display_threshold = display_every

pred_labels = []
actual_labels = []
iter_times = []                     # seconds spent in the model call, per batch
batch_size = 8

# Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset
dataset = get_dataset(batch_size)

walltime_start = time.time()
for validation_ds, batch_labels, _ in dataset:
    # Only the forward pass is timed; data loading is covered by wall time.
    start_time = time.time()
    pred_prob_keras = model(validation_ds)
    iter_times.append(time.time() - start_time)

    # Labels arrive with shape (batch, 1); flatten to one entry per image.
    actual_labels.extend(batch_labels.numpy().ravel())
    pred_labels.extend(np.argmax(pred_prob_keras, axis=1))

    # Report the number of images actually processed so far, including the
    # batch that just finished.
    images_done = len(actual_labels)
    if images_done >= display_threshold:
        # The average covers the most recent `display_every` batches.
        print(f'Images {images_done}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:]))}')
        display_threshold += display_every

# Sample wall time right after the loop so it excludes the post-processing below.
wall_time = time.time() - walltime_start

iter_times = np.array(iter_times)
acc_keras_cpu = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)
images_per_sec = batch_size / iter_times

# Latencies are reported in milliseconds.
keras_results = pd.DataFrame(columns = [f'keras_cpu_{batch_size}'])
keras_results.loc['user_batch_size']         = [batch_size]
keras_results.loc['accuracy']                = [acc_keras_cpu]
keras_results.loc['prediction_time']         = [np.sum(iter_times)]
keras_results.loc['wall_time']               = [wall_time]
keras_results.loc['images_per_sec_mean']     = [np.mean(images_per_sec)]
keras_results.loc['images_per_sec_std']      = [np.std(images_per_sec, ddof=1)]
keras_results.loc['latency_mean']            = [np.mean(iter_times) * 1000]
# `method=` replaces the `interpolation=` keyword deprecated in NumPy 1.22.
keras_results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, method="lower") * 1000]
keras_results.loc['latency_median']          = [np.median(iter_times) * 1000]
keras_results.loc['latency_min']             = [np.min(iter_times) * 1000]
display(keras_results)

In [None]:
### MAX Engine Python API ###
# Benchmark the same exported SavedModel through the MAX Engine: run every
# validation batch through the model, time each call, and summarise accuracy,
# throughput and latency in `max_results`.
from max import engine
sess = engine.InferenceSession()
model = sess.load(saved_model_dir)

display_every = 500                 # print progress roughly every N images
display_threshold = display_every

pred_labels = []
actual_labels = []
iter_times = []                     # seconds spent in the model call, per batch
batch_size = 8

# Get the tf.data.TFRecordDataset object for the ImageNet2012 validation dataset
dataset = get_dataset(batch_size)

walltime_start = time.time()
for validation_ds, batch_labels, _ in dataset:
    # Only the inference call is timed; data loading is covered by wall time.
    start_time = time.time()
    pred_prob_max = model.execute(input_1=validation_ds)
    iter_times.append(time.time() - start_time)

    # Labels arrive with shape (batch, 1); flatten to one entry per image.
    actual_labels.extend(batch_labels.numpy().ravel())
    pred_labels.extend(np.argmax(pred_prob_max['predictions'], axis=1))

    # Report the number of images actually processed so far, including the
    # batch that just finished.
    images_done = len(actual_labels)
    if images_done >= display_threshold:
        # The average covers the most recent `display_every` batches.
        print(f'Images {images_done}/50000. Average i/s {np.mean(batch_size/np.array(iter_times[-display_every:])):.4f}. Cum. acc: {np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels):.4f}')
        display_threshold += display_every

# Sample wall time right after the loop so it excludes the post-processing below.
wall_time = time.time() - walltime_start

iter_times = np.array(iter_times)
acc_max = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)
images_per_sec = batch_size / iter_times

# Latencies are reported in milliseconds.
max_results = pd.DataFrame(columns = [f'max_cpu_{batch_size}'])
max_results.loc['user_batch_size']         = [batch_size]
max_results.loc['accuracy']                = [acc_max]
max_results.loc['prediction_time']         = [np.sum(iter_times)]
max_results.loc['wall_time']               = [wall_time]
max_results.loc['images_per_sec_mean']     = [np.mean(images_per_sec)]
max_results.loc['images_per_sec_std']      = [np.std(images_per_sec, ddof=1)]
max_results.loc['latency_mean']            = [np.mean(iter_times) * 1000]
# `method=` replaces the `interpolation=` keyword deprecated in NumPy 1.22.
max_results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, method="lower") * 1000]
max_results.loc['latency_median']          = [np.median(iter_times) * 1000]
max_results.loc['latency_min']             = [np.min(iter_times) * 1000]
display(max_results)

In [None]:
# Side-by-side comparison: one column per backend, one row per metric.
pd.concat([keras_results, max_results], axis='columns')