In [None]:
# !aws s3 sync s3://imagenet-dataset-us-west-2/imagenet-data/tfrecords/validation/ /home/ubuntu/datasets/
# !pip install matplotlib pandas

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.contrib.ei.python.keras.ei_keras import EIKerasModel
import numpy as np
import pandas as pd
import shutil
import time
import os
!/opt/amazon/ei/ei_tools/bin/ei describe-accelerators --json

{
  "ei_client_version": "1.7.0",
  "time": "Sat Sep 19 07:40:14 2020",
  "attached_accelerators": 2,
  "devices": [
    {
      "ordinal": 0,
      "type": "eia2.large",
      "id": "eia-63a6cf28f02841469c58055bff078a95",
      "status": "healthy"
    },
    {
      "ordinal": 1,
      "type": "eia2.xlarge",
      "id": "eia-ef9561df7dd74b308ecefbd8b362ca69",
      "status": "healthy"
    }
  ]
}


Using TensorFlow backend.


In [3]:
# # Export SavedModel
# saved_model_dir = 'resnet50_saved_model_fp32'
# shutil.rmtree(saved_model_dir, ignore_errors=True)

# keras.backend.set_learning_phase(0)
# model = ResNet50(weights='imagenet')
# tf.saved_model.simple_save(session = keras.backend.get_session(),
#                            export_dir = saved_model_dir,
#                            inputs = {'input': model.inputs[0]},
#                            outputs = {'output': model.outputs[0]})

In [4]:
def deserialize_image_record(record):
    """Parse one serialized example into its image bytes, label and label text.

    Args:
        record: A serialized example, as consumed by
            ``tf.io.parse_single_example``.

    Returns:
        A 3-tuple ``(imgdata, label, label_text)``: the ``image/encoded``
        string feature, the ``image/class/label`` feature cast to int32
        (parsed with shape ``[1]``), and the ``image/class/text`` string feature.
    """
    # Missing features fall back to the defaults given here ('' / -1).
    schema = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
    }
    parsed = tf.io.parse_single_example(serialized=record, features=schema)

    return (
        parsed['image/encoded'],
        tf.cast(parsed['image/class/label'], tf.int32),
        tf.cast(parsed['image/class/text'], tf.string),
    )

def val_preprocessing(record):
    """Decode, resize, centre-crop/pad and normalise one validation record.

    Args:
        record: A serialized example understood by ``deserialize_image_record``.

    Returns:
        A 3-tuple ``(image, label, label_text)`` where ``image`` is the float32
        output of ``preprocess_input`` applied to a 224x224 crop/pad of the
        resized picture, and ``label`` is the parsed label minus one.
    """
    encoded_jpeg, label, label_text = deserialize_image_record(record)
    # Shift the stored label down by one (presumably the records are 1-indexed
    # while the model's class indices start at 0 -- TODO confirm).
    label = label - 1

    pixels = tf.io.decode_jpeg(
        encoded_jpeg,
        channels=3,
        fancy_upscaling=False,
        dct_method='INTEGER_FAST',
    )

    dims = tf.shape(pixels)
    height = tf.cast(dims[0], tf.float32)
    width = tf.cast(dims[1], tf.float32)
    side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)

    # Scale so that the shorter edge becomes `side` pixels long.
    def _scale_from_width():
        return side / width

    def _scale_from_height():
        return side / height

    scale = tf.cond(tf.greater(height, width), _scale_from_width, _scale_from_height)

    target_size = [
        tf.cast(tf.math.rint(height * scale), tf.int32),
        tf.cast(tf.math.rint(width * scale), tf.int32),
    ]

    pixels = tf.image.resize(pixels, target_size, method='bicubic')
    pixels = tf.image.resize_with_crop_or_pad(pixels, 224, 224)

    # preprocess_input is invoked through py_function; it returns a one-element list.
    pixels = tf.py_function(preprocess_input, [pixels], [tf.float32])[0]

    return pixels, label, label_text

def get_dataset(batch_size, use_cache=False, data_dir='/home/ubuntu/datasets/*'):
    """Build the batched validation input pipeline from TFRecord files.

    Args:
        batch_size: Number of preprocessed records per batch.
        use_cache: When True, the local ``tfdatacache`` directory is wiped and
            recreated, and the batched dataset is cached to files under it.
        data_dir: Glob pattern matching the TFRecord files to read.

    Returns:
        A ``tf.data.Dataset`` that yields one pass of batched
        ``(image, label, label_text)`` tuples produced by ``val_preprocessing``.

    Raises:
        FileNotFoundError: If ``data_dir`` matches no files.
    """
    files = tf.io.gfile.glob(data_dir)
    if not files:
        # Fail here with a clear message instead of yielding an empty dataset
        # that only breaks later during metric aggregation.
        raise FileNotFoundError(f'No TFRecord files match {data_dir!r}')

    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=8)
    dataset = dataset.batch(batch_size=batch_size)

    if use_cache:
        # Start from an empty directory so stale cache files are never reused.
        shutil.rmtree('tfdatacache', ignore_errors=True)
        os.mkdir('tfdatacache')
        dataset = dataset.cache('./tfdatacache/imagenet_val')

    # Prefetch is kept as the final stage so that producing the next batch
    # (including reads from the cache) overlaps with the consumer's work.
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return dataset

In [5]:
def ei_predict_benchmark(model_type='ei', user_batch_size=1, use_cache=True, warm_up=10,
                         accelerator_id=1):
    """Benchmark Keras ResNet50 predictions over the validation dataset.

    Every batch from ``get_dataset`` is run through the model; only the
    ``predict`` call is timed. Accuracy and latency/throughput statistics are
    collected into a one-row DataFrame, which is also displayed.

    Args:
        model_type: ``'ei'`` wraps ResNet50 in ``EIKerasModel``; any other
            value uses the plain Keras ``ResNet50``. The value is also stored
            in the ``model`` column of the results.
        user_batch_size: Batch size requested from ``get_dataset``.
        use_cache: Forwarded to ``get_dataset``; when True the dataset is
            iterated once, untimed, before the benchmark pass.
        warm_up: Number of untimed predictions run on the first batch before
            timing starts.
        accelerator_id: Passed as ``accelerator_id`` to ``EIKerasModel``
            (presumably the device ordinal reported by
            ``ei describe-accelerators`` -- TODO confirm).

    Returns:
        A tuple ``(results, iter_times)``: the one-row summary DataFrame and a
        numpy array of per-batch prediction times in seconds.

    Raises:
        ValueError: If the dataset produced no batches.
    """
    iter_times = []
    pred_labels = []
    actual_labels = []
    display_threshold = 0

    ds = get_dataset(user_batch_size, use_cache)

    ds_iter = ds.make_initializable_iterator()
    ds_next = ds_iter.get_next()
    ds_init_op = ds_iter.initializer

    with tf.Session() as sess:
        if model_type == 'ei':
            mdl = EIKerasModel(ResNet50(weights='imagenet'), accelerator_id=accelerator_id)
        else:
            mdl = ResNet50(weights='imagenet')

        if use_cache:
            # One full untimed pass over the dataset before the timed pass.
            sess.run(ds_init_op)
            print('\nCaching dataset ...')
            start_time = time.time()
            try:
                while True:
                    sess.run(ds_next)
            except tf.errors.OutOfRangeError:
                pass
            print(f'Caching finished: {time.time()-start_time} sec')

        try:
            sess.run(ds_init_op)
            counter = 0

            while True:
                validation_ds, label, _ = sess.run(ds_next)
                if counter == 0:
                    # Untimed warm-up predictions on the first batch; honours
                    # the caller-supplied `warm_up` count.
                    for _ in range(warm_up):
                        mdl.predict(validation_ds)

                start_time = time.time()
                ei_results = mdl.predict(validation_ds)
                iter_times.append(time.time() - start_time)

                # Labels arrive as one single-element row per image; flatten them.
                actual_labels.extend(l for k in label for l in k)
                pred_labels.extend(list(np.argmax(ei_results, axis=1)))

                if (counter)*user_batch_size >= display_threshold:
                    print(f'Images {(counter)*user_batch_size}/50000. Average i/s {np.mean(user_batch_size/np.array(iter_times))}')
                    display_threshold+=500

                counter+=1

        except tf.errors.OutOfRangeError:
            # Raised by the iterator once the dataset is exhausted.
            pass

    if not iter_times:
        raise ValueError('The dataset produced no batches; nothing to benchmark.')

    acc_inf1 = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)
    iter_times = np.array(iter_times)

    # Latency columns are reported in milliseconds; prediction_time is in seconds.
    results = pd.DataFrame({
        'model': [model_type],
        'accuracy': [acc_inf1],
        'prediction_time': [np.sum(iter_times)],
        'images_per_sec_mean': [np.mean(user_batch_size/iter_times)],
        'latency_per_thread_99th_percentile': [np.percentile(iter_times, q=99, interpolation="lower") * 1000],
        'latency_per_thread_mean': [np.mean(iter_times) * 1000],
        'latency_per_thread_median': [np.median(iter_times) * 1000],
        'latency_per_thread_min': [np.min(iter_times) * 1000],
    })

    display(results)
    return results, iter_times

In [6]:
# Benchmark the Elastic Inference Keras model without the on-disk dataset cache.
batch_size = 128

# Note: the benchmark returns a (DataFrame, iter_times) tuple, so `results`
# holds that whole tuple.
results = ei_predict_benchmark(
    model_type='ei',
    user_batch_size=batch_size,
    use_cache=False,
    warm_up=10,
)

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.






INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to write.


INFO:tensorflow:No assets to write.


INFO:tensorflow:SavedModel written to: /tmp/tmpd6xam1sm/1/saved_model.pb


INFO:tensorflow:SavedModel written to: /tmp/tmpd6xam1sm/1/saved_model.pb


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


INFO:tensorflow:Restoring parameters from /tmp/tmpd6xam1sm/1/variables/variables


INFO:tensorflow:Restoring parameters from /tmp/tmpd6xam1sm/1/variables/variables


Images 0/50000. Average i/s 170.343332913454
Images 512/50000. Average i/s 169.37134483619406
Images 1024/50000. Average i/s 169.48190616058457
Images 1536/50000. Average i/s 169.45825076668805
Images 2048/50000. Average i/s 169.41535395632232
Images 2560/50000. Average i/s 170.1367773102819
Images 3072/50000. Average i/s 170.33963741460698
Images 3584/50000. Average i/s 170.48250189775416
Images 4096/50000. Average i/s 170.55620783505245
Images 4608/50000. Average i/s 170.48099711555795
Images 5120/50000. Average i/s 170.46558918894445
Images 5504/50000. Average i/s 170.40946254330353
Images 6016/50000. Average i/s 170.33419953740122
Images 6528/50000. Average i/s 170.39716505386428
Images 7040/50000. Average i/s 170.57583140066703
Images 7552/50000. Average i/s 170.96684764845335
Images 8064/50000. Average i/s 171.22198539201256
Images 8576/50000. Average i/s 171.42777154384743
Images 9088/50000. Average i/s 171.56753051207332
Images 9600/50000. Average i/s 171.7707588823742
Images 1

Unnamed: 0,model,accuracy,prediction_time,images_per_sec_mean,latency_per_thread_99th_percentile,latency_per_thread_mean,latency_per_thread_median,latency_per_thread_min
0,ei,0.74844,291.608116,171.668858,764.994621,745.800808,745.255709,722.103834


In [7]:
from tensorflow.contrib.ei.python.predictor.ei_predictor import EIPredictor
def ei_predict_benchmark_saved_model(model_dir='resnet50_saved_model_fp16',user_batch_size=1, use_cache=True, warm_up=10,
                                     accelerator_id=1):
    """Benchmark a SavedModel through ``EIPredictor`` over the validation dataset.

    Every batch from ``get_dataset`` is fed to the predictor; only the
    predictor call is timed. Accuracy and latency/throughput statistics are
    collected into a one-row DataFrame, which is also displayed.

    Args:
        model_dir: SavedModel directory handed to ``EIPredictor``; also stored
            in the ``model`` column of the results.
        user_batch_size: Batch size requested from ``get_dataset``.
        use_cache: Forwarded to ``get_dataset``; when True the dataset is
            iterated once, untimed, before the benchmark pass.
        warm_up: Number of untimed predictions run on the first batch before
            timing starts.
        accelerator_id: Passed as ``accelerator_id`` to ``EIPredictor``
            (presumably the device ordinal reported by
            ``ei describe-accelerators`` -- TODO confirm).

    Returns:
        A tuple ``(results, iter_times)``: the one-row summary DataFrame and a
        numpy array of per-batch prediction times in seconds.

    Raises:
        ValueError: If the dataset produced no batches.
    """
    eia_predictor = EIPredictor(model_dir, accelerator_id=accelerator_id)

    iter_times = []
    pred_labels = []
    actual_labels = []
    display_threshold = 0

    ds = get_dataset(user_batch_size, use_cache)

    ds_iter = ds.make_initializable_iterator()
    ds_next = ds_iter.get_next()
    ds_init_op = ds_iter.initializer

    with tf.Session() as sess:
        if use_cache:
            # One full untimed pass over the dataset before the timed pass.
            sess.run(ds_init_op)
            print('\nCaching dataset ...')
            start_time = time.time()
            try:
                while True:
                    sess.run(ds_next)
            except tf.errors.OutOfRangeError:
                pass
            print(f'Caching finished: {time.time()-start_time} sec')

        try:
            sess.run(ds_init_op)
            counter = 0
            # Use the first feed/fetch tensor names exposed by the predictor.
            ipname = list(eia_predictor.feed_tensors.keys())[0]
            resname = list(eia_predictor.fetch_tensors.keys())[0]

            while True:
                validation_ds, label, _ = sess.run(ds_next)
                model_feed_dict = {ipname: validation_ds}

                if counter == 0:
                    # Untimed warm-up predictions on the first batch; honours
                    # the caller-supplied `warm_up` count.
                    for _ in range(warm_up):
                        eia_predictor(model_feed_dict)

                start_time = time.time()
                ei_results = eia_predictor(model_feed_dict)
                iter_times.append(time.time() - start_time)

                # Labels arrive as one single-element row per image; flatten them.
                actual_labels.extend(l for k in label for l in k)
                pred_labels.extend(list(np.argmax(ei_results[resname], axis=1)))

                if (counter)*user_batch_size >= display_threshold:
                    print(f'Images {(counter)*user_batch_size}/50000. Average i/s {np.mean(user_batch_size/np.array(iter_times))}')
                    display_threshold+=500

                counter+=1

        except tf.errors.OutOfRangeError:
            # Raised by the iterator once the dataset is exhausted.
            pass

    if not iter_times:
        raise ValueError('The dataset produced no batches; nothing to benchmark.')

    acc_inf1 = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)
    iter_times = np.array(iter_times)

    # Latency columns are reported in milliseconds; prediction_time is in seconds.
    results = pd.DataFrame({
        'model': [model_dir],
        'accuracy': [acc_inf1],
        'prediction_time': [np.sum(iter_times)],
        'images_per_sec_mean': [np.mean(user_batch_size/iter_times)],
        'latency_per_thread_99th_percentile': [np.percentile(iter_times, q=99, interpolation="lower") * 1000],
        'latency_per_thread_mean': [np.mean(iter_times) * 1000],
        'latency_per_thread_median': [np.median(iter_times) * 1000],
        'latency_per_thread_min': [np.min(iter_times) * 1000],
    })

    display(results)
    return results, iter_times

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [8]:
# Benchmark the FP16 SavedModel through EIPredictor without the on-disk dataset cache.
batch_size = 128

# Note: the benchmark returns a (DataFrame, iter_times) tuple, so `results`
# holds that whole tuple.
results = ei_predict_benchmark_saved_model(
    model_dir='resnet50_saved_model_fp16',
    user_batch_size=batch_size,
    use_cache=False,
    warm_up=10,
)

Using DEFAULT_SERVING_SIGNATURE_DEF_KEY .....
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.


INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.


Images 0/50000. Average i/s 347.7458526032153
Images 512/50000. Average i/s 303.43561842418654
Images 1024/50000. Average i/s 298.616514579938
Images 1536/50000. Average i/s 297.6800295711833
Images 2048/50000. Average i/s 297.51473495602903
Images 2560/50000. Average i/s 296.94056642074884
Images 3072/50000. Average i/s 295.9987898650538
Images 3584/50000. Average i/s 295.5611074867805
Images 4096/50000. Average i/s 295.3565486885888
Images 4608/50000. Average i/s 294.86760385902284
Images 5120/50000. Average i/s 294.40674928995975
Images 5504/50000. Average i/s 293.98356892121313
Images 6016/50000. Average i/s 292.8397557759024
Images 6528/50000. Average i/s 292.2506209877635
Images 7040/50000. Average i/s 291.5824902531458
Images 7552/50000. Average i/s 291.33338211728636
Images 8064/50000. Average i/s 291.4911030120701
Images 8576/50000. Average i/s 291.66555781835723
Images 9088/50000. Average i/s 291.34363561631045
Images 9600/50000. Average i/s 290.9210596504794
Images 10112/500

Unnamed: 0,model,accuracy,prediction_time,images_per_sec_mean,latency_per_thread_99th_percentile,latency_per_thread_mean,latency_per_thread_median,latency_per_thread_min
0,resnet50_saved_model_fp16,0.74862,173.697676,288.780592,475.766659,444.23958,442.209959,228.409052
