# AWS Inferentia inference on Amazon EC2 Inf1 instance
This example demonstrates AWS Inferentia inference with TensorFlow and AWS Neuron SDK compiler and runtime

This example was tested on Amazon EC2 `inf1.xlarge` the following AWS Deep Learning AMI: 
`Deep Learning AMI (Ubuntu 18.04) Version 35.0`

Run this notebook using the following conda environment:
`aws_neuron_tensorflow_p36`

Prepare your imagenet validation TFRecord files using the following helper script:
https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh

Save it to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function

In [1]:
# !pip install matplotlib pandas

In [2]:
!/opt/aws/neuron/bin/neuron-cli reset
import os
import time
import shutil
import json
import requests
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.neuron as tfn
import tensorflow.compat.v1.keras as keras
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from concurrent import futures
from itertools import compress

In [3]:
# https://github.com/tensorflow/tensorflow/issues/29931
temp = tf.zeros([8, 224, 224, 3])
_ = tf.keras.applications.resnet50.preprocess_input(temp)

### Resnet50 FP32 saved model

In [4]:
# Export SavedModel
saved_model_dir = 'resnet50_saved_model'
shutil.rmtree(saved_model_dir, ignore_errors=True)

keras.backend.set_learning_phase(0)
model = ResNet50(weights='imagenet')
tf.saved_model.simple_save(session = keras.backend.get_session(),
                           export_dir = saved_model_dir,
                           inputs = {'input_1:0': model.inputs[0]},
                           outputs = {'probs/Softmax:0': model.outputs[0]})

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: resnet50_saved_model/saved_model.pb


### Compile models with different batch sizes and cores

In [5]:
def compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1, use_static_weights=False):
    print(f'-----------batch size: {batch_size}, num cores: {num_cores}----------')
    print('Compiling...')
    
    compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'
    inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)
    shutil.rmtree(inf1_compiled_model_dir, ignore_errors=True)

    example_input = np.zeros([batch_size,224,224,3], dtype='float32')

    compiler_args = ['--verbose','1', '--num-neuroncores', str(num_cores)]
    if use_static_weights:
        compiler_args.append('--static-weights')
    
    start_time = time.time()
    compiled_res = tfn.saved_model.compile(model_dir = saved_model_dir,
                            model_feed_dict={'input_1:0': example_input},
                            new_model_dir = inf1_compiled_model_dir,
                            dynamic_batch_size=True,
                            compiler_workdir=f'./compiler-workdir/{inf1_compiled_model_dir}',
                            compiler_args = compiler_args)
    print(f'Compile time: {time.time() - start_time}')
    
    compile_success = False
    perc_on_inf = compiled_res['OnNeuronRatio'] * 100
    if perc_on_inf > 50:
        compile_success = True
            
    print(inf1_compiled_model_dir)
    print(compiled_res)
    print('----------- Done! ----------- \n')
    
    return compile_success

### Use `tf.data` to read ImageNet validation dataset

In [6]:
def deserialize_image_record(record):
    feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
                  'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
                  'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}
    obj = tf.io.parse_single_example(serialized=record, features=feature_map)
    imgdata = obj['image/encoded']
    label = tf.cast(obj['image/class/label'], tf.int32)   
    label_text = tf.cast(obj['image/class/text'], tf.string)   
    return imgdata, label, label_text

def val_preprocessing(record):
    imgdata, label, label_text = deserialize_image_record(record)
    label -= 1
    image = tf.io.decode_jpeg(imgdata, channels=3, 
                              fancy_upscaling=False, 
                              dct_method='INTEGER_FAST')

    shape = tf.shape(image)
    height = tf.cast(shape[0], tf.float32)
    width = tf.cast(shape[1], tf.float32)
    side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)

    scale = tf.cond(tf.greater(height, width),
                  lambda: side / width,
                  lambda: side / height)
    
    new_height = tf.cast(tf.math.rint(height * scale), tf.int32)
    new_width = tf.cast(tf.math.rint(width * scale), tf.int32)
    
    image = tf.image.resize(image, [new_height, new_width], method='bicubic')
    image = tf.image.resize_with_crop_or_pad(image, 224, 224)
    
    image = tf.keras.applications.resnet50.preprocess_input(image)
    
    return image, label, label_text

def get_dataset(batch_size, use_cache=False):
    data_dir = '/home/ubuntu/datasets/*'
    files = tf.io.gfile.glob(os.path.join(data_dir))
    dataset = tf.data.TFRecordDataset(files)
    
    dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    dataset = dataset.repeat(count=1)
    
    if use_cache:
        shutil.rmtree('tfdatacache', ignore_errors=True)
        os.mkdir('tfdatacache')
        dataset = dataset.cache(f'./tfdatacache/imagenet_val')
    
    return dataset

## Single AWS Inferentia chip execution
* Single core compiled models with automatic data parallel model upto 4 cores
* Multi-core compiled models for pipeline execution

In [13]:
def inf1_predict_benchmark_single_threaded(neuron_saved_model_name, batch_size, user_batch_size, num_cores, use_cache=False, warm_up=10):
    print(f'Running model {neuron_saved_model_name}, user_batch_size: {user_batch_size}\n')

    model_inf1 = tf.contrib.predictor.from_saved_model(neuron_saved_model_name)

    iter_times = []
    pred_labels = []
    actual_labels = []
    display_threshold = 0
    warm_up = 10

    ds = get_dataset(user_batch_size, use_cache)

    ds_iter = ds.make_initializable_iterator()
    ds_next = ds_iter.get_next()
    ds_init_op = ds_iter.initializer

    with tf.Session() as sess:
        if use_cache:
            sess.run(ds_init_op)
            print('\nCaching dataset ...')
            start_time = time.time()
            try:
                while True:
                    (validation_ds,label,_) = sess.run(ds_next)
            except tf.errors.OutOfRangeError:
                pass
            print(f'Caching finished: {time.time()-start_time} sec')  

        try:
            sess.run(ds_init_op)
            counter = 0
            
            total_datas = 1000
            display_every = 100
            display_threshold = display_every
            
            ipname = list(model_inf1.feed_tensors.keys())[0]
            resname = list(model_inf1.fetch_tensors.keys())[0]
            
            walltime_start = time.time()

            while True:
                (validation_ds,batch_labels,_) = sess.run(ds_next)

                model_feed_dict={ipname: validation_ds}

                if counter == 0:
                    for i in range(warm_up):
                        _ = model_inf1(model_feed_dict);                    

                start_time = time.time()
                inf1_results = model_inf1(model_feed_dict);
                iter_times.append(time.time() - start_time)
                
                actual_labels.extend(label for label_list in batch_labels for label in label_list)
                pred_labels.extend(list(np.argmax(inf1_results[resname], axis=1)))

                if counter*user_batch_size >= display_threshold:
                    print(f'Images {counter*user_batch_size}/{total_datas}. Average i/s {np.mean(user_batch_size/np.array(iter_times[-display_every:]))}')
                    display_threshold+=display_every

                counter+=1

        except tf.errors.OutOfRangeError:
            pass
        
    acc_inf1 = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)
    iter_times = np.array(iter_times)
    
    instacne_ip = '35.82.33.15'
    
    results = pd.DataFrame(columns = [f'inf1_compiled_batch_size_{batch_size}_compiled_cores_{num_cores}'])
    results.loc['instance_type']           = [requests.get(f'http://{instance_ip}/latest/meta-data/instance-type').text]
    results.loc['compiled_batch_size']     = [batch_size]
    results.loc['user_batch_size']         = [user_batch_size]
    results.loc['accuracy']                = [acc_inf1]
    results.loc['prediction_time']         = [np.sum(iter_times)]
    results.loc['wall_time']               = [time.time() - walltime_start]
    results.loc['images_per_sec_mean']     = [np.mean(user_batch_size / iter_times)]
    results.loc['images_per_sec_std']      = [np.std(user_batch_size / iter_times, ddof=1)]
    results.loc['latency_mean']            = [np.mean(iter_times) * 1000]
    results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation="lower") * 1000]
    results.loc['latency_median']          = [np.median(iter_times) * 1000]
    results.loc['latency_min']             = [np.min(iter_times) * 1000]
    display(results.T)

    return results, iter_times

In [8]:
inf1_model_dir = 'resnet50_inf1_saved_models'
saved_model_dir = 'resnet50_saved_model'


# testing batch size
batch_list = [1,2,4,8,16,32,64]
for batch in batch_list:
    print('batch size:', batch, 'compile start')
    compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=batch, num_cores=1)


batch size:  1 compile start
-----------batch size: 1, num cores: 1----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:fusing subgraph {subgraph neuron_op_d6f098c01c780733 with input tensors ["<tf.Tensor 'input_10/_0:0' shape=(1, 224, 224, 3) dtype=float32>"], output tensors ["<tf.Tensor 'probs/Softmax:0' shape=(1, 1000) dtype=float32>"]} with neuron-cc; you may check progress by inspecting file /home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron runtime: 874
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
I

INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:fusing subgraph {subgraph neuron_op_d6f098c01c780733 with input tensors ["<tf.Tensor 'input_10/_0:0' shape=(64, 224, 224, 3) dtype=float32>"], output tensors ["<tf.Tensor 'probs/Softmax:0' shape=(64, 1000) dtype=float32>"]} with neuron-cc; you may check progress by inspecting file /home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron runtime: 874
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_64_inf1_

In [14]:
inf1_model_dir = 'resnet50_inf1_saved_models'

compile_options = [{'batch_size': 1, 'num_cores': 1},
                  {'batch_size': 2, 'num_cores': 1},
                  {'batch_size': 4, 'num_cores': 1},
                  {'batch_size': 8, 'num_cores': 1},
                  {'batch_size': 16, 'num_cores': 1},
                  {'batch_size': 32, 'num_cores': 1},
                  {'batch_size': 64, 'num_cores': 1}]

iter_ds = pd.DataFrame()
results = pd.DataFrame()

for opt in compile_options:
    batch_size = opt["batch_size"]
    num_cores = opt["num_cores"]
    compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'
    inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)
   
    print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')
    col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'
    
    res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,
                                                                     batch_size = batch_size,
                                                                     user_batch_size = batch_size*10,
                                                                     num_cores = num_cores,
                                                                     use_cache=False, 
                                                                     warm_up=10)
    
    iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)
    results = pd.concat([results, res], axis=1)
    
display(results)

inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 491.41810926716494
Images 200/1000. Average i/s 460.18351716457835
Images 300/1000. Average i/s 460.72387736524547
Images 400/1000. Average i/s 461.2004287554968
Images 500/1000. Average i/s 464.32750852578755
Images 600/1000. Average i/s 460.61919349607024
Images 700/1000. Average i/s 460.1897324345884
Images 800/1000. Average i/s 465.763023338468
Images 900/1000. Average i/s 465.28044390084625


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_1,inf1.xlarge,1,10,0.919,2.20407,5.14522,464.954,72.2145,22.0407,30.8819,21.4716,15.3298


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_1, user_batch_size: 20

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 666.7027829732694
Images 200/1000. Average i/s 658.0821125166833
Images 300/1000. Average i/s 675.1690961212332
Images 400/1000. Average i/s 690.5629491987505
Images 500/1000. Average i/s 698.9765127805282
Images 600/1000. Average i/s 698.7299597525881
Images 700/1000. Average i/s 698.7823925386114
Images 800/1000. Average i/s 696.8478468738555
Images 900/1000. Average i/s 703.8326940938057


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_2_compiled_cores_1,inf1.xlarge,2,20,0.919,1.43565,5.16259,706.347,83.3811,28.713,37.9462,28.4489,22.1183


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_1, user_batch_size: 40

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 120/1000. Average i/s 883.7303822460225
Images 200/1000. Average i/s 910.3532189125489
Images 320/1000. Average i/s 933.2543736882609
Images 400/1000. Average i/s 928.786405079883
Images 520/1000. Average i/s 925.2014730162482
Images 600/1000. Average i/s 920.1611134377117
Images 720/1000. Average i/s 914.1053154598816
Images 800/1000. Average i/s 920.8408044158014
Images 920/1000. Average i/s 924.3409428725789


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_4_compiled_cores_1,inf1.xlarge,4,40,0.918,1.08515,5.32341,928.432,77.482,43.4062,53.0889,43.2355,37.8447


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_1, user_batch_size: 80

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 937.7894292926667
Images 240/1000. Average i/s 923.1739342726929
Images 320/1000. Average i/s 944.8058263792731
Images 400/1000. Average i/s 952.043050016161
Images 560/1000. Average i/s 952.4978564888605
Images 640/1000. Average i/s 963.4370381307876
Images 720/1000. Average i/s 972.7529568902988
Images 800/1000. Average i/s 977.567721602059
Images 960/1000. Average i/s 1026.6204212784692


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_8_compiled_cores_1,inf1.xlarge,8,80,0.919,1.03814,6.26644,1026.62,197.527,79.8568,90.9786,80.9529,48.6259


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_1, user_batch_size: 160

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 943.6387946463141
Images 320/1000. Average i/s 972.5409722188607
Images 480/1000. Average i/s 976.545997911181
Images 640/1000. Average i/s 992.420051915203
Images 800/1000. Average i/s 970.247370474821
Images 960/1000. Average i/s 1183.9930141441357


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_16_compiled_cores_1,inf1.xlarge,16,160,0.919,1.0598,8.34057,1183.99,569.93,151.4,179.112,160.969,64.8701


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_1, user_batch_size: 320

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 320/1000. Average i/s 888.852374914444
Images 640/1000. Average i/s 899.141056506683
Images 960/1000. Average i/s 1347.1614323732751


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_32_compiled_cores_1,inf1.xlarge,32,320,0.918,1.18688,11.2203,1347.16,896.165,296.721,358.339,353.136,118.905


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1, user_batch_size: 640

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 640/1000. Average i/s 595.7175019689877


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_64_compiled_cores_1,inf1.xlarge,64,640,0.92,2.27574,21.2163,595.718,199.072,1137.87,868.995,1137.87,868.995


Unnamed: 0,inf1_compiled_batch_size_1_compiled_cores_1,inf1_compiled_batch_size_2_compiled_cores_1,inf1_compiled_batch_size_4_compiled_cores_1,inf1_compiled_batch_size_8_compiled_cores_1,inf1_compiled_batch_size_16_compiled_cores_1,inf1_compiled_batch_size_32_compiled_cores_1,inf1_compiled_batch_size_64_compiled_cores_1
instance_type,inf1.xlarge,inf1.xlarge,inf1.xlarge,inf1.xlarge,inf1.xlarge,inf1.xlarge,inf1.xlarge
compiled_batch_size,1,2,4,8,16,32,64
user_batch_size,10,20,40,80,160,320,640
accuracy,0.919,0.919,0.918,0.919,0.919,0.918,0.92
prediction_time,2.20407,1.43565,1.08515,1.03814,1.0598,1.18688,2.27574
wall_time,5.14522,5.16259,5.32341,6.26644,8.34057,11.2203,21.2163
images_per_sec_mean,464.954,706.347,928.432,1026.62,1183.99,1347.16,595.718
images_per_sec_std,72.2145,83.3811,77.482,197.527,569.93,896.165,199.072
latency_mean,22.0407,28.713,43.4062,79.8568,151.4,296.721,1137.87
latency_99th_percentile,30.8819,37.9462,53.0889,90.9786,179.112,358.339,868.995


In [15]:
inf1_model_dir = 'resnet50_inf1_saved_models'
saved_model_dir = 'resnet50_saved_model'


# testing batch size
core_list = [1,2,3,4,5,6]
for core in core_list:
    print('core size:', core, 'compile start')
    compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=core)


core size: 1 compile start
-----------batch size: 1, num cores: 1----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:fusing subgraph {subgraph neuron_op_d6f098c01c780733 with input tensors ["<tf.Tensor 'input_10/_0:0' shape=(1, 224, 224, 3) dtype=float32>"], output tensors ["<tf.Tensor 'probs/Softmax:0' shape=(1, 1000) dtype=float32>"]} with neuron-cc; you may check progress by inspecting file /home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron runtime: 874
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INF

In [17]:
inf1_model_dir = 'resnet50_inf1_saved_models'

compile_options = [{'batch_size': 1, 'num_cores': 1},
                  {'batch_size': 1, 'num_cores': 2},
                  {'batch_size': 1, 'num_cores': 3},
                  {'batch_size': 1, 'num_cores': 4},
                  {'batch_size': 1, 'num_cores': 5},
                  {'batch_size': 1, 'num_cores': 6}]

iter_ds = pd.DataFrame()
results = pd.DataFrame()

for opt in compile_options:
    batch_size = opt["batch_size"]
    num_cores = opt["num_cores"]
    compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'
    inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)
   
    print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')
    col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'
    
    res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,
                                                                     batch_size = batch_size,
                                                                     user_batch_size = batch_size*10,
                                                                     num_cores = num_cores,
                                                                     use_cache=False, 
                                                                     warm_up=10)
    
    iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)
    results = pd.concat([results, res], axis=1)
    
display(results)

inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 492.7609983197934
Images 200/1000. Average i/s 498.76356187020656
Images 300/1000. Average i/s 487.3440884835897
Images 400/1000. Average i/s 496.515840915395
Images 500/1000. Average i/s 494.38088381082173
Images 600/1000. Average i/s 492.91520717919605
Images 700/1000. Average i/s 492.3006276132294
Images 800/1000. Average i/s 489.16447506571853
Images 900/1000. Average i/s 486.91232490944327


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_1,inf1.xlarge,1,10,0.919,2.1282,5.25748,483.723,80.6067,21.282,31.4922,20.7072,14.9789


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_2, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 358.794640450612
Images 200/1000. Average i/s 356.2934837527057
Images 300/1000. Average i/s 360.7169010822598
Images 400/1000. Average i/s 361.79747439280237
Images 500/1000. Average i/s 360.86751193207414
Images 600/1000. Average i/s 360.79159999749197
Images 700/1000. Average i/s 360.03486045583173
Images 800/1000. Average i/s 360.59974105959327
Images 900/1000. Average i/s 361.2857916367616


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_2,inf1.xlarge,1,10,0.917,2.78887,4.35097,362.019,34.8972,27.8887,35.2595,27.1313,23.3169


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_3, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 355.1456372772402
Images 200/1000. Average i/s 354.43537802814114
Images 300/1000. Average i/s 349.504273352511
Images 400/1000. Average i/s 352.42197405317285
Images 500/1000. Average i/s 356.0861528630035
Images 600/1000. Average i/s 357.661102360892
Images 700/1000. Average i/s 358.6916404156829
Images 800/1000. Average i/s 359.2757587603078
Images 900/1000. Average i/s 359.2126386948329


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_3,inf1.xlarge,1,10,0.918,2.79387,4.45548,361.733,37.0595,27.9387,34.0264,27.724,23.0267


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 359.47809851203147
Images 200/1000. Average i/s 368.8593927009845
Images 300/1000. Average i/s 369.3689269676493
Images 400/1000. Average i/s 375.1979932692287
Images 500/1000. Average i/s 374.63680974376683
Images 600/1000. Average i/s 373.7329000241914
Images 700/1000. Average i/s 370.0372652478046
Images 800/1000. Average i/s 369.3100084144382
Images 900/1000. Average i/s 369.6149914304928


Unnamed: 0,instance_type,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_4,inf1.xlarge,1,10,0.916,2.71946,4.3811,372.488,41.9521,27.1946,34.7743,26.9332,21.4508


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_5
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_5, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.


InternalError: 2 root error(s) found.
  (0) Internal: nrt::load failed with grpc status code 0, error message ""; nrt status code 9, details "[NMGR:kmgr_load_nn_from_neff] Insufficient number of VNCs: 4, required: 5
[NMGR:kmgr_load_nn] Failed to load NN: 1.4.1.0+737cbb69a-/home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_5/neuron_op_d6f098c01c780733, err: 9
[NRTD:load] DLR model load failed
"
	 [[node conv5_block3_3_bn/FusedBatchNormV3/ReadVariableOp/neuron_op_d6f098c01c780733 (defined at /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
  (1) Internal: nrt::load failed with grpc status code 0, error message ""; nrt status code 9, details "[NMGR:kmgr_load_nn_from_neff] Insufficient number of VNCs: 4, required: 5
[NMGR:kmgr_load_nn] Failed to load NN: 1.4.1.0+737cbb69a-/home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_5/neuron_op_d6f098c01c780733, err: 9
[NRTD:load] DLR model load failed
"
	 [[node conv5_block3_3_bn/FusedBatchNormV3/ReadVariableOp/neuron_op_d6f098c01c780733 (defined at /home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
	 [[conv5_block3_3_bn/FusedBatchNormV3/ReadVariableOp/neuron_op_d6f098c01c780733/_3]]
0 successful operations.
0 derived errors ignored.

Original stack trace for 'conv5_block3_3_bn/FusedBatchNormV3/ReadVariableOp/neuron_op_d6f098c01c780733':
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 619, in start
    self.io_loop.start()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/asyncio/base_events.py", line 442, in run_forever
    self._run_once()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/asyncio/base_events.py", line 1462, in _run_once
    handle._run()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/ioloop.py", line 688, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/ioloop.py", line 741, in _run_callback
    ret = callback()
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 814, in inner
    self.ctx_run(self.run)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/contextvars/__init__.py", line 38, in run
    return callable(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 775, in run
    yielded = self.gen.send(value)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 358, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/contextvars/__init__.py", line 38, in run
    return callable(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/contextvars/__init__.py", line 38, in run
    return callable(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 538, in execute_request
    user_expressions, allow_stdin,
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/contextvars/__init__.py", line 38, in run
    return callable(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 302, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 539, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2867, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2895, in _run_cell
    return runner(coro)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3072, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3263, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-6c5ac3661dcf>", line 27, in <module>
    warm_up=10)
  File "<ipython-input-13-b3806d957b95>", line 4, in inf1_predict_benchmark_single_threaded
    model_inf1 = tf.contrib.predictor.from_saved_model(neuron_saved_model_name)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/contrib/predictor/predictor_factories.py", line 153, in from_saved_model
    config=config)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/contrib/predictor/saved_model_predictor.py", line 153, in __init__
    loader.load(self._session, tags.split(','), export_dir)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 324, in new_func
    return func(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/saved_model/loader_impl.py", line 269, in load
    return loader.load(sess, tags, import_scope, **saver_kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/saved_model/loader_impl.py", line 422, in load
    **saver_kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/saved_model/loader_impl.py", line 352, in load_graph
    meta_graph_def, import_scope=import_scope, **saver_kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/training/saver.py", line 1477, in _import_meta_graph_with_return_elements
    **kwargs))
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/meta_graph.py", line 809, in import_scoped_meta_graph_with_return_elements
    return_elements=return_elements)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/importer.py", line 405, in import_graph_def
    producer_op_list=producer_op_list)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/importer.py", line 517, in _import_graph_def_internal
    _ProcessNewOps(graph)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/importer.py", line 243, in _ProcessNewOps
    for new_op in graph._add_new_tf_operations(compute_devices=False):  # pylint: disable=protected-access
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3561, in _add_new_tf_operations
    for c_op in c_api_util.new_tf_operations(self)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3561, in <listcomp>
    for c_op in c_api_util.new_tf_operations(self)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3451, in _create_op_from_tf_operation
    ret = Operation(c_op, self)
  File "/home/ubuntu/anaconda3/envs/aws_neuron_tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()
