In [1]:
import tensorflow.keras as K
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
import time
import os
import copy
import csv
from tensorflow import keras
import tensorflow as tf
import pandas as pd
from datetime import datetime
from sklearn import metrics
import tf2onnx
import numpy as np
import math

In [2]:
batch_size = 500 # Number of images processed at once
nb_classes = 10  # 10 Digits from 0 to 9

# Dimensionen of the input images (28x28 pixel)
img_rows, img_cols = 28, 28

# Load image data with labels, split into test and training set 
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# reshape images in 4D tensor (N images, 28 rows, 28 columns, 1 channel) 
# rescale pixels range from [0, 255] to [0, 1]
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_train /= 255
X_test /= 255
print('X_train shape: ', X_train.shape)
print(X_train.shape[0], "training samples")
print(X_test.shape[0], "test samples")

# convert digit labels (0-9) in one-hot encoded binary vectors. 
# These correspond to the training/test labels at the output of the net. 
Y_train = tf.keras.utils.to_categorical(y_train, nb_classes)
Y_test = tf.keras.utils.to_categorical(y_test, nb_classes)
print("One-hot encoding: {}".format(Y_train[0, :]))

X_train shape:  (60000, 28, 28, 1)
60000 training samples
10000 test samples
One-hot encoding: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [3]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() ## for Lenet5

In [4]:
class DataGenerator(tf.compat.v2.keras.utils.Sequence):
 
    def __init__(self, X_data , y_data, batch_size, dim, n_classes,
                 to_fit, shuffle = True):
        self.batch_size = batch_size
        self.X_data = X_data
        self.labels = y_data
        self.y_data = y_data
        self.to_fit = to_fit
        self.n_classes = n_classes
        self.dim = dim
        self.shuffle = shuffle
        self.n = 0
        self.list_IDs = np.arange(len(self.X_data))
        self.on_epoch_end()
    def __next__(self):
        # Get one batch of data
        data = self.__getitem__(self.n)
        # Batch index
        self.n += 1
        
        # If we have processed the entire dataset then
        if self.n >= self.__len__():
            self.on_epoch_end
            self.n = 0
        
        return data
    def __len__(self):
        # Return the number of batches of the dataset
        return math.ceil(len(self.indexes)/self.batch_size)
    def __getitem__(self, index):
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:
            (index+1)*self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        
        X = self._generate_x(list_IDs_temp)
        
        if self.to_fit:
            y = self._generate_y(list_IDs_temp)
            return X, y
        else:
            return X
    def on_epoch_end(self):
        
        self.indexes = np.arange(len(self.X_data))
        
        if self.shuffle: 
            np.random.shuffle(self.indexes)
    def _generate_x(self, list_IDs_temp):
               
        X = np.empty((self.batch_size, *self.dim))
        
        for i, ID in enumerate(list_IDs_temp):
            
            X[i,] = self.X_data[ID]
            
            # Normalize data
            X = (X/255).astype('float32')
            
        return X[:,:,:, np.newaxis]
    def _generate_y(self, list_IDs_temp):
        
        y = np.empty(self.batch_size)
        
        for i, ID in enumerate(list_IDs_temp):
            
            y[i] = self.y_data[ID]
            
        return keras.utils.to_categorical(y,num_classes=self.n_classes)

In [5]:
n_classes = 10
nb_classes = 10
input_shape = (28, 28) #Lenet5

In [6]:
training_id = 1
model_short_name = 'Lenet5'
framework = 'Keras'

In [7]:
train_generator = DataGenerator(X_train, Y_train, batch_size = batch_size,
                                dim = input_shape,
                                n_classes=nb_classes, 
                                to_fit=True, shuffle=True)
val_generator =  DataGenerator(X_test, Y_test, batch_size=batch_size, 
                               dim = input_shape, 
                               n_classes= nb_classes, 
                               to_fit=True, shuffle=True)

In [8]:
def score_model(y_test, test_predict):
    #y_test = np.argmax(y_test, axis=1)
    #test_predict = np.argmax(test_predict, axis=1)
    # accuracy: (tp + tn) / (p + n)
    accuracy = metrics.accuracy_score(y_test, test_predict)
    
    # precision tp / (tp + fp)
    precision = metrics.precision_score(y_test, test_predict, average='macro')
    #print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = metrics.recall_score(y_test, test_predict, average='macro')
    #print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = metrics.f1_score(y_test, test_predict, average='macro')
    #print('F1 score: %f' % f1)
    print('Accuracy: %f' % accuracy, ', Precision: %f' % precision, 'Recall: %f' % recall, 'F1 score: %f' % f1)
    return accuracy, precision, recall, f1

In [9]:
path = '/Volumes/Cisco/Fall2021/onnx-exchange/Training/Keras/lenet5/'
since_0 = time.time()
#model_path = 'tf_Lenet5_mnist_2021-08-24-10:35:35'
#model_name = 'tf_alexnet_cifar10_2021-08-27-17:05:27'
model_name = 'tf_Lenet5_mnist_2021-10-27_{}'.format(training_id)
model = tf.keras.models.load_model(path+ model_name+'.h5')
t_elapsed_0 = time.time() - since_0
size0 = os.path.getsize(path+ model_name+'.h5')
size0

601864

In [26]:
def predict(model, data):
    correct = 0.0
    total = 0.0
    time_list = []
    for i, (images, labels) in enumerate(data):
        #torch.cuda.empty_cache()
        #images = images.cuda()
        since = time.time()
        k_predict = model.predict(images)
        time_elapsed = time.time() - since
        pred = np.argmax(k_predict, axis=1)
        #pred = pred.data.cpu()
        #print(k_predict.shape)
        labels_ = np.argmax(labels, axis=1)
        if i%50 == 0:
            print(i)
        #print(pred, labels_)
        total += k_predict.shape[0]
        correct += np.sum(pred == labels_)
        time_list.append(time_elapsed)
    print('accuracy: {}%'.format(correct*100./total),'time {:.0f}m {:.0f}s, {}'.format(np.mean(time_list) // 60, np.mean(time_list) % 60, np.mean(time_list)) )

In [27]:
val_generator =  DataGenerator(x_test, y_test, batch_size=128,  dim = input_shape, 
                           n_classes= nb_classes, 
                           to_fit=True, shuffle=True)

predict(model, val_generator)

0
50
accuracy: 10.413370253164556% time 0m 0s, 0.18316028087953978


In [44]:
def to_onnx(i, x, labels, data_writer):
    print("converting for batch: ", i)
    #torch.random.manual_seed(42)
    #x = torch.randn(10000, 3, 32, 32, requires_grad=True)
    since_ = time.time()
    since_1 = time.time()
    #model = torch.load(path+model_name+'.pth')
    with tf.device('/cpu:0'): 
        k_predict = model.predict(x)
    t_elapsed_1 = time.time() - since_1
    y_pred = (k_predict > 0.5)
    # Export the model
    print(y_pred)
    print("*************")
    print(labels)
    since_1 = time.time()
    
    score_model(labels, k_predict)
    

In [45]:
def _lets_convert(data, data_writer):
    since = time.time()
    for i, (images, labels) in enumerate(data):
        #torch.cuda.empty_cache()
        #images = images.cuda()
        to_onnx(i, images,labels, data_writer)
        if i == 20:
            break
    time_elapsed = time.time() - since
    print('Conversion complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60) )

In [46]:
#model_name = 'letnet5-keras'
import pandas as pd 
path_output = '/Volumes/Cisco/Summer2022/onnx-exchange/analysis/prediction/'

data_file = open(path_output + 'prediction_{}_{}.csv'.format(framework, model_name), mode='w', newline='',
                                  encoding='utf-8')
data_writer = csv.writer(data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
data_writer.writerow(['model','framework', 'training_id', 'model_full', 'accuracy', 'precision', 'recall', 'f1'])

val_generator =  DataGenerator(x_test, y_test, batch_size=128,  dim = input_shape, 
                           n_classes= nb_classes, 
                           to_fit=True, shuffle=True)
_lets_convert(val_generator, data_writer)
data_file_error.close()
data_file_run.close()

converting for batch:  0
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False  True ... False False False]]
*************
[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 1. ... 0. 0. 0.]]


ValueError: Classification metrics can't handle a mix of multilabel-indicator and continuous-multioutput targets