# MIT-BIH-AFIB-06-tflite.ipynb
Experiment with TensorFlow Lite using a pre-trained model and datasets from the MIT-BIH Atrial Fibrillation Database.   
See https://physionet.org/content/afdb/1.0.0/  

In [1]:
# Environment setup.
import os
import pickle
import sys
import time
import platform
import numpy as np

import tensorflow as tf
import tensorflow.lite as tfl

from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc

import fileutils as fu
import model_utils as mu
import mit_bih_afib_db as db
import mit_bih_afib_tfrecord as tfr

os_name = platform.system()
print('OS name:            {} {}'.format(os_name, platform.release()))
print('Python version:     {}.{}.{}'.format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))
print('TensorFlow version: {}'.format(tf.__version__))
print('Numpy version:      {}'.format(np.__version__))
print('You are here: {}'.format(os.getcwd()))

OS name:            Windows 10
Python version:     3.10.10
TensorFlow version: 2.10.0
Numpy version:      1.23.2
You are here: D:\dev\jupyter\deep-cnn-embedded\src\mit-bih-afib


In [2]:
# Set the local data paths based on our environment.
if (os_name == 'Windows'):
    DATASET_PATH_ROOT = os.path.abspath(r'E:/Data/MIT-BIH-AFIB')
    db.LOCAL_TFRECORD_PATH = os.path.join(DATASET_PATH_ROOT, 'tfrecord')
    LOCAL_ECG_PATH = os.path.join(DATASET_PATH_ROOT, 'pkl')
else:
    DATASET_PATH_ROOT = os.getcwd()
    db.LOCAL_TFRECORD_PATH = os.path.join(DATASET_PATH_ROOT, 'tfrecord')
    LOCAL_ECG_PATH = os.path.join(DATASET_PATH_ROOT, 'pkl')
fu.mkpath(LOCAL_ECG_PATH)  # Create the ECG data directory 

In [3]:
# ECG parameters.
tfr.ECG_LENGTH = 7500
tfr.ECG_FEATURE = 'ecg_fir_z'

LABELS = {'N':0, 'AFIB':1}         # Data labels
CLASS_NAMES = list(LABELS.keys())  # Class names in same order as labels
NUM_CLASSES = len(LABELS)

test_csv_list_file = os.path.join(LOCAL_ECG_PATH, 'test_list.csv')
print('Test CSV list file: {}'.format(test_csv_list_file))

Test CSV list file: E:\Data\MIT-BIH-AFIB\pkl\test_list.csv


### Create a test dataset from previously generated files  
CSV data files can be loaded onto an embedded processor.  
Also creates a master list of the data files along with their one-hot encodings.  

In [4]:
# Create train, test and validation lists from existing CSV files.
# Using only the test list.
train_file = os.path.join(db.LOCAL_TFRECORD_PATH, 'tfrecord_train_list.csv') # Needed for function but not used
test_file = os.path.join(db.LOCAL_TFRECORD_PATH, 'tfrecord_test_list.csv')
val_file = os.path.join(db.LOCAL_TFRECORD_PATH, 'tfrecord_val_list.csv') # Needed for function but not used
train_list, test_list, val_list = tfr.get_tfrecord_lists(train_file, test_file, val_file, path=DATASET_PATH_ROOT)
test_size = len(test_list)
print('Test size: {}'.format(test_size))

Test size: 554


In [5]:
# Function to create an ECG pickle file from a TFRecord file.
# The ECG data is converted to a Python list and written in pickle format.
def write_ecg_pickle(tfrecord_file, ecg_tensor, local_data_path=LOCAL_ECG_PATH):
    # Create the data file name and make a path to it.
    ext = '.pkl'
    basename = os.path.splitext(os.path.basename(tfrecord_file))[0]
    pid = basename.split('_')[0]
    basename += ext
    filepath = os.path.join(local_data_path, pid)
    filename = os.path.join(filepath, basename)
    fu.mkpath(filepath)
    
    # Get the ECG data.
    ecg_data = ecg_tensor.numpy().tolist()
    
    # Write the ECG data.
    with open(filename, 'wb') as fd:
        pickle.dump(ecg_data, fd)
    
    # Return the data file name relative to the local data path.
    return os.path.join(pid, basename)

In [6]:
# Create ECG data files.
# Also creates a master data list CSV file.
test_ds = tf.data.TFRecordDataset(test_list)
test_ds = test_ds.map(tfr.ecg_map_2class)
i = 0
list_fd = fu.open_file(test_csv_list_file, 'w')

for tds in test_ds:
    # Get the TFRecord file name.
    tfrecord_file = test_list[i]
    i += 1
    
    # Create the ECG data file.
    ecg_filename = write_ecg_pickle(tfrecord_file, tds[0])
    
    # Get the one-hot encoding.
    one_hot = tds[1].numpy()
           
    # Write the file name and the one-hot encoding to the master list CSV file.
    list_fd.write('{},{:0.1f},{:0.1f}\n'.format(ecg_filename, one_hot[0], one_hot[1]))
        
fu.close_file(list_fd)
print('{} files created at {}.'.format(i, LOCAL_ECG_PATH))

554 files created at E:\Data\MIT-BIH-AFIB\pkl.


### Create TFLite models

In [7]:
model_dir = os.path.abspath('./checkpoint/InceptionTimeNetV4-D05-new')
model_name = 'InceptionTimeNetV4-D05-new'
saved_model_dir = os.path.join(model_dir, model_name)

# Create a TFLite model with no optimizations (float32 parameters).
converter = tfl.TFLiteConverter.from_saved_model(saved_model_dir)
tflite_model_f32 = converter.convert()
file_model_f32 = os.path.join(model_dir, model_name + '-f32.tflite')
with open(file_model_f32, 'wb') as fd:
    fd.write(tflite_model_f32)

# Create a TFLite model with optimizations (float16 parameters).
converter.optimizations = [tfl.Optimize.DEFAULT]
tflite_model_f16 = converter.convert()
file_model_f16 = os.path.join(model_dir, model_name + '-f16.tflite')
with open(file_model_f16, 'wb') as fd:
    fd.write(tflite_model_f16)

### Load and test the non-optimized TFLite model

In [6]:
# First, create the ground truth targets array from file list CSV file.
targets_list = []
with open(test_csv_list_file, 'r') as tfd:
    for line in tfd:
        (_, y0, y1) = line.strip().split(',')
        targets_list.append(np.array([y0, y1], dtype=np.float32))
targets = np.asarray(targets_list)
print('Targets shape: {}'.format(targets.shape))

Targets shape: (554, 2)


In [9]:
# Function to run predictions.
def run_predictions(interp):
    elapsed_time = 0.0
    pred_list = []
    list_fd = fu.open_file(test_csv_list_file)
    for line in list_fd:
        ecg_file = os.path.join(LOCAL_ECG_PATH, line.strip().split(',')[0])
        with open(ecg_file, 'rb') as ecg_fd:
            ecg_data = pickle.load(ecg_fd)
        input_data = tf.constant(ecg_data, shape=(1, tfr.ECG_LENGTH, 1))
        start = time.time()
        interp_f32.set_tensor(input_index, input_data)
        interp_f32.invoke()
        output_data = interp_f32.get_tensor(output_index)
        delta_time = time.time() - start
        elapsed_time += delta_time
        pred_list.append(output_data)
    fu.close_file(list_fd)
    print('Avg time: {:0.6f} s'.format(elapsed_time / len(pred_list)))
    
    # Create a classification array.
    predictions = np.squeeze(np.asarray(pred_list), axis=1)
    classifications = np.zeros(predictions.shape)
    idx_array = np.argmax(predictions, axis=1)
    for i in range(predictions.shape[0]):
        p = predictions[i]
        idx = idx_array[i]
        if (p[idx] > 0.5):
            classifications[i,idx] = 1
            
    return predictions, classifications

In [8]:
# Function to compute and print metrics.
def compute_metrics(targets, classifications):
    # Run a classification report.
    class_report = classification_report(targets, classifications, target_names=CLASS_NAMES, zero_division=0)
    print(class_report)
    
    # Additional metrics for each class.
    for i in range(NUM_CLASSES):
        cm = confusion_matrix(targets[:,i], classifications[:,i])
        acc = (cm[0,0] + cm[1,1]) / np.sum(cm)
        ppv = cm[1,1] / (cm[1,1] + cm[0,1])
        fpr, tpr, _ = roc_curve(targets[:,i], classifications[:,i])
        roc_auc = auc(fpr, tpr)
        print('Confusion matrix for class {}:'.format(CLASS_NAMES[i]))
        print(cm)
        print('ACC: {:0.4f}'.format(acc))
        print('PPV: {:0.4f}'.format(ppv))
        print('AUC: {:0.4f}'.format(roc_auc))
        print()

    no_class_counts = np.zeros(NUM_CLASSES)
    num_examples = targets.shape[0]
    for i in range(num_examples):
        if np.sum(classifications[i]) == 0:
            no_class_counts[np.argmax(targets[i])] += 1
    no_class_sum = np.sum(no_class_counts)
    no_class_pct = 100. * no_class_sum / num_examples
    print('There are {} predictions with no classification ({:0.2f}%).'.format(int(no_class_sum), no_class_pct))
    for i in range(NUM_CLASSES):
        print('True {:5s}: {}'.format(CLASS_NAMES[i], int(no_class_counts[i])))

In [11]:
# Load the non-optimized model.
interp_f32 = tfl.Interpreter(file_model_f32)
interp_f32.allocate_tensors()
input_index = interp_f32.get_input_details()[0]['index']
output_index = interp_f32.get_output_details()[0]['index']

In [12]:
# Run predictions.
predictions, classifications = run_predictions(interp_f32)

Avg time: 0.153633 s


In [13]:
# Compute metrics.
compute_metrics(targets, classifications)

              precision    recall  f1-score   support

           N       0.99      0.99      0.99       277
        AFIB       0.99      0.99      0.99       277

   micro avg       0.99      0.99      0.99       554
   macro avg       0.99      0.99      0.99       554
weighted avg       0.99      0.99      0.99       554
 samples avg       0.99      0.99      0.99       554

Confusion matrix for class N:
[[274   3]
 [  2 275]]
ACC: 0.9910
PPV: 0.9892
AUC: 0.9910

Confusion matrix for class AFIB:
[[275   2]
 [  3 274]]
ACC: 0.9910
PPV: 0.9928
AUC: 0.9910

There are 0 predictions with no classification (0.00%).
True N    : 0
True AFIB : 0


### Load and test the optimized TFLite model

In [14]:
# Load the optimized model.
interp_f16 = tfl.Interpreter(file_model_f16)
interp_f16.allocate_tensors()
input_index = interp_f16.get_input_details()[0]['index']
output_index = interp_f16.get_output_details()[0]['index']

In [15]:
# Run predictions.
predictions, classifications = run_predictions(interp_f16)

Avg time: 0.156243 s


In [16]:
# Compute metrics.
compute_metrics(targets, classifications)

              precision    recall  f1-score   support

           N       0.99      0.99      0.99       277
        AFIB       0.99      0.99      0.99       277

   micro avg       0.99      0.99      0.99       554
   macro avg       0.99      0.99      0.99       554
weighted avg       0.99      0.99      0.99       554
 samples avg       0.99      0.99      0.99       554

Confusion matrix for class N:
[[274   3]
 [  2 275]]
ACC: 0.9910
PPV: 0.9892
AUC: 0.9910

Confusion matrix for class AFIB:
[[275   2]
 [  3 274]]
ACC: 0.9910
PPV: 0.9928
AUC: 0.9910

There are 0 predictions with no classification (0.00%).
True N    : 0
True AFIB : 0


### Check predictions run on an embedded system

In [11]:
pred_list = ['predictions-rpi-f32.pkl', 'predictions-rpi-f16.pkl']
for pred_file in pred_list:
    with open(pred_file, 'rb') as pfd:
        p_list = pickle.load(pfd)
    predictions = np.squeeze(np.asarray(p_list), axis=1)
    
    # Create a classifications array.
    classifications = np.zeros(predictions.shape)
    idx_array = np.argmax(predictions, axis=1)
    for i in range(predictions.shape[0]):
        p = predictions[i]
        idx = idx_array[i]
        if (p[idx] > 0.5):
            classifications[i,idx] = 1
    
    # Compute metrics.
    print('\nMetrics for file: {}'.format(pred_file))
    compute_metrics(targets, classifications)


Metrics for file: predictions-rpi-f32.pkl
              precision    recall  f1-score   support

           N       0.99      0.99      0.99       277
        AFIB       0.99      0.99      0.99       277

   micro avg       0.99      0.99      0.99       554
   macro avg       0.99      0.99      0.99       554
weighted avg       0.99      0.99      0.99       554
 samples avg       0.99      0.99      0.99       554

Confusion matrix for class N:
[[274   3]
 [  2 275]]
ACC: 0.9910
PPV: 0.9892
AUC: 0.9910

Confusion matrix for class AFIB:
[[275   2]
 [  3 274]]
ACC: 0.9910
PPV: 0.9928
AUC: 0.9910

There are 0 predictions with no classification (0.00%).
True N    : 0
True AFIB : 0

Metrics for file: predictions-rpi-f16.pkl
              precision    recall  f1-score   support

           N       0.99      0.99      0.99       277
        AFIB       0.99      0.99      0.99       277

   micro avg       0.99      0.99      0.99       554
   macro avg       0.99      0.99      0.99     