In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
from google.colab import drive as drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Clone the public GitHub repository that contains the CT image dataset into /content.
# Uncomment the next line to delete an existing clone before cloning again:
# !rm -rf /content/Brain-Stroke-CT-Image-Dataset_Median-Mean-Hybrid-Filter # remove git clone
%cd '/content'
!git clone https://github.com/ricardotran92/Brain-Stroke-CT-Image-Dataset_Median-Mean-Hybrid-Filter.git

/content
Cloning into 'Brain-Stroke-CT-Image-Dataset_Median-Mean-Hybrid-Filter'...
remote: Enumerating objects: 2510, done.[K
remote: Total 2510 (delta 0), reused 0 (delta 0), pack-reused 2510[K
Receiving objects: 100% (2510/2510), 110.49 MiB | 13.57 MiB/s, done.


In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
from PIL import Image


In [None]:
# @title
def DataSetSummary (local_path, tabs):
  """Print an indented, recursive summary of the sub-folders of ``local_path``.

  For the given directory this prints the number of immediate sub-folders,
  then one line per sub-folder with the number of regular files it directly
  contains, and finally recurses into that sub-folder with one more level of
  indentation. Nothing is printed for a directory without sub-folders.

  Args:
    local_path: Path of the directory to summarise.
    tabs: Number of tab characters used to indent every printed line.

  Returns:
    None. The summary is written to standard output.
  """
  # Sort the names so the report is reproducible: directory listing order is
  # otherwise arbitrary.
  with os.scandir(local_path) as entries:
    folders = sorted(entry.name for entry in entries if entry.is_dir())

  if not folders:
    return

  indent = '\t' * tabs
  print("{}Tổng số thư mục: {}".format(indent, len(folders)))
  for folder in folders:
    folder_path = os.path.join(local_path, folder)
    # Count regular files only; nested folders are reported by the recursive
    # call below and must not inflate the file count.
    with os.scandir(folder_path) as entries:
      files_in_folder = sum(1 for entry in entries if entry.is_file())
    print("{}-Thư mục: {}, Số tệp: {}".format(indent, folder, files_in_folder))
    DataSetSummary(folder_path, tabs+1)

# DataSetSummary(local_path, 0)

In [None]:
# import os

# Report the folder layout of the full dataset and of the small sample set
# for every cloned repository under /content.
repository_folder = ["Brain-Stroke-CT-Image-Dataset_Median-Mean-Hybrid-Filter"]

for folder in repository_folder:
    for subset in ('dataset', 'sample'):
        local_path = '/content/' + folder + '/' + subset
        DataSetSummary(local_path, 0)

Tổng số thư mục: 2
-Thư mục: Normal, Số tệp: 1551
-Thư mục: Stroke, Số tệp: 950
Tổng số thư mục: 2
-Thư mục: Normal, Số tệp: 20
-Thư mục: Stroke, Số tệp: 20


In [None]:
# Switch the working directory to the experiment folder on Drive; files written
# later with relative paths (the throughput .xlsx exports) land here.
%cd /content/drive/MyDrive/Colab Notebooks/Thesis/Model/09_07

/content/drive/.shortcut-targets-by-id/1FC4gIMvvGGW3aULXiiL6wB3DkEQoP4FR/Thesis/Model/09_07


In [None]:
# Root folder of the image dataset (one sub-folder per class).
# Switch to the commented 'sample' path for a quick run on the small subset.
# dataset_path = '/content/Brain-Stroke-CT-Image-Dataset_Median-Mean-Hybrid-Filter/sample'
dataset_path = '/content/Brain-Stroke-CT-Image-Dataset_Median-Mean-Hybrid-Filter/dataset'

## Load dataset

In [None]:
# Convert images stored in formats TensorFlow cannot decode (.jfif, .tiff, .tif)
# into JPEG copies placed next to the originals. The source files are left in
# place; an existing .jpg with the same base name is overwritten.
from PIL import Image
import os

# dataset_path = 'path_to_your_dataset'

for foldername in sorted(os.listdir(dataset_path)):
    folder_path = os.path.join(dataset_path, foldername)
    # Skip stray files at the dataset root; only class folders hold images.
    if not os.path.isdir(folder_path):
        continue
    for filename in sorted(os.listdir(folder_path)):
        # Match the extension case-insensitively so '.TIF' / '.JFIF' are converted too.
        if not filename.lower().endswith(('.jfif', '.tiff', '.tif')):
            continue
        print(f"Reformatting {filename} to JPG format...")
        # Remove the extension from the filename
        base_filename = os.path.splitext(filename)[0]
        # The context manager closes the underlying file handle after saving.
        with Image.open(os.path.join(folder_path, filename)) as img:
            # JPEG can only store L, RGB and CMYK data; other modes
            # (e.g. RGBA, P, 16-bit) would make save() raise.
            jpeg_ready = img if img.mode in ('L', 'RGB', 'CMYK') else img.convert('RGB')
            jpeg_ready.save(os.path.join(folder_path, base_filename + '.jpg'))
        print(f"Reformatted {filename} to JPG format.")

In [None]:
# Load the validation split with image_dataset_from_directory
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Loader settings shared by every input resolution
batch_size=32
color_mode = 'rgb'
label_mode = "binary"
# Only sub-directories are classes; stray files at the dataset root must not
# end up in class_names, because the loader is given this list explicitly.
class_names = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)
print("Class names:", class_names)


def load_validation_split(image_size):
    """Return the 20% validation subset of ``dataset_path`` resized to ``image_size``.

    Every call uses the same seed and split fraction; only the target
    resolution differs between the datasets built below.

    Args:
        image_size: ``(height, width)`` the images are resized to.

    Returns:
        The dataset object produced by ``image_dataset_from_directory``.
    """
    return image_dataset_from_directory(
        directory=dataset_path,
        labels="inferred",
        label_mode=label_mode,
        class_names=class_names,
        color_mode=color_mode,
        batch_size=batch_size,
        image_size=image_size,
        seed=42,
        validation_split=0.2,
        subset="validation",
        interpolation="bilinear",
    )


# 224x224 validation set for ResNet50, DenseNet169, VGG16 and MobileNetV2
val_dataset_224 = load_validation_split((224, 224))

# 380x380 validation set for EfficientNetB4
val_dataset_380 = load_validation_split((380, 380))

# Number of classes found on disk
num_classes = len(class_names)

print("Number of classes: ", num_classes)

Class names: ['Normal', 'Stroke']
Found 2501 files belonging to 2 classes.
Using 500 files for validation.
Found 2501 files belonging to 2 classes.
Using 500 files for validation.
Number of classes:  2


## Model configuration

In [None]:
from keras.applications import EfficientNetB4, ResNet50, DenseNet169, VGG16, MobileNetV2
from keras.layers import GlobalAveragePooling2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.models import Model
import os
import numpy as np
import time
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Instantiate the ImageNet-pretrained backbones without their classification
# heads (include_top=False). EfficientNetB4 is built for 380x380 RGB inputs,
# the other four backbones for 224x224 RGB inputs.
efficientnetb4 = EfficientNetB4(include_top=False, weights="imagenet", input_shape=(380,380,3))
resnet50 = ResNet50(include_top=False, weights="imagenet", input_shape=(224,224,3))
densenet169 = DenseNet169(include_top=False, weights="imagenet", input_shape=(224,224,3))
vgg16 = VGG16(include_top=False, weights="imagenet", input_shape=(224,224,3))
mobilenetv2 = MobileNetV2(include_top=False, weights="imagenet", input_shape=(224,224,3))

def customModel(cnn):
    """Attach a binary-classification head to a frozen backbone.

    Every layer of ``cnn`` is marked non-trainable (the backbone object is
    modified in place). The head is global average pooling, dropout (0.4),
    a 1024-unit ReLU dense layer and a single sigmoid output unit.

    Args:
        cnn: Backbone model exposing ``layers``, ``inputs`` and ``output``.

    Returns:
        A ``Model`` mapping the backbone inputs to the sigmoid output.
    """
    # Keep the pretrained weights fixed
    for backbone_layer in cnn.layers:
        backbone_layer.trainable = False

    # Classification head stacked on the backbone feature map
    features = GlobalAveragePooling2D()(cnn.output)
    features = Dropout(rate=0.4)(features)
    hidden = Dense(1024, activation='relu')(features)
    prediction = Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=cnn.inputs, outputs=prediction)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


## EfficientNetB4

In [None]:
# Build the frozen EfficientNetB4 classifier to inspect its architecture.
# NOTE: `model` is reassigned from a saved checkpoint in the "Best val" cell that follows.
model = customModel(efficientnetb4)

# Print the model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 380, 380, 3)]        0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 380, 380, 3)          0         ['input_1[0][0]']             
                                                                                                  
 normalization (Normalizati  (None, 380, 380, 3)          7         ['rescaling[0][0]']           
 on)                                                                                              
                                                                                                  
 rescaling_1 (Rescaling)     (None, 380, 380, 3)          0         ['normalization[0][0]']   

##### Best val

In [None]:
# from pickle import load
# input = open('history_VGG16.pkl', 'rb')
# history = load(input)
# input.close()

from tensorflow.keras.models import load_model

# Load the checkpoint saved as best_model_val_acc.h5 in run folder 09_07_01
model_path = '/content/drive/MyDrive/Colab Notebooks/Thesis/Model/09_07'
model = load_model(model_path + '/09_07_01/' + 'best_model_val_acc.h5')

# NOTE(review): val_metrics is not read anywhere in the visible notebook — confirm before removing.
val_metrics = ['Val']

#### Inference Throughput

In [None]:
import time
import numpy as np
import pandas as pd

def measure_throughput(test_dataset, model, batch_size, num_trials=15):
    """Benchmark the inference throughput of ``model`` over ``test_dataset``.

    Each trial runs ``model.predict`` on every batch of the dataset and
    divides the number of samples actually processed by the elapsed time.

    Args:
        test_dataset: Iterable yielding ``(images, labels)`` batches;
            ``images.shape[0]`` must be the number of samples in the batch.
        model: Object exposing ``predict(images)``.
        batch_size: Nominal batch size. Kept for backward compatibility only:
            samples are counted per batch, because the last batch may be
            smaller and ``len(test_dataset) * batch_size`` over-counts.
        num_trials: Number of full passes over the dataset to time.

    Returns:
        list[dict]: One ``{"Trial": i, "Throughput": samples_per_second}``
        record per trial, followed by a ``{"Trial": "Average", ...}`` record
        holding the mean over all trials.
    """
    throughput_measurements = []

    for trial in range(1, num_trials + 1):
        total_samples = 0
        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()

        # Run prediction over the whole dataset, counting real samples.
        for img, _label in test_dataset:
            model.predict(img)
            total_samples += int(img.shape[0])

        total_time = time.perf_counter() - start_time
        throughput = total_samples / total_time

        print(f"Trial {trial}: Throughput = {throughput} samples/second")
        throughput_measurements.append({"Trial": trial, "Throughput": throughput})

    # Mean throughput over all trials (any one-off warm-up cost in the first
    # trial is included).
    average_throughput = np.mean([record["Throughput"] for record in throughput_measurements])
    print(f"Average Throughput: {average_throughput} samples/second")

    # Append the mean as a final summary record.
    throughput_measurements.append({"Trial": "Average", "Throughput": average_throughput})

    return throughput_measurements

    # # Tính giá trị trung bình của throughput
    # average_throughput = np.mean(throughput_measurements)
    # return average_throughput

# # Gọi hàm và in kết quả
# average_throughput = measure_throughput(val_dataset, model, batch_size)
# print(f"Average Inference Throughput: {average_throughput} samples/second")

# Benchmark the loaded EfficientNetB4 checkpoint on the 380x380 validation set
# and collect the per-trial records in a DataFrame.
results = measure_throughput(val_dataset_380, model, batch_size)
df_results = pd.DataFrame(results)

# Export the records to Excel; the relative path resolves against the
# current working directory.
df_results.to_excel("throughput_efficientnetb4.xlsx", index=False)

# Confirm where the results were written.
print(f"Kết quả đã được lưu vào throughput_efficientnetb4.xlsx")


Trial 1: Throughput = 19.599354456691216 samples/second
Trial 2: Throughput = 48.76743592176958 samples/second
Trial 3: Throughput = 48.867421981855635 samples/second
Trial 4: Throughput = 49.52386843786739 samples/second
Trial 5: Throughput = 49.507184740227196 samples/second
Trial 6: Throughput = 50.174365085011445 samples/second
Trial 7: Throughput = 47.54042766919562 samples/second
Trial 8: Throughput = 50.99860415935671 samples/second
Trial 9: Throughput = 50.078623407796634 samples/second
Trial 10: Throughput = 52.095116533842315 samples/second
Trial 11: Throughput = 47.88171247355644 samples/second
Trial 12: Throughput = 52.65737362587729 samples/second
Trial 13: Throughput = 50.43312338433705 samples/second
Trial 14: Throughput = 47.81299998501588 samples/second
Trial 15: Throughput = 48.197481228705556 samples/second
Average Throughput: 47.60900620607372 samples/second
Kết quả đã được lưu vào throughput_measurements_efficientnetb4.xlsx


## ResNet50

In [None]:
# Build the frozen ResNet50 classifier to inspect its architecture.
# NOTE: `model` is reassigned from a saved checkpoint in the "Best val" cell that follows.
model = customModel(resnet50)

# Print the model summary
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                        

##### Best val

In [None]:
# from pickle import load
# input = open('history_VGG16.pkl', 'rb')
# history = load(input)
# input.close()

from tensorflow.keras.models import load_model

# Load the checkpoint saved as best_model_val_acc.h5 in run folder 09_07_02
model_path = '/content/drive/MyDrive/Colab Notebooks/Thesis/Model/09_07'
model = load_model(model_path + '/09_07_02/' + 'best_model_val_acc.h5')

# NOTE(review): val_metrics is not read anywhere in the visible notebook — confirm before removing.
val_metrics = ['Val']

#### Inference Throughput

In [None]:
import time
import numpy as np
import pandas as pd

def measure_throughput(test_dataset, model, batch_size, num_trials=15):
    """Benchmark the inference throughput of ``model`` over ``test_dataset``.

    Each trial runs ``model.predict`` on every batch of the dataset and
    divides the number of samples actually processed by the elapsed time.

    Args:
        test_dataset: Iterable yielding ``(images, labels)`` batches;
            ``images.shape[0]`` must be the number of samples in the batch.
        model: Object exposing ``predict(images)``.
        batch_size: Nominal batch size. Kept for backward compatibility only:
            samples are counted per batch, because the last batch may be
            smaller and ``len(test_dataset) * batch_size`` over-counts.
        num_trials: Number of full passes over the dataset to time.

    Returns:
        list[dict]: One ``{"Trial": i, "Throughput": samples_per_second}``
        record per trial, followed by a ``{"Trial": "Average", ...}`` record
        holding the mean over all trials.
    """
    throughput_measurements = []

    for trial in range(1, num_trials + 1):
        total_samples = 0
        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()

        # Run prediction over the whole dataset, counting real samples.
        for img, _label in test_dataset:
            model.predict(img)
            total_samples += int(img.shape[0])

        total_time = time.perf_counter() - start_time
        throughput = total_samples / total_time

        print(f"Trial {trial}: Throughput = {throughput} samples/second")
        throughput_measurements.append({"Trial": trial, "Throughput": throughput})

    # Mean throughput over all trials (any one-off warm-up cost in the first
    # trial is included).
    average_throughput = np.mean([record["Throughput"] for record in throughput_measurements])
    print(f"Average Throughput: {average_throughput} samples/second")

    # Append the mean as a final summary record.
    throughput_measurements.append({"Trial": "Average", "Throughput": average_throughput})

    return throughput_measurements

    # # Tính giá trị trung bình của throughput
    # average_throughput = np.mean(throughput_measurements)
    # return average_throughput

# # Gọi hàm và in kết quả
# average_throughput = measure_throughput(val_dataset, model, batch_size)
# print(f"Average Inference Throughput: {average_throughput} samples/second")

# Benchmark the loaded ResNet50 checkpoint on the 224x224 validation set
# and collect the per-trial records in a DataFrame.
results = measure_throughput(val_dataset_224, model, batch_size)
df_results = pd.DataFrame(results)

# Export the records to Excel; the relative path resolves against the
# current working directory.
df_results.to_excel("throughput_resnet50.xlsx", index=False)

# Confirm where the results were written.
print(f"Kết quả đã được lưu vào throughput_resnet50.xlsx")


Trial 1: Throughput = 53.52229338256043 samples/second
Trial 2: Throughput = 160.72309613342566 samples/second
Trial 3: Throughput = 150.45600237144285 samples/second
Trial 4: Throughput = 100.30757399649157 samples/second
Trial 5: Throughput = 152.60883850322955 samples/second
Trial 6: Throughput = 156.5861015378367 samples/second
Trial 7: Throughput = 100.34012866622727 samples/second
Trial 8: Throughput = 151.45183988763353 samples/second
Trial 9: Throughput = 166.39759883530866 samples/second
Trial 10: Throughput = 160.2168586572897 samples/second
Trial 11: Throughput = 121.63496352990694 samples/second
Trial 12: Throughput = 163.58001321142422 samples/second
Trial 13: Throughput = 100.32547035718335 samples/second
Trial 14: Throughput = 100.3308278545354 samples/second
Trial 15: Throughput = 100.3285872917214 samples/second
Average Throughput: 129.25401294774778 samples/second
Kết quả đã được lưu vào throughput_resnet50.xlsx


## DenseNet169

In [None]:
# Build the frozen DenseNet169 classifier to inspect its architecture.
# NOTE: `model` is reassigned from a saved checkpoint in the "Best val" cell that follows.
model = customModel(densenet169)

# Print the model summary
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 zero_padding2d (ZeroPaddin  (None, 230, 230, 3)          0         ['input_3[0][0]']             
 g2D)                                                                                             
                                                                                                  
 conv1/conv (Conv2D)         (None, 112, 112, 64)         9408      ['zero_padding2d[0][0]']      
                                                                                                  
 conv1/bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1/conv[0][0]']    

##### Best val

In [None]:
# from pickle import load
# input = open('history_VGG16.pkl', 'rb')
# history = load(input)
# input.close()

from tensorflow.keras.models import load_model

# Load the checkpoint saved as best_model_val_acc.h5 in run folder 09_07_03
model_path = '/content/drive/MyDrive/Colab Notebooks/Thesis/Model/09_07'
model = load_model(model_path + '/09_07_03/' + 'best_model_val_acc.h5')

# NOTE(review): val_metrics is not read anywhere in the visible notebook — confirm before removing.
val_metrics = ['Val']

#### Inference Throughput

In [None]:
import time
import numpy as np
import pandas as pd

def measure_throughput(test_dataset, model, batch_size, num_trials=15):
    """Benchmark the inference throughput of ``model`` over ``test_dataset``.

    Each trial runs ``model.predict`` on every batch of the dataset and
    divides the number of samples actually processed by the elapsed time.

    Args:
        test_dataset: Iterable yielding ``(images, labels)`` batches;
            ``images.shape[0]`` must be the number of samples in the batch.
        model: Object exposing ``predict(images)``.
        batch_size: Nominal batch size. Kept for backward compatibility only:
            samples are counted per batch, because the last batch may be
            smaller and ``len(test_dataset) * batch_size`` over-counts.
        num_trials: Number of full passes over the dataset to time.

    Returns:
        list[dict]: One ``{"Trial": i, "Throughput": samples_per_second}``
        record per trial, followed by a ``{"Trial": "Average", ...}`` record
        holding the mean over all trials.
    """
    throughput_measurements = []

    for trial in range(1, num_trials + 1):
        total_samples = 0
        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()

        # Run prediction over the whole dataset, counting real samples.
        for img, _label in test_dataset:
            model.predict(img)
            total_samples += int(img.shape[0])

        total_time = time.perf_counter() - start_time
        throughput = total_samples / total_time

        print(f"Trial {trial}: Throughput = {throughput} samples/second")
        throughput_measurements.append({"Trial": trial, "Throughput": throughput})

    # Mean throughput over all trials (any one-off warm-up cost in the first
    # trial is included).
    average_throughput = np.mean([record["Throughput"] for record in throughput_measurements])
    print(f"Average Throughput: {average_throughput} samples/second")

    # Append the mean as a final summary record.
    throughput_measurements.append({"Trial": "Average", "Throughput": average_throughput})

    return throughput_measurements

    # # Tính giá trị trung bình của throughput
    # average_throughput = np.mean(throughput_measurements)
    # return average_throughput

# # Gọi hàm và in kết quả
# average_throughput = measure_throughput(val_dataset, model, batch_size)
# print(f"Average Inference Throughput: {average_throughput} samples/second")

# Benchmark the loaded DenseNet169 checkpoint on the 224x224 validation set
# and collect the per-trial records in a DataFrame.
results = measure_throughput(val_dataset_224, model, batch_size)
df_results = pd.DataFrame(results)

# Export the records to Excel; the relative path resolves against the
# current working directory.
df_results.to_excel("throughput_densenet169.xlsx", index=False)

# Confirm where the results were written.
print(f"Kết quả đã được lưu vào throughput_densenet169.xlsx")


Trial 1: Throughput = 12.504700787191902 samples/second
Trial 2: Throughput = 109.21544004291144 samples/second
Trial 3: Throughput = 100.27141650341406 samples/second
Trial 4: Throughput = 142.78319362193423 samples/second
Trial 5: Throughput = 100.3030669385029 samples/second
Trial 6: Throughput = 144.03124271035395 samples/second
Trial 7: Throughput = 144.70840935776943 samples/second
Trial 8: Throughput = 139.88347197915678 samples/second
Trial 9: Throughput = 120.32069522191256 samples/second
Trial 10: Throughput = 147.97448080544484 samples/second
Trial 11: Throughput = 138.4709765524778 samples/second
Trial 12: Throughput = 100.32137879576243 samples/second
Trial 13: Throughput = 149.78610228275062 samples/second
Trial 14: Throughput = 152.70760271344454 samples/second
Trial 15: Throughput = 100.30172239569323 samples/second
Average Throughput: 120.23892671391472 samples/second
Kết quả đã được lưu vào throughput_densenet169.xlsx


## VGG16

In [None]:
# Build the frozen VGG16 classifier to inspect its architecture.
# NOTE: `model` is reassigned from a saved checkpoint in the "Best val" cell that follows.
model = customModel(vgg16)

# Print the model summary
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

##### Best val

In [None]:
# from pickle import load
# input = open('history_VGG16.pkl', 'rb')
# history = load(input)
# input.close()

from tensorflow.keras.models import load_model

# Load the checkpoint saved as best_model_val_acc.h5 in run folder 09_07_04
model_path = '/content/drive/MyDrive/Colab Notebooks/Thesis/Model/09_07'
model = load_model(model_path + '/09_07_04/' + 'best_model_val_acc.h5')

# NOTE(review): val_metrics is not read anywhere in the visible notebook — confirm before removing.
val_metrics = ['Val']

#### Inference Throughput

In [None]:
import time
import numpy as np
import pandas as pd

def measure_throughput(test_dataset, model, batch_size, num_trials=15):
    """Benchmark the inference throughput of ``model`` over ``test_dataset``.

    Each trial runs ``model.predict`` on every batch of the dataset and
    divides the number of samples actually processed by the elapsed time.

    Args:
        test_dataset: Iterable yielding ``(images, labels)`` batches;
            ``images.shape[0]`` must be the number of samples in the batch.
        model: Object exposing ``predict(images)``.
        batch_size: Nominal batch size. Kept for backward compatibility only:
            samples are counted per batch, because the last batch may be
            smaller and ``len(test_dataset) * batch_size`` over-counts.
        num_trials: Number of full passes over the dataset to time.

    Returns:
        list[dict]: One ``{"Trial": i, "Throughput": samples_per_second}``
        record per trial, followed by a ``{"Trial": "Average", ...}`` record
        holding the mean over all trials.
    """
    throughput_measurements = []

    for trial in range(1, num_trials + 1):
        total_samples = 0
        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()

        # Run prediction over the whole dataset, counting real samples.
        for img, _label in test_dataset:
            model.predict(img)
            total_samples += int(img.shape[0])

        total_time = time.perf_counter() - start_time
        throughput = total_samples / total_time

        print(f"Trial {trial}: Throughput = {throughput} samples/second")
        throughput_measurements.append({"Trial": trial, "Throughput": throughput})

    # Mean throughput over all trials (any one-off warm-up cost in the first
    # trial is included).
    average_throughput = np.mean([record["Throughput"] for record in throughput_measurements])
    print(f"Average Throughput: {average_throughput} samples/second")

    # Append the mean as a final summary record.
    throughput_measurements.append({"Trial": "Average", "Throughput": average_throughput})

    return throughput_measurements

    # # Tính giá trị trung bình của throughput
    # average_throughput = np.mean(throughput_measurements)
    # return average_throughput

# # Gọi hàm và in kết quả
# average_throughput = measure_throughput(val_dataset, model, batch_size)
# print(f"Average Inference Throughput: {average_throughput} samples/second")

# Benchmark the loaded VGG16 checkpoint on the 224x224 validation set
# and collect the per-trial records in a DataFrame.
results = measure_throughput(val_dataset_224, model, batch_size)
df_results = pd.DataFrame(results)

# Export the records to Excel; the relative path resolves against the
# current working directory.
df_results.to_excel("throughput_vgg16.xlsx", index=False)

# Confirm where the results were written.
print(f"Kết quả đã được lưu vào throughput_vgg16.xlsx")


Trial 1: Throughput = 25.020327764245597 samples/second
Trial 2: Throughput = 145.3337526557027 samples/second
Trial 3: Throughput = 131.00052699518983 samples/second
Trial 4: Throughput = 128.1535806591948 samples/second
Trial 5: Throughput = 126.70810301981193 samples/second
Trial 6: Throughput = 145.9426610534551 samples/second
Trial 7: Throughput = 145.2779272811971 samples/second
Trial 8: Throughput = 110.80535326795724 samples/second
Trial 9: Throughput = 147.37105521223995 samples/second
Trial 10: Throughput = 144.49531878702405 samples/second
Trial 11: Throughput = 100.31312638667312 samples/second
Trial 12: Throughput = 100.3127421508477 samples/second
Trial 13: Throughput = 113.0336114253297 samples/second
Trial 14: Throughput = 117.44585762161331 samples/second
Trial 15: Throughput = 148.84779135227603 samples/second
Average Throughput: 122.00411570885053 samples/second
Kết quả đã được lưu vào throughput_vgg16.xlsx


## MobileNetV2

In [None]:
# Build the frozen MobileNetV2 classifier to inspect its architecture.
# NOTE: `model` is reassigned from a saved checkpoint in the "Best val" cell that follows.
model = customModel(mobilenetv2)

# Print the model summary
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 Conv1 (Conv2D)              (None, 112, 112, 32)         864       ['input_5[0][0]']             
                                                                                                  
 bn_Conv1 (BatchNormalizati  (None, 112, 112, 32)         128       ['Conv1[0][0]']               
 on)                                                                                              
                                                                                                  
 Conv1_relu (ReLU)           (None, 112, 112, 32)         0         ['bn_Conv1[0][0]']      

##### Best val

In [None]:
# from pickle import load
# input = open('history_VGG16.pkl', 'rb')
# history = load(input)
# input.close()

from tensorflow.keras.models import load_model

# Load the checkpoint saved as best_model_val_acc.h5 in run folder 09_07_05
model_path = '/content/drive/MyDrive/Colab Notebooks/Thesis/Model/09_07'
model = load_model(model_path + '/09_07_05/' + 'best_model_val_acc.h5')

# NOTE(review): val_metrics is not read anywhere in the visible notebook — confirm before removing.
val_metrics = ['Val']

#### Inference Throughput

In [None]:
import time
import numpy as np
import pandas as pd

def measure_throughput(test_dataset, model, batch_size, num_trials=15):
    """Benchmark the inference throughput of ``model`` over ``test_dataset``.

    Each trial runs ``model.predict`` on every batch of the dataset and
    divides the number of samples actually processed by the elapsed time.

    Args:
        test_dataset: Iterable yielding ``(images, labels)`` batches;
            ``images.shape[0]`` must be the number of samples in the batch.
        model: Object exposing ``predict(images)``.
        batch_size: Nominal batch size. Kept for backward compatibility only:
            samples are counted per batch, because the last batch may be
            smaller and ``len(test_dataset) * batch_size`` over-counts.
        num_trials: Number of full passes over the dataset to time.

    Returns:
        list[dict]: One ``{"Trial": i, "Throughput": samples_per_second}``
        record per trial, followed by a ``{"Trial": "Average", ...}`` record
        holding the mean over all trials.
    """
    throughput_measurements = []

    for trial in range(1, num_trials + 1):
        total_samples = 0
        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()

        # Run prediction over the whole dataset, counting real samples.
        for img, _label in test_dataset:
            model.predict(img)
            total_samples += int(img.shape[0])

        total_time = time.perf_counter() - start_time
        throughput = total_samples / total_time

        print(f"Trial {trial}: Throughput = {throughput} samples/second")
        throughput_measurements.append({"Trial": trial, "Throughput": throughput})

    # Mean throughput over all trials (any one-off warm-up cost in the first
    # trial is included).
    average_throughput = np.mean([record["Throughput"] for record in throughput_measurements])
    print(f"Average Throughput: {average_throughput} samples/second")

    # Append the mean as a final summary record.
    throughput_measurements.append({"Trial": "Average", "Throughput": average_throughput})

    return throughput_measurements

    # # Tính giá trị trung bình của throughput
    # average_throughput = np.mean(throughput_measurements)
    # return average_throughput

# # Gọi hàm và in kết quả
# average_throughput = measure_throughput(val_dataset, model, batch_size)
# print(f"Average Inference Throughput: {average_throughput} samples/second")

# Benchmark the loaded MobileNetV2 checkpoint on the 224x224 validation set
# and collect the per-trial records in a DataFrame.
results = measure_throughput(val_dataset_224, model, batch_size)
df_results = pd.DataFrame(results)

# Export the records to Excel; the relative path resolves against the
# current working directory.
df_results.to_excel("throughput_mobilenetv2.xlsx", index=False)

# Confirm where the results were written.
print(f"Kết quả đã được lưu vào throughput_mobilenetv2.xlsx")


Trial 1: Throughput = 78.32248224673845 samples/second
Trial 2: Throughput = 201.10840823098266 samples/second
Trial 3: Throughput = 201.20463683949566 samples/second
Trial 4: Throughput = 206.32654838084548 samples/second
Trial 5: Throughput = 100.2605462525614 samples/second
Trial 6: Throughput = 203.46312007456802 samples/second
Trial 7: Throughput = 214.55708715679972 samples/second
Trial 8: Throughput = 201.3336072178765 samples/second
Trial 9: Throughput = 178.23313619505583 samples/second
Trial 10: Throughput = 150.45597074791374 samples/second
Trial 11: Throughput = 100.31785928887153 samples/second
Trial 12: Throughput = 100.32512352230712 samples/second
Trial 13: Throughput = 163.3588546308119 samples/second
Trial 14: Throughput = 209.09388080029382 samples/second
Trial 15: Throughput = 209.5053468415475 samples/second
Average Throughput: 167.8577738951113 samples/second
Kết quả đã được lưu vào throughput_mobilenetv2.xlsx


## Outlier

In [None]:
import pandas as pd

# Excel files written by the per-model throughput cells above
files = [
    "throughput_efficientnetb4.xlsx",
    "throughput_resnet50.xlsx",
    "throughput_densenet169.xlsx",
    "throughput_vgg16.xlsx",
    "throughput_mobilenetv2.xlsx"
]

# Hàm loại bỏ outlier và tính giá trị trung bình mới
def remove_outliers_and_calculate_average(filename, lower_whisker=0, upper_whisker=1.5):
    """Average the per-trial throughput in ``filename`` after IQR outlier removal.

    The workbook is expected to have a ``Throughput`` column and, optionally,
    a ``Trial`` column. A row whose ``Trial`` is ``"Average"`` (the summary
    record appended by ``measure_throughput``) is dropped first, so it is not
    treated as a measurement.

    Args:
        filename: Path of the Excel file to read.
        lower_whisker: Multiple of the IQR subtracted from Q1 to get the lower
            bound. The default ``0`` keeps the notebook's original behaviour
            of discarding every trial below Q1; Tukey's usual value is 1.5.
        upper_whisker: Multiple of the IQR added to Q3 to get the upper bound.

    Returns:
        Mean throughput of the trials inside ``[lower bound, upper bound]``.
        The mean and median are also printed.
    """
    # Load the data
    df = pd.read_excel(filename)

    # Drop the summary row so only real trials feed the statistics.
    if 'Trial' in df.columns:
        df = df[df['Trial'].astype(str) != 'Average']
    throughput = df['Throughput']

    # Interquartile range
    Q1 = throughput.quantile(0.25)
    Q3 = throughput.quantile(0.75)
    IQR = Q3 - Q1

    # Bounds of the non-outlier interval
    lower_bound = Q1 - lower_whisker * IQR
    upper_bound = Q3 + upper_whisker * IQR

    # Keep only the trials inside the interval (bounds inclusive)
    kept = throughput[(throughput >= lower_bound) & (throughput <= upper_bound)]

    # Statistics after outlier removal
    average_throughput_filtered = kept.mean()
    median_throughput_filtered = kept.median()
    print(f"\n{filename}")
    print(f"Average Throughput after removing outliers: {average_throughput_filtered}")
    print(f"Median Throughput after removing outliers: {median_throughput_filtered}")

    return average_throughput_filtered

# Compute the outlier-filtered average for every file, keyed by model name
# (the file name without the "throughput_" prefix and ".xlsx" suffix).
averages = {}
for file in files:
    model_name = file.replace("throughput_", "").replace(".xlsx", "")
    averages[model_name] = remove_outliers_and_calculate_average(file)

# Convert the results into a two-column DataFrame
df_averages = pd.DataFrame(list(averages.items()), columns=['Model', 'Average Throughput'])

# Export the results to a new Excel file
df_averages.to_excel("updated_throughput_averages.xlsx", index=False)
print("Kết quả đã được cập nhật và lưu vào updated_throughput_averages.xlsx")


throughput_efficientnetb4.xlsx
Average Throughput after removing outliers: 48.23491992518069
Median Throughput after removing outliers: 48.03186574645008

throughput_resnet50.xlsx
Average Throughput after removing outliers: 142.79835684466732
Median Throughput after removing outliers: 152.0303391954315

throughput_densenet169.xlsx
Average Throughput after removing outliers: 134.20349339981942
Median Throughput after removing outliers: 141.33333280054552

throughput_vgg16.xlsx
Average Throughput after removing outliers: 134.63452514765711
Median Throughput after removing outliers: 137.7479228911069

throughput_mobilenetv2.xlsx
Average Throughput after removing outliers: 192.20819758427515
Median Throughput after removing outliers: 201.2691220286861
Kết quả đã được cập nhật và lưu vào updated_throughput_averages.xlsx


## Other

In [None]:
# Disconnect and delete the Colab runtime once everything above has finished.
# Files under /content (outside the mounted Drive) are lost with the runtime.
from google.colab import runtime
runtime.unassign()