In [None]:
# run and install the below dependencies
!pip install facexlib basicsr
!pip install gfpgan
!pip install psutil
!pip install onnx




In [None]:
# dependencies and set the directories for dataset
import sys
sys.path.append('/kaggle/working/ESRGAN')
import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from RRDBNet_arch import RRDBNet
import os
from pathlib import Path

cwd = os.getcwd()

input_root = Path(cwd+'/data/input/animals')
output_root = Path(cwd+'/data/output')
output_root.mkdir(parents=True, exist_ok=True)



In [None]:
# set the model parameters and details
model = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32)
model_path = cwd+'/RRDB_ESRGAN_x4 (1).pth'
state_dict = torch.load(model_path, map_location='cpu')
model.load_state_dict(state_dict, strict=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.eval()


RRDBNet(
  (conv_first): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (RRDB_trunk): Sequential(
    (0): RRDB(
      (RDB1): ResidualDenseBlock_5C(
        (conv1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv3): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv4): Conv2d(160, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv5): Conv2d(192, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (lrelu): LeakyReLU(negative_slope=0.2, inplace=True)
      )
      (RDB2): ResidualDenseBlock_5C(
        (conv1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv3): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv4): Conv2d(160, 32, kernel_size=(3, 3), str

In [None]:
# helper function to resize images using bicubic and convert to tensors
def preprocess(image_path, device):
    image = Image.open(image_path).convert('RGB')
    image = image.resize((128,128), Image.BICUBIC)
    transform = transforms.ToTensor()
    image_tensor = transform(image).unsqueeze(0).to(device) 

    return image_tensor


1


In [None]:
model = model.cpu()
dummy_input = torch.randn(1, 3, 64, 64)

# export model into Onnx format for OpenVINO
torch.onnx.export(
    model,
    dummy_input,
    "esrgan_model.onnx",
    opset_version=11,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {2: "height", 3: "width"}, "output": {2: "height_out", 3: "width_out"}
    }
)
print("Exported to esrgan_model.onnx")

Exported to esrgan_model.onnx


In [None]:
import onnx

# validate the exported Onxx model format
onnx_model = onnx.load("esrgan_model.onnx")

onnx.checker.check_model(onnx_model)
print("Onnx model is valid!")

Onnx model is valid!


In [None]:
# convert Onnx model into IR format (intermediate representation) - generates the bin and xml files that are used by OpenVINO
!ovc esrgan_model.onnx

[ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression by removing argument "compress_to_fp16" or set it to false "compress_to_fp16=False".
Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html
[ SUCCESS ] XML file: c:\Users\Y\Documents\ml\course_work\ESRGAN\esrgan_model.xml
[ SUCCESS ] BIN file: c:\Users\Y\Documents\ml\course_work\ESRGAN\esrgan_model.bin


In [None]:
# PART A - RUNNING UNOPTIMIZED DEFAUT CPU VERSION WITH DEFAULT THREAD COUNT

from torchvision.utils import save_image
from tqdm import tqdm
import torch
import time
import psutil

cpu_percent_before = psutil.cpu_percent(interval=None)
memory_before = psutil.virtual_memory().used
time_start = time.time()

# model to cpu
device = torch.device('cpu')
model = model.to(device)
model.eval()

total_time = 0.0
image_count = 0

animal_dirs = sorted([d for d in input_root.iterdir() if d.is_dir()])[:20]

for animal_dir in tqdm(animal_dirs, desc="CPU Upscaling Images"):
    output_class_dir = output_root / f"{animal_dir.name}_cpu"
    output_class_dir.mkdir(parents=True, exist_ok=True)

    for image_path in animal_dir.glob('*.jpg'):
        try:
            processed = preprocess(image_path, device)
            if isinstance(processed, tuple):
                low_res_image = processed[0]
            else:
                low_res_image = processed

            start = time.time()

            with torch.no_grad():
                sr_image = model(low_res_image).clamp(0, 1)

            end = time.time()

            total_time += (end - start)
            image_count += 1

            if sr_image.dim() == 4 and sr_image.size(0) == 1:
                sr_image = sr_image.squeeze(0)

            # save output image
            save_path = output_class_dir / image_path.with_suffix('.png').name
            save_image(sr_image, save_path)

        except Exception as e:
            print(f"Error with {image_path}: {e}")

    print(f'Completed inference for {animal_dir}')

time_end = time.time()
cpu_percent_after = psutil.cpu_percent(interval=None)
memory_after = psutil.virtual_memory().used

duration = time_end - time_start
cpu_usage = cpu_percent_after - cpu_percent_before
memory_used_mb = (memory_after - memory_before) / (1024 ** 2)

print(f"\n=== Benchmark Results for default CPU Implementation ===")
print(f"CPU Usage Change: {cpu_usage:.2f}%")
print(f"Memory Used: {memory_used_mb:.2f} MB")

print(f"\n[CPU] Processed {image_count} images.")
print(f"[CPU] Total inference time: {total_time:.2f} seconds")
print(f"[CPU] Average time per image: {total_time / image_count:.4f} seconds")


CPU Upscaling Images:   0%|          | 0/20 [00:00<?, ?it/s]

CPU Upscaling Images:   5%|▌         | 1/20 [04:14<1:20:39, 254.70s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\antelope


CPU Upscaling Images:  10%|█         | 2/20 [07:49<1:09:22, 231.27s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\badger


CPU Upscaling Images:  15%|█▌        | 3/20 [11:26<1:03:38, 224.61s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\bat


CPU Upscaling Images:  20%|██        | 4/20 [14:59<58:40, 220.02s/it]  

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\bear


CPU Upscaling Images:  25%|██▌       | 5/20 [18:29<54:08, 216.58s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\bee


CPU Upscaling Images:  30%|███       | 6/20 [21:58<49:53, 213.85s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\beetle


CPU Upscaling Images:  35%|███▌      | 7/20 [25:29<46:07, 212.90s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\bison


CPU Upscaling Images:  40%|████      | 8/20 [28:54<42:05, 210.49s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\boar


CPU Upscaling Images:  45%|████▌     | 9/20 [32:22<38:26, 209.66s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\butterfly


CPU Upscaling Images:  50%|█████     | 10/20 [35:51<34:55, 209.59s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\cat


CPU Upscaling Images:  55%|█████▌    | 11/20 [39:24<31:34, 210.45s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\caterpillar


CPU Upscaling Images:  60%|██████    | 12/20 [42:57<28:11, 211.42s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\chimpanzee


CPU Upscaling Images:  65%|██████▌   | 13/20 [46:25<24:31, 210.28s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\cockroach


CPU Upscaling Images:  70%|███████   | 14/20 [49:57<21:04, 210.71s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\cow


CPU Upscaling Images:  75%|███████▌  | 15/20 [53:34<17:43, 212.80s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\coyote


CPU Upscaling Images:  80%|████████  | 16/20 [57:01<14:03, 210.86s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\crab


CPU Upscaling Images:  85%|████████▌ | 17/20 [1:00:37<10:37, 212.60s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\crow


CPU Upscaling Images:  90%|█████████ | 18/20 [1:04:10<07:05, 212.70s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\deer


CPU Upscaling Images:  95%|█████████▌| 19/20 [1:07:43<03:32, 212.70s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\dog


CPU Upscaling Images: 100%|██████████| 20/20 [1:11:20<00:00, 214.03s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\dolphin

=== Benchmark Results for default CPU Implementation ===
CPU Usage Change: 28.60%
Memory Used: -2057.01 MB

[CPU] Processed 1200 images.
[CPU] Total inference time: 4029.23 seconds
[CPU] Average time per image: 3.3577 seconds





In [None]:
# PART B - RUNNING UNOPTIMIZED CPU VERSION WITH VARIABLE THREAD COUNT - RUN ON SUBSET OF IMAGES. 
# THREAD COUNTS TESTED - 3, 6, 9, 12
from torchvision.utils import save_image
from tqdm import tqdm
import torch
import time
import psutil


def process_images(thread_cnt):

    torch.set_num_threads(thread_cnt)

    print(f"\nUpscaling using thread count = {torch.get_num_threads()}")
    
    cpu_percent_before = psutil.cpu_percent(interval=None)
    memory_before = psutil.virtual_memory().used
    time_start = time.time()

    model = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32)
    model_path = cwd+'/RRDB_ESRGAN_x4 (1).pth'
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict, strict=True)

    # model to cpu
    device = torch.device('cpu')
    model = model.to(device)
    model.eval()

    total_time = 0.0
    image_count = 0

    animal_dirs = sorted([d for d in input_root.iterdir() if d.is_dir()])[:1]


    for animal_dir in tqdm(animal_dirs, desc="CPU Upscaling Images"):
        output_class_dir = output_root / f"{animal_dir.name}_cpu"
        output_class_dir.mkdir(parents=True, exist_ok=True)

        for image_path in animal_dir.glob('*.jpg'):
            try:
                processed = preprocess(image_path, device)
                if isinstance(processed, tuple):
                    low_res_image = processed[0]
                else:
                    low_res_image = processed

                start = time.time()

                with torch.no_grad():
                    sr_image = model(low_res_image).clamp(0, 1)

                end = time.time()

                total_time += (end - start)
                image_count += 1

                if sr_image.dim() == 4 and sr_image.size(0) == 1:
                    sr_image = sr_image.squeeze(0)

                # save output image
                save_path = output_class_dir / image_path.with_suffix('.png').name
                save_image(sr_image, save_path)

            except Exception as e:
                print(f"Error with {image_path}: {e}")

        print(f'Completed inference for {animal_dir}')

    time_end = time.time()
    cpu_percent_after = psutil.cpu_percent(interval=None)
    memory_after = psutil.virtual_memory().used

    duration = time_end - time_start
    cpu_usage = cpu_percent_after - cpu_percent_before
    memory_used_mb = (memory_after - memory_before) / (1024 ** 2)

    print(f"\n=== Benchmark Results for default CPU Implementation ===")
    print(f"CPU Usage Change: {cpu_usage:.2f}%")
    print(f"Memory Used: {memory_used_mb:.2f} MB")

    print(f"\n[CPU] Processed {image_count} images.")
    print(f"[CPU] Total inference time: {total_time:.2f} seconds")
    print(f"[CPU] Average time per image: {total_time / image_count:.4f} seconds")

threads_list = [3, 6, 9, 12]

for thread_cnt in threads_list:
    process_images(thread_cnt)




Upscaling using thread count = 3


CPU Upscaling Images: 100%|██████████| 1/1 [02:19<00:00, 139.14s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\ant

=== Benchmark Results for default CPU Implementation ===
CPU Usage Change: 24.20%
Memory Used: -170.51 MB

[CPU] Processed 30 images.
[CPU] Total inference time: 132.54 seconds
[CPU] Average time per image: 4.4181 seconds

Upscaling using thread count = 6



CPU Upscaling Images: 100%|██████████| 1/1 [01:54<00:00, 114.25s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\ant

=== Benchmark Results for default CPU Implementation ===
CPU Usage Change: 56.50%
Memory Used: -0.28 MB

[CPU] Processed 30 images.
[CPU] Total inference time: 107.53 seconds
[CPU] Average time per image: 3.5845 seconds

Upscaling using thread count = 9



CPU Upscaling Images: 100%|██████████| 1/1 [01:41<00:00, 101.91s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\ant

=== Benchmark Results for default CPU Implementation ===
CPU Usage Change: 82.90%
Memory Used: 47.44 MB

[CPU] Processed 30 images.
[CPU] Total inference time: 94.89 seconds
[CPU] Average time per image: 3.1629 seconds

Upscaling using thread count = 12



CPU Upscaling Images: 100%|██████████| 1/1 [01:41<00:00, 101.24s/it]

Completed inference for c:\Users\Y\Documents\ml\course_work\ESRGAN\data\input\animals\ant

=== Benchmark Results for default CPU Implementation ===
CPU Usage Change: 98.60%
Memory Used: -8.58 MB

[CPU] Processed 30 images.
[CPU] Total inference time: 94.03 seconds
[CPU] Average time per image: 3.1343 seconds





In [None]:
# PART C - OPENVINO OPTIMIZED VERSION - USES THE IR FORMAT CONVERTED BIN FILE FOR MODEL

from openvino import Core
import numpy as np
import torch
from torchvision.utils import save_image
from tqdm import tqdm
import time
from pathlib import Path

cpu_percent_before = psutil.cpu_percent(interval=None)
memory_before = psutil.virtual_memory().used
time_start = time.time()

openvino_libs = os.path.join(sys.prefix, "Lib", "site-packages", "openvino", "libs")
os.add_dll_directory(openvino_libs)

# change the model path to where the model was exported
model_path = "c:/Users/Y/Documents/ml/course_work/ESRGAN/esrgan_model.xml"

# load openvino model
ie = Core()
model_ov = ie.read_model(model=model_path)
compiled_model = ie.compile_model(model=model_ov, device_name="CPU")
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)

total_time = 0.0
image_count = 0

animal_dirs = sorted([d for d in input_root.iterdir() if d.is_dir()])[:20]

# get images from animal dataset
for animal_dir in tqdm(animal_dirs, desc="OpenVINO CPU Upscaling Images"):
    output_class_dir = output_root / f"{animal_dir.name}_cpu"
    output_class_dir.mkdir(parents=True, exist_ok=True)

    for image_path in animal_dir.glob('*.jpg'):
        try:
            processed = preprocess(image_path, device='cpu') 
            if isinstance(processed, torch.Tensor):
                processed = processed.numpy()

            processed = processed.astype(np.float32)

            
            start = time.time()
            result = compiled_model([processed])[output_layer]

            end = time.time()
            total_time += (end - start)
            image_count += 1


            sr_image = torch.tensor(result).squeeze(0).clamp(0, 1) 

            save_path = output_class_dir / image_path.with_suffix('.png').name
            save_image(sr_image, save_path)

        except Exception as e:
            print(f"Error with {image_path}: {e}")

time_end = time.time()
cpu_percent_after = psutil.cpu_percent(interval=None)
memory_after = psutil.virtual_memory().used

duration = time_end - time_start
cpu_usage = cpu_percent_after - cpu_percent_before
memory_used_mb = (memory_after - memory_before) / (1024 ** 2)

print(f"\n=== Benchmark Results for OpenVINO CPU Implementation ===")
print(f"CPU Usage: {cpu_usage:.2f}%")
print(f"Memory Used: {memory_used_mb:.2f} MB")

print(f"\n[OpenVINO CPU] Processed {image_count} images.")
print(f"[OpenVINO CPU] Total inference time: {total_time:.2f} seconds")
print(f"[OpenVINO CPU] Average time per image: {total_time / image_count:.4f} seconds")

OpenVINO CPU Upscaling Images: 100%|██████████| 20/20 [46:42<00:00, 140.11s/it]


=== Benchmark Results for OpenVINO CPU Implementation ===
CPU Usage: 53.60%
Memory Used: 123.68 MB

[OpenVINO CPU] Processed 1200 images.
[OpenVINO CPU] Total inference time: 2553.60 seconds
[OpenVINO CPU] Average time per image: 2.1280 seconds



