In [None]:
import cv2 
import torch 
import pandas as pd
import random
import warnings 
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from models.space_net import SPACE
from models.race import RACE, ACE
from models.hdrnet import PICE_B
from utils import decompose_imgs, compose_imgs
from ssim_map import pos_similarity_ratio
from saliency_losses import nss, corr_coeff, kld_loss, log_softmax, softmax
from utils import contrast_loss_G, contrast_loss_L, R_imgs, R_sclr, EME
from ISFDataset import ISFDataset
from torch.utils.data import DataLoader
from kornia.losses.ssim import ssim_loss
import numpy as np

In [None]:
# Load the test image; cv2.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with the offending path.
IMG_PATH = "./test_images/OutdoorManMade_004.jpg"
img_bgr = cv2.imread(IMG_PATH)
if img_bgr is None:
    raise FileNotFoundError(f"Could not read test image: {IMG_PATH}")

# OpenCV decodes to BGR; convert to RGB and keep an untouched copy for the
# later resizing cell.
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
orig_img = img.copy()

# Channels-last uint8 array -> (1, C, H, W) float tensor scaled by 1/255.
img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0) / 255.
y, cbcr = decompose_imgs(img)

gamma = 2.2
k = 0.6
# NOTE(review): R is passed to the models as the keyword argument `R`;
# presumably a reduction rate derived from the scaling factor k and gamma —
# TODO confirm against the model definitions.
R = 1 - k ** gamma
y_R = y * k
print(y.shape)

In [3]:
# Build the three networks that are benchmarked in the cells below.
space_options = dict(apply_center_bias=True, apply_gfcorrection=True, apply_len=True)
model_space = SPACE(**space_options)
model_race, model_ace = RACE(), ACE()

In [4]:
### MEASURE LATENCY CPU
import time
import psutil

def measure_latency_cpu_usage(model, test_inputs, R=0):
    """Time a single forward pass on the CPU and sample the process CPU usage.

    Args:
        model: Object supporting ``.to("cpu")`` that is callable as
            ``model(test_inputs, R=R)``.
        test_inputs: Input supporting ``.to("cpu")``.
        R: Value forwarded to the model as the keyword argument ``R``.

    Returns:
        tuple: ``(latency, cpu_usage)`` where ``latency`` is the elapsed time
        of the forward pass in seconds and ``cpu_usage`` is the percentage
        reported by ``psutil.Process.cpu_percent`` for the interval covering
        the forward pass (it can exceed 100 when several cores are busy).
    """
    # Device transfers happen before the timer starts so they are not measured.
    model = model.to("cpu")
    test_inputs = test_inputs.to("cpu")
    process = psutil.Process()

    # cpu_percent(interval=None) reports usage since the *previous* call and
    # returns a meaningless 0.0 the first time, so this call only sets the
    # reference point; its return value is deliberately discarded.
    process.cpu_percent(interval=None)

    # perf_counter is monotonic and high resolution, unlike time.time().
    start = time.perf_counter()
    with torch.no_grad():
        _ = model(test_inputs, R=R)
    latency = time.perf_counter() - start

    # Usage accumulated since the priming call, i.e. during the forward pass.
    cpu_usage = process.cpu_percent(interval=None)
    return latency, cpu_usage

# Gather 50 single-pass CPU latencies for the ACE model; the CPU-usage half of
# each returned pair is not needed here.
history_ltc = [
    measure_latency_cpu_usage(model_ace, y, R)[0]
    for _ in range(50)
]

In [None]:
# Mean and standard deviation of the collected latencies, in seconds.
ltc_arr = np.asarray(history_ltc)
print(ltc_arr.mean(), ltc_arr.std())

In [None]:
### MEASURE LATENCY GPU
# move the model to GPU
def measure_latency_gpu(model, test_inputs, R=0, repetitions=50, warmup=10):
    """Measure the forward-pass latency of ``model`` on the GPU with CUDA events.

    Prints the mean and the standard deviation of the per-pass timings;
    ``torch.cuda.Event.elapsed_time`` reports milliseconds.

    Args:
        model: Object supporting ``.to("cuda")`` that is callable as
            ``model(test_inputs, R=R)``.
        test_inputs: Input supporting ``.to("cuda")``.
        R: Value forwarded to the model as the keyword argument ``R``.
        repetitions: Number of timed forward passes.
        warmup: Number of untimed forward passes executed first.

    Returns:
        None. The statistics are only printed.
    """
    model = model.to("cuda")
    test_inputs = test_inputs.to("cuda")
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    timings = np.zeros((repetitions, 1))

    with torch.no_grad():
        # Warm-up runs under no_grad as well, so it exercises the same
        # execution path as the timed passes and builds no autograd graph.
        for _ in range(warmup):
            _ = model(test_inputs, R=R)

        for rep in range(repetitions):
            starter.record()
            _ = model(test_inputs, R=R)
            ender.record()
            # Both events must have completed before elapsed_time is read.
            torch.cuda.synchronize()
            timings[rep] = starter.elapsed_time(ender)

    mean_syn = np.mean(timings)
    std_syn = np.std(timings)
    print(mean_syn, std_syn)
   
# Run the GPU benchmark for the ACE model on input `y` with rate `R`.
measure_latency_gpu(model=model_ace, test_inputs=y, R=R)

### REPEAT THE CPU AND GPU LATENCY MEASUREMENTS FOR DIFFERENT INPUT RESOLUTIONS


In [None]:
def _to_unit_tensor(rgb):
    """Turn a channels-last array into a (1, C, H, W) tensor scaled by 1/255."""
    return torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0) / 255.


# cv2.resize takes the target size as (width, height).
img_480 = _to_unit_tensor(cv2.resize(orig_img, (848, 480)))
img_720 = _to_unit_tensor(cv2.resize(orig_img, (1280, 720)))
# The original frame is used without resizing.
# NOTE(review): presumably it is already 1080p — TODO confirm.
img_1080 = _to_unit_tensor(orig_img)
img_2k = _to_unit_tensor(cv2.resize(orig_img, (2048, 1556)))

# Keep only the first output of decompose_imgs for each resolution.
y_480, y_1080, y_720, y_2k = (
    decompose_imgs(rgb)[0] for rgb in (img_480, img_1080, img_720, img_2k)
)

print(*(t.shape for t in (y_480, y_720, y_1080, y_2k)), sep="\n")

In [None]:
# Inputs ordered by increasing resolution and the models to benchmark.
img_list = [y_480, y_720, y_1080, y_2k]
model_list = [model_ace, model_race, model_space]
N_CPU_RUNS = 20  # timed forward passes per (model, input) pair

# Loop variables use their own names so that they neither shadow each other
# (the inner counter used to reuse `i`) nor overwrite `img` / `history_ltc`
# defined by earlier cells.
for model_idx, model in enumerate(model_list):
    print(f"MODEL {model_idx}")
    for y_in in img_list:
        print(f"==== CPU TIME FOR INPUT {y_in.shape}")
        latencies = []
        for run_idx in range(N_CPU_RUNS):
            ltc, _cpu_pct = measure_latency_cpu_usage(model, y_in, R)
            latencies.append(ltc)
        print(np.mean(latencies), np.std(latencies))  # in seconds
    