In [1]:
import glob
import json
import multiprocessing
import os

import cv2
import numpy as np
from skimage.metrics import peak_signal_noise_ratio, mean_squared_error, structural_similarity
import tqdm

In [2]:
def set_up(directory='loss', always=True, archive='~/val2017.zip'):
    """Extract a fresh copy of the image archive into ``directory``.

    Parameters
    ----------
    directory : str
        Destination path. Anything already at this path is removed before
        extraction.
    always : bool
        When False and ``directory`` already exists, do nothing. When True
        (the default), always wipe and re-extract.
    archive : str
        Path to the zip file to extract; a leading ``~`` is expanded.

    Raises
    ------
    FileNotFoundError, zipfile.BadZipFile
        If the archive is missing or unreadable. The previous shell-based
        version only printed the tool's error and carried on.
    """
    # Local imports keep this cell self-contained; the notebook's import cell
    # provides `os` but not these two modules.
    import shutil
    import zipfile

    if not always and os.path.exists(directory):
        return

    # Equivalent of `rm -rf`, without passing the name through a shell (so
    # spaces or glob characters in `directory` cannot widen what is deleted).
    if os.path.islink(directory) or os.path.isfile(directory):
        os.remove(directory)
    elif os.path.isdir(directory):
        shutil.rmtree(directory)

    with zipfile.ZipFile(os.path.expanduser(archive)) as bundle:
        bundle.extractall(directory)
    
def jpgs(directory='orig'):
    """Return the sorted paths of ``.jpg`` files one level below ``directory``.

    Note: ``glob.glob`` is called without ``recursive=True``, so the ``**``
    component behaves like ``*`` and matches exactly one sub-directory level
    (e.g. ``orig/val2017/x.jpg``, but not ``orig/x.jpg``).
    """
    matches = glob.glob(f'{directory}/**/*.jpg')
    matches.sort()
    return matches

# Extract the reference copy into 'orig'. Passing always=False makes this a
# no-op when the directory already exists, so re-running the cell is cheap.
%time set_up('orig', False)

CPU times: user 8 µs, sys: 5 µs, total: 13 µs
Wall time: 14.8 µs


In [3]:
def degrade(q):
    """Re-encode every ``loss/val2017/*.jpg`` at quality ``q`` in one process.

    ``set_up()`` is called first with its defaults, which wipes and
    re-extracts ``loss``, so each call starts from freshly extracted files
    rather than from the result of an earlier call.
    """
    set_up()
    # IPython shell escape; `{q}` is interpolated from the Python variable.
    # `mogrify` is presumably ImageMagick's, which rewrites files in place.
    !mogrify -quality {q} loss/val2017/*.jpg

def pdegrade(q):
    """Same job as ``degrade``, but dispatched through the ``parallel`` tool.

    ``set_up()`` is called first with its defaults, which wipes and
    re-extracts ``loss``, so each call starts from freshly extracted files.
    """
    set_up()
    # !ls loss/val2017/* | xargs -n 625 -P 8 mogrify -quality {q}
    # IPython shell escape. Every path matched by the glob is passed after
    # `:::` (GNU parallel argument syntax, presumably one mogrify job per
    # file — confirm against the installed `parallel`).
    !parallel mogrify -quality {q} ::: loss/val2017/*
    
def evaluate(orig_path, loss_path):
    """Compare a degraded image against its original.

    Parameters
    ----------
    orig_path : str
        Path to the reference image.
    loss_path : str
        Path to the degraded copy of the same image.

    Returns
    -------
    dict
        File name, dimensions (taken from the original), channel counts,
        on-disk sizes, the uncompressed size ``w * h * channels`` and the
        MSE / PSNR / SSIM scores of the degraded image versus the original.

    Raises
    ------
    OSError
        If either image cannot be read by ``cv2.imread``.
    """
    import re
    # `skimage` itself is not imported at notebook level (only names from
    # skimage.metrics are), so it is imported here for `__version__`.
    import skimage
    # Kept local as in the original, presumably so pool workers resolve the
    # metric functions on their own — TODO confirm this is still needed.
    from skimage.metrics import peak_signal_noise_ratio, mean_squared_error, structural_similarity

    orig = cv2.imread(orig_path, cv2.IMREAD_ANYCOLOR)
    loss = cv2.imread(loss_path, cv2.IMREAD_ANYCOLOR)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an AttributeError below.
    for path, image in ((orig_path, orig), (loss_path, loss)):
        if image is None:
            raise OSError(f'could not read image: {path}')

    h, w = orig.shape[:2]
    channels = 1 if loss.ndim < 3 else loss.shape[2]
    orig_channels = 1 if orig.ndim < 3 else orig.shape[2]

    # A colour original whose degraded copy was read back as grayscale:
    # replicate the single plane so both arrays have the same shape.
    # `channels` deliberately keeps the pre-expansion value for the report.
    if orig_channels == 3 and channels == 1:
        loss = np.stack([loss, loss, loss], axis=-1)

    ssim_args = {}
    if orig_channels == 3:
        # Compare versions numerically: as strings, '0.9' >= '0.19' is True.
        # An unparseable version string is treated as a modern release.
        parsed = re.match(r'(\d+)\.(\d+)', skimage.__version__)
        modern = parsed is None or tuple(int(part) for part in parsed.groups()) >= (0, 19)
        if modern:
            ssim_args['channel_axis'] = -1
        else:
            ssim_args['multichannel'] = True

    return {
        'image': os.path.basename(orig_path),
        'width': w,
        'height': h,
        'channels': channels,
        'origchan': orig_channels,
        'filesize': os.stat(loss_path).st_size,
        'origsize': os.stat(orig_path).st_size,
        'rawsize': w * h * channels,
        'mean_squared_error': mean_squared_error(orig, loss),
        'peak_signal_noise_ratio': peak_signal_noise_ratio(orig, loss),
        'structural_similarity': structural_similarity(orig, loss, **ssim_args),
    }
    

In [None]:
%%time

# The reference images are not modified by the sweep, so list them once
# instead of re-globbing on every pass.
orig_paths = jpgs()
orig_names = [os.path.basename(path) for path in orig_paths]

# The context manager shuts the worker pool down when the sweep finishes or
# fails, instead of leaving worker processes behind.
with multiprocessing.Pool() as p:
    for q in tqdm.trange(1, 101):
        # Re-extract `loss` and recompress it at quality q.
        pdegrade(q)
        loss_paths = jpgs('loss')

        # zip() would silently truncate or mis-pair the two listings if they
        # ever diverged; fail loudly rather than score unrelated images.
        if [os.path.basename(path) for path in loss_paths] != orig_names:
            raise RuntimeError(
                f'orig and loss file listings differ at quality {q}: '
                f'{len(orig_paths)} vs {len(loss_paths)} files'
            )

        results = {
            'quality': q,
            'files': p.starmap(evaluate, zip(orig_paths, loss_paths)),
        }
        with open(f'properties_{q:03d}.json', 'w') as jout:
            json.dump(results, jout, indent=2)

  7%|▋         | 7/100 [28:35<6:19:41, 244.97s/it]

In [None]:
# Peek at the first 1000 characters of the quality-100 results; the `with`
# block closes the file handle, which the bare open() call left dangling.
with open('properties_100.json') as preview:
    print(preview.read(1000))