In [1]:
import glob
import json
import multiprocessing
import os

import cv2
import numpy as np
from skimage.metrics import peak_signal_noise_ratio, mean_squared_error, structural_similarity
import tqdm

In [2]:
def set_up(directory='loss', always=True):
    """Replace ``directory`` with a fresh extraction of ``~/val2017.zip``.

    Parameters
    ----------
    directory : str
        Destination path. Whatever currently exists there is deleted before
        the archive is extracted into it.
    always : bool
        When False and ``directory`` already exists, return without touching
        anything. When True (the default), always delete and re-extract.

    Raises
    ------
    OSError
        If the existing path cannot be removed or the archive cannot be opened
        (e.g. ``FileNotFoundError`` when ``~/val2017.zip`` is missing).
    zipfile.BadZipFile
        If ``~/val2017.zip`` is not a valid zip archive.
    """
    # Function-scope imports so this definition does not depend on edits to
    # the notebook's import cell.
    import shutil
    import zipfile

    if not always and os.path.exists(directory):
        return

    # The path is handled by Python rather than interpolated into a shell
    # command, so a name containing spaces or shell metacharacters can no
    # longer make the delete/extract act on unintended paths.
    if os.path.isdir(directory) and not os.path.islink(directory):
        shutil.rmtree(directory)
    elif os.path.lexists(directory):
        # A plain file or a symlink: remove the entry itself, like `rm -rf`.
        os.remove(directory)

    # Opening the archive raises if it is missing, instead of silently leaving
    # an empty directory behind for later cells to trip over.
    with zipfile.ZipFile(os.path.expanduser('~/val2017.zip')) as archive:
        archive.extractall(directory)
    
def jpgs(directory='orig'):
    """Return the sorted ``.jpg`` paths found one level below ``directory``.

    ``glob.glob`` is called without ``recursive=True``, so the ``**`` component
    behaves like a single ``*``: only files sitting in an immediate
    subdirectory of ``directory`` are matched.
    """
    pattern = f'{directory}/**/*.jpg'
    matches = glob.glob(pattern)
    matches.sort()
    return matches

# Extract the reference copy into 'orig'; always=False makes this a no-op when
# the directory already exists (which is why the timing below is microseconds).
%time set_up('orig', False)

CPU times: user 30 µs, sys: 6 µs, total: 36 µs
Wall time: 38.9 µs


In [11]:
def degrade(q):
    """Re-encode the JPEGs under ``loss/val2017`` at JPEG quality ``q``.

    Calls ``set_up()`` with its defaults first (which deletes and re-extracts
    the ``loss`` directory), then hands every ``loss/val2017/*.jpg`` to a
    single ImageMagick ``mogrify`` invocation.
    """
    set_up()
    # mogrify overwrites its input files, so the fresh extraction above keeps
    # each quality level starting from the same source images.
    !mogrify -quality {q} loss/val2017/*.jpg

def pdegrade(q):
    """Parallel variant of ``degrade``: re-encode ``loss/val2017`` at quality ``q``.

    Calls ``set_up()`` with its defaults first (which deletes and re-extracts
    the ``loss`` directory), then uses GNU ``parallel`` to run ``mogrify`` once
    per entry matched by ``loss/val2017/*``.
    """
    set_up()
    # Alternative kept for reference: batch the files through xargs instead.
    # !ls loss/val2017/* | xargs -n 625 -P 8 mogrify -quality {q}
    # Note the glob is `*`, not `*.jpg`: every entry in the directory is passed.
    !parallel mogrify -quality {q} ::: loss/val2017/*
    
def evaluate(orig_path, loss_path):
    """Compare a re-encoded image with its original and collect size/quality stats.

    Relies on the notebook-level imports of ``cv2``, ``np``, ``os`` and the
    ``skimage.metrics`` functions.

    Parameters
    ----------
    orig_path : str
        Path of the reference image.
    loss_path : str
        Path of the re-encoded image to score against the reference.

    Returns
    -------
    dict
        ``image`` (basename of ``orig_path``), ``width``, ``height`` and
        ``channels`` of the reference image, ``filesize`` / ``origsize``
        (``st_size`` of the re-encoded / reference file), ``rawsize``
        (``width * height * channels``), and the three scikit-image metrics
        ``mean_squared_error``, ``peak_signal_noise_ratio`` and
        ``structural_similarity``.

    Raises
    ------
    OSError
        If OpenCV cannot read either file.
    """
    images = []
    for path in (orig_path, loss_path):
        image = cv2.imread(path, cv2.IMREAD_ANYCOLOR)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of an opaque
        # AttributeError on `.shape` further down.
        if image is None:
            raise OSError(f'cv2.imread could not read image: {path!r}')
        images.append(image)
    orig, loss = images

    is_color = orig.ndim == 3
    if is_color and loss.ndim == 2:
        # The re-encoded file decoded as single-channel while the reference
        # decoded with a channel axis; replicate the plane so shapes match.
        loss = np.stack([loss, loss, loss], axis=-1)

    h, w = orig.shape[:2]
    channels = orig.shape[2] if is_color else 1

    return {
        'image': os.path.basename(orig_path),
        'width': w,
        'height': h,
        'channels': channels,
        'filesize': os.stat(loss_path).st_size,
        'origsize': os.stat(orig_path).st_size,
        'rawsize': w * h * channels,
        'mean_squared_error': mean_squared_error(orig, loss),
        'peak_signal_noise_ratio': peak_signal_noise_ratio(orig, loss),
        # channel_axis=None is scikit-image's default, i.e. treat the input as
        # a single-channel image.
        'structural_similarity': structural_similarity(
            orig, loss, channel_axis=-1 if is_color else None
        ),
    }
    

In [12]:
%%time

p = multiprocessing.Pool()

for q in tqdm.trange(100, 101):
    pdegrade(q)
    results = {
        'quality': q,
        'files': p.starmap(evaluate, zip(jpgs(), jpgs('loss'))),
    }
    with open(f'properties_{q:03d}.json', 'w') as jout:
        json.dump(results, jout, indent=2)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [04:38<00:00, 278.79s/it]

CPU times: user 584 ms, sys: 151 ms, total: 735 ms
Wall time: 4min 38s





In [13]:
# Peek at the first 1000 characters of the results file; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open('properties_100.json') as jin:
    print(jin.read(1000))

{
  "quality": 100,
  "files": [
    {
      "image": "000000000139.jpg",
      "width": 640,
      "height": 426,
      "channels": 3,
      "filesize": 261713,
      "origsize": 161811,
      "rawsize": 817920,
      "mean_squared_error": 0.14997187989045382,
      "peak_signal_noise_ratio": 56.37070525500654,
      "structural_similarity": 0.9987529653256231
    },
    {
      "image": "000000000285.jpg",
      "width": 586,
      "height": 640,
      "channels": 3,
      "filesize": 511085,
      "origsize": 335861,
      "rawsize": 1125120,
      "mean_squared_error": 0.21307860494880546,
      "peak_signal_noise_ratio": 54.84540516123896,
      "structural_similarity": 0.9994021450201455
    },
    {
      "image": "000000000632.jpg",
      "width": 640,
      "height": 483,
      "channels": 3,
      "filesize": 321223,
      "origsize": 155667,
      "rawsize": 927360,
      "mean_squared_error": 0.17581629572118704,
      "peak_signal_noise_ratio": 55.680212352165974,
      "s