In [None]:
# Look at the files, create a csv file with the size of the files
import csv
import os
from os import path

# Base path for where the files are stored
# Alternate input set: "/data/CID22_validation_set_without_distorted/"
main_path = "/data/CID22/"
# Path to redone pngs, using standard zlib and libpng at level 6
# This eliminates the variations on the PNGs
orig_path = main_path + "redo/"

# QB3 in FTL mode
qb3_fast_path = main_path + "qb3/"
# QB3 in BASE mode
qb3_base_path = main_path + "qb3d/"
# QB3 in BEST mode
qb3_best_path = main_path + "qb3b/"
# QB3 in BEST mode with best band mix
qb3_best_band_path = main_path + "qb3bx/"

# First, get the list of redone files.
# os.listdir returns entries in arbitrary order; sorting by name makes the
# order of equal-size rows in the CSV reproducible (the size sort is stable).
orig_files = sorted(os.listdir(orig_path))

# Now, get the size of the files, one list per encoding
orig_sizes = []
qb3fast_sizes = []
qb3base_sizes = []
qb3best_sizes = []
qb3best_band_sizes = []

names = []
for f in orig_files:
    basename, extension = path.splitext(f)
    # Only real .png files; a substring test would also accept names such as
    # "png_notes.txt" or "image.png.bak"
    if extension != ".png":
        continue
    qb3_name = basename + ".qb3"
    names.append(basename)
    orig_sizes.append(path.getsize(path.join(orig_path, f)))
    # Every PNG is expected to have a matching .qb3 in each QB3 folder,
    # a missing one raises FileNotFoundError
    qb3fast_sizes.append(path.getsize(path.join(qb3_fast_path, qb3_name)))
    qb3base_sizes.append(path.getsize(path.join(qb3_base_path, qb3_name)))
    qb3best_sizes.append(path.getsize(path.join(qb3_best_path, qb3_name)))
    qb3best_band_sizes.append(path.getsize(path.join(qb3_best_band_path, qb3_name)))


# Zip the lists into one row per file
data = zip(names, orig_sizes, qb3fast_sizes, qb3base_sizes, qb3best_sizes, qb3best_band_sizes)
# Sort list by original size
data = sorted(data, key=lambda x: x[1])

# Write the data to a csv file, with LF line endings.
# newline="" stops the text layer from translating "\n", so the line
# terminator chosen below is what ends up in the file on every platform.
with open("CID22_sizes.csv", "w", newline="") as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(["Name", "PNG", "QB3 Fast", "QB3 Base", "QB3 Best", "QB3 Best Band"])
    writer.writerows(data)

def read_csv(filename):
    """Read a size table CSV and return its header and rows.

    The first line is returned as the list of column labels. In every other
    row, columns 1 to 5 are converted to integers; column 0 is left as a
    string. Blank lines are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A (labels, data) tuple, where labels is the header row and data is a
        list of rows, each row being a list.

    Raises:
        StopIteration: if the file has no header line.
        IndexError: if a non-blank row has fewer than 6 columns.
        ValueError: if a value in columns 1 to 5 is not an integer.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires for file objects
    with open(filename, "r", newline="") as f:
        reader = csv.reader(f)
        labels = next(reader)
        # csv.reader yields an empty list for a blank line; drop those so a
        # stray or trailing empty line does not break the conversion below
        data = [row for row in reader if row]

    # Convert columns 1 to 5 to integers
    for row in data:
        for col in range(1, 6):
            row[col] = int(row[col])

    return labels, data

In [None]:
# Bar chart of the compressed sizes, files ordered by PNG size
import matplotlib.pyplot as plt
import numpy as np

# 20 x 10 inch figure with a single set of axes
fig, ax = plt.subplots(figsize=(20, 10))

# Unit width, the bars drawn below use multiples of it
bar_width = 0.2

# Read the CSV file to plot, rows are already ordered by PNG size
labels, data = read_csv("CID22_sizes.csv")

# One x position per file
index = np.arange(len(data))

# Split the rows into columns
names = [row[0] for row in data]
# Sizes are shown as a percentage of the raw size, taken as 512*512*3 bytes
RAW_SIZE = 512*512*3 / 100
orig, qb3fast, qb3base, qb3best, qb3bestmix = (
    [int(row[col]) / RAW_SIZE for row in data] for col in range(1, 6)
)

# PNG is drawn as a dashed black line without markers, not as bars
ax.plot(index, orig, label="PNG", linestyle='--', color='black')
# Only QB3 Fast and the best band mix results get bars; the base and best
# columns are unpacked above but not drawn
bar_qb3fast = ax.bar(index + bar_width, qb3fast, bar_width*2, label="QB3 Fast")
bar_qb3bestmix = ax.bar(index + 2*bar_width, qb3bestmix, bar_width*4, label="QB3 Best")

ax.set_xlabel("File")
# Place x = 0 on the extreme left
ax.set_xlim(0, len(data))

ax.set_ylabel("Size as % of raw")
# A tick every 5%, from 0 to 95
ax.set_yticks(np.arange(0, 100, 5))
ax.set_title("QB3 vs PNG sizes, CID22 complete set")
# Legend, upper left
ax.legend(loc='upper left')

fig.tight_layout()
plt.show()
# Save the figure
fig.savefig("CID22_QB3vsPNG.svg")

In [None]:
# Another way to compare the sizes: the savings of each QB3 mode relative to PNG, as a running sum over the files
import matplotlib.pyplot as plt
# np.arange is used for the y ticks below; import it in this cell too, so the
# cell does not depend on the import done by the bar chart cell
import numpy as np

# First, sort the data by the difference between the original and the QB3 Fast sizes
# NOTE(review): an earlier comment described this as sorting by the best_band
# savings, but the key uses the QB3 Fast column (x[2]). Confirm which one is intended.
labels, data = read_csv("CID22_sizes.csv")
data = sorted(data, key=lambda x: x[1] - x[2])

# Number of input files, counted before the row of zeros is added.
# Used for the per-file averages printed at the end, instead of a fixed count.
num_files = len(data)

# Prepend a line of zeros, to force the lines to start from 0-0
data = [["", 0, 0, 0, 0, 0]] + data

# Keep the name and PNG size, replace each QB3 size with its savings vs PNG
data = [[x[0], x[1], x[1] - x[2], x[1] - x[3], x[1] - x[4], x[1] - x[5]] for x in data]
# For columns 2 to 5, replace the values with the running sums
for line in range(1, len(data)):
    for col in range(2, 6):
        data[line][col] = data[line-1][col] + data[line][col]

# Column 1 still holds the per-file PNG sizes, the added zero row does not change the sum
totalsize = sum(x[1] for x in data)

# Create a figure
fig, ax = plt.subplots()
# Set the figure size
fig.set_size_inches(20, 10)

# Create a line chart
index = range(len(data))
# Unpack by columns
names, orig, qb3fast, qb3base, qb3best, qb3bestmix = zip(*data)

# The PNG sizes themselves are not plotted, only the cumulative savings
line_qb3fast = plt.plot(index, qb3fast, label="QB3 Fast")
line_qb3base = plt.plot(index, qb3base, label="QB3 Base")
line_qb3best = plt.plot(index, qb3best, label="QB3 Best")
line_qb3bestmix = plt.plot(index, qb3bestmix, label="QB3 Best band mix")

plt.xlabel("File number")
plt.ylabel("Total savings (bytes)")
plt.title("QB3 size savings vs PNG")
# Draw a line at 0, thin
plt.axhline(0, color='black', linewidth=0.5)
# Place x = 0 on the extreme left
plt.xlim(0, len(data))
# Vertical axis ticks every 1e6
plt.yticks(np.arange(-2e6, 8e6, 1e6))
# Legend
plt.legend()
plt.show()
# Save it as SVG
fig.savefig("CID22_savings.svg")

# Print the totals. The last row of data holds the final running sums.
# RAW_SIZE is defined in the bar chart cell, it is one percent of the raw size,
# so (total / number of files / RAW_SIZE) is the average size as a % of raw.
print(f"Total size of input PNGs: {totalsize}, {totalsize/num_files/RAW_SIZE:2.6}% of raw")
print(f"QB3 fast savings: {data[-1][2]} {100*data[-1][2]/totalsize:1.3}%")
print(f"QB3 base savings: {data[-1][3]} {100*data[-1][3]/totalsize:1.3}%")
print(f"QB3 best: {data[-1][4]} {100*data[-1][4]/totalsize:1.3}%")
print(f"QB3 best band mix savings: {data[-1][5]} {100*data[-1][5]/totalsize:1.3}%")
print(f"Total Qb3 output: {100*(totalsize - data[-1][5])/totalsize:2.4}% of the PNGs, {(totalsize - data[-1][5])/num_files/RAW_SIZE:2.6}% of raw")
