In [1]:
from utils.dataset_utils import OriginalDataset, plot_image
from utils.utils import get_storage
import numpy as np

original_dataset_path = "datasets/droid_100_sample_pictures"
ds = OriginalDataset(data_path=original_dataset_path, color=True)

height = len(ds[0])
width = len(ds[0][0])
channels = 3

print(f"Height: {height}, Width: {width}")


Height: 180, Width: 320


In [2]:
from collections.abc import Iterable

# Accepts any kind of nested array and returns the minimum and maximum values.
def get_array_min_max(img):
   min_value = 999999
   max_value = -99999
   if isinstance(img, Iterable):
      for iter in img:
         res = get_array_min_max(iter)
         min_value = min(min_value, res[0])
         max_value = max(max_value, res[1])
   else:
      min_value = min(min_value, int(img))
      max_value = max(max_value, int(img))
   return min_value, max_value

# Counts the number of entries in a nested array.
def get_array_length(img):
   count = 0
   if isinstance(img, Iterable):
      for iter in img:
         count += get_array_length(iter)
   else:
      count += 1
   return count

# Returns the bit width to represent each entry in a nested array.
def get_array_bit_width(img):
   min_value, max_value = get_array_min_max(img)
   return (int(max_value) - int(min_value)).bit_length()

# Returns the total number of bits required to store the nested array.
def get_array_size(img):
   return get_array_length(img) * get_array_bit_width(img)


In [5]:
from tqdm import tqdm

# Create delta. delta := img - base
def create_delta(base, img):
   delta = np.zeros((height, width, channels), dtype=int)
   for y in range(height):
      for x in range(width):
         delta[y][x][0] = int(img[y][x][0]) - int(base[y][x][0])
         delta[y][x][1] = int(img[y][x][1]) - int(base[y][x][1])
         delta[y][x][2] = int(img[y][x][2]) - int(base[y][x][2])
   return delta

def create_outliers(img, cond):
   positions = []
   outliers = []
   for x in range(height):
      for y in range(width):
         if cond(img[x][y]):
            outliers.append(np.copy(img[x][y]))
            positions.append([x, y])
            img[x][y] = [0, 0, 0]
   return positions, outliers

def get_position_size(positions): # TODO: try to encode the positions with Andrea's bitmap method -> roaring bitmap
   return get_array_size(positions)

def delta_with_outliers(base, img):
   threshold = 4 # TODO: opt 

   delta = create_delta(base, img)
   positions, outliers = create_outliers(delta, lambda x: int(x[0]).bit_length() >= threshold or int(x[1]).bit_length() >= threshold or int(x[2]).bit_length() >= threshold)
   
   size = get_array_size(delta) + get_position_size(positions) + get_array_size(outliers)
   return size

# Measurement code
total = 0
for img in tqdm(ds):
   total += get_array_size(img)

base = ds[0]
total_base = 0
for img in tqdm(ds):
   total_base += delta_with_outliers(base, img)

prev = ds[0]
total_delta = 0
for img in tqdm(ds):
   total_delta += delta_with_outliers(prev, img)
   prev = img

print(f"Total: {total}")
print(f"Total Base: {total_base}")
print(f"Total Delta: {total_delta}")


100%|██████████| 166/166 [00:30<00:00,  5.47it/s]
100%|██████████| 166/166 [01:01<00:00,  2.68it/s]
100%|██████████| 166/166 [00:53<00:00,  3.12it/s]

Total: 229478400
Total Base: 210562359
Total Delta: 132361710



