In [1]:
from pathlib import Path
from numpy.random import default_rng
from PIL import Image
import dill as pickle
import torchvision.transforms as transforms
import torch
import numpy as np
from src.dataset import get_borders

In [2]:
# Configuration: where the training images live and where the held-out
# test images are moved to (both relative to the working directory).
root_dir, testset_dir = "data/data_train", "data/data_test"

In [3]:
# Normalise the training root to a Path object (Path() also accepts an
# existing Path, so re-running this cell is harmless).
root_dir = Path(root_dir)
# Recursively collect every *.jpg below the root as plain strings; sorting
# gives a deterministic order for the index-based sampling that follows.
file_paths = sorted(map(str, root_dir.rglob("*.jpg")))

In [4]:
# Seeded generator: for an unchanged file list the same indices are drawn
# on every fresh run.
rng = default_rng(0)
# Pick 208 distinct positions in file_paths (sampling without replacement).
# NOTE: Generator.choice raises ValueError if fewer than 208 files exist.
numbers = rng.choice(len(file_paths), 208, replace=False)

In [5]:
# Translate the sampled indices into the corresponding image paths; these
# are the files that will make up the test set.
testset_paths = [
    file_paths[sample_idx]
    for sample_idx in numbers
]

In [6]:
# Move the sampled images out of the training tree into the test directory.
#
# NOTE: this step is destructive. The files leave `root_dir`, so re-running
# the listing/sampling cells afterwards would draw a *further* batch from
# whatever remains.
dest_dir = Path(testset_dir)
# Create the destination up front; rename() fails if the directory is missing.
dest_dir.mkdir(parents=True, exist_ok=True)
for p in testset_paths:
    src = Path(p)
    # Flatten the last three path components into one file name so files
    # from different sub-folders cannot collide. Path.parts is used instead
    # of splitting on "/" so this also works with Windows separators.
    # The last component already carries the ".jpg" suffix, so no extension
    # is appended (previously this produced "*.jpg.jpg").
    fname = "_".join(src.parts[-3:])
    src.rename(dest_dir / fname)

In [7]:
# Re-list the test directory so testset_paths now points at the files'
# current locations, again as sorted strings for a deterministic order.
testset_paths = sorted(map(str, Path(testset_dir).rglob("*.jpg")))

In [8]:
# One set of border sizes per test image, generated with a different seed
# for each axis (seed 0 -> borders_x, seed 1 -> borders_y).
# NOTE(review): get_borders is project code not shown here; the sample loop
# later indexes the result as borders[:, i] and reads entries [0] and [1],
# so it presumably returns a 2-D array of (start, end) border widths per
# sample - confirm against src.dataset.
borders_x, borders_y = (
    get_borders(len(testset_paths), seed=axis_seed) for axis_seed in (0, 1)
)

In [9]:
# Transform that rescales every image to a fixed 90x90 size before the
# borders are cut out.
rz = transforms.Resize(size=(90, 90))

In [15]:
# Build the test samples. For every image:
#   * known_array  - uint8 mask, 1 inside the kept rectangle, 0 on the border
#   * input_array  - the resized image with the border pixels set to 0
#   * target_array - the original border pixel values (1-D, taken via the
#                    boolean mask before they are zeroed)
#   * sample_id    - position of the image in testset_paths
known_arrays = []
input_arrays = []
target_arrays = []
sample_ids = []
for i, img_path in enumerate(testset_paths):
    # Use a context manager so the file handle is released for each image;
    # np.array() copies the pixel data, so `ar` stays valid after closing.
    with Image.open(img_path) as img:
        ar = np.array(rz(img))

    # Column i holds this sample's borders; entry 0 is the leading and
    # entry 1 the trailing border width. "x" is applied to axis 0 and "y"
    # to axis 1 of the array.
    border_x, border_y = borders_x[:, i], borders_y[:, i]

    # Convert to explicit start/stop indices instead of negative slicing:
    # `a[s:-0]` would be empty and `a[-0:]` would select everything if a
    # trailing border were ever 0, and negating an unsigned integer wraps.
    # max(..., 0) keeps an oversized border from turning into a negative
    # (from-the-end) index.
    x_start, y_start = int(border_x[0]), int(border_y[0])
    x_stop = max(ar.shape[0] - int(border_x[1]), 0)
    y_stop = max(ar.shape[1] - int(border_y[1]), 0)

    known_array = np.zeros_like(ar, dtype="uint8")
    known_array[x_start:x_stop, y_start:y_stop] = 1

    # Everything outside the kept rectangle is border: save those pixel
    # values as the prediction target first, then blank them in the input.
    border_mask = known_array == 0
    target_array = ar[border_mask]
    ar[border_mask] = 0

    known_arrays.append(known_array)
    input_arrays.append(ar)
    target_arrays.append(target_array)
    sample_ids.append(i)

In [16]:
# Bundle everything that describes the test set into one dictionary.
# The per-sample lists are frozen into tuples; the border arrays are stored
# as returned by get_borders.
testset = dict(
    input_arrays=tuple(input_arrays),
    known_arrays=tuple(known_arrays),
    target_arrays=tuple(target_arrays),
    borders_x=borders_x,
    borders_y=borders_y,
    sample_ids=tuple(sample_ids),
)

In [17]:
# Persist the results (`pickle` is dill here, see the imports cell):
#   * my_testset_dict.pkl - the full test-set dictionary
#   * my_testset.pkl      - only the list of target arrays
for pkl_path, payload in (
    ("data/my_testset_dict.pkl", testset),
    ("data/my_testset.pkl", target_arrays),
):
    with open(pkl_path, "wb") as f:
        pickle.dump(payload, f)