In [2]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import re
import shutil

### masks

In [5]:
# Convert the raw annotation masks in masks-raw/ into sequentially numbered
# boolean masks (mask_0.npy, mask_1.npy, ...) in masks-renamed/, and write a
# PNG rendering next to each one.
base_dir = Path("../data-preprocessing/")
assert base_dir.exists()
dir_labelled_raw = base_dir / "masks-raw"
assert dir_labelled_raw.exists()
dir_labelled_renamed = base_dir / "masks-renamed"
dir_labelled_renamed.mkdir(exist_ok=True)

file_ext = ".npy"
files = list(dir_labelled_raw.glob(f"*{file_ext}"))
print(f"Found {len(files)} files")


def _mask_sort_key(path):
    """Return ``(task number, file name)`` for a raw mask file.

    The task number is the integer that follows ``task-`` in the file name.
    The file name is included as a tie-breaker so that files sharing a task
    number are ordered deterministically instead of in whatever order
    ``glob`` happened to yield them.

    Raises:
        ValueError: if the file name contains no ``task-<number>`` part.
    """
    match = re.search(r"task-(\d+)", path.name)
    if match is None:
        raise ValueError(f"Cannot find 'task-<number>' in file name: {path.name}")
    return int(match.group(1)), path.name


# sort numerically by task number (so task-107 comes before task-1000)
files.sort(key=_mask_sort_key)
# print sorted filenames
print("Sorted files:")
for file in files:
    print(file.name)

for index, file in enumerate(files):
    # output name is mask_<index>, numbered from 0 in sorted order
    new_name = f"mask_{index}"
    new_name_with_ext = f"{new_name}{file_ext}"
    new_file = dir_labelled_renamed / new_name_with_ext
    # load the array; every non-zero value becomes True (binary mask)
    arr = np.load(file).astype(bool)
    print(f"Renaming {file.name} to {new_name}")
    # save the boolean array under the new name
    np.save(new_file, arr)

    # also write a PNG rendering of the mask (viridis colormap)
    png_file = dir_labelled_renamed / f"{new_name}.png"
    plt.imsave(png_file, arr, cmap="viridis")

Found 55 files
Sorted files:
task-107-annotation-107-by-2-tag-dna-0.npy
task-108-annotation-108-by-2-tag-dna-0.npy
task-109-annotation-109-by-2-tag-dna-0.npy
task-110-annotation-110-by-2-tag-dna-0.npy
task-111-annotation-111-by-2-tag-dna-0.npy
task-112-annotation-112-by-2-tag-dna-0.npy
task-113-annotation-113-by-2-tag-dna-0.npy
task-114-annotation-114-by-2-tag-dna-0.npy
task-115-annotation-115-by-2-tag-dna-0.npy
task-116-annotation-116-by-2-tag-dna-0.npy
task-117-annotation-117-by-2-tag-dna-0.npy
task-118-annotation-118-by-2-tag-dna-0.npy
task-119-annotation-119-by-2-tag-dna-0.npy
task-120-annotation-120-by-2-tag-dna-0.npy
task-121-annotation-121-by-2-tag-dna-0.npy
task-122-annotation-122-by-2-tag-dna-0.npy
task-123-annotation-123-by-2-tag-dna-0.npy
task-124-annotation-124-by-2-tag-dna-0.npy
task-125-annotation-125-by-2-tag-dna-0.npy
task-126-annotation-126-by-2-tag-dna-0.npy
task-127-annotation-127-by-2-tag-dna-0.npy
task-128-annotation-128-by-2-tag-dna-0.npy
task-129-annotation-129-b

### images

In [None]:
# Copy the image arrays in images/ into images-renamed/ under sequential
# names (image_0.npy, image_1.npy, ...) and write a PNG rendering next to
# each one.
base_dir = Path("../data-preprocessing/")
dir_images = base_dir / "images"
assert dir_images.exists()
dir_images_renamed = base_dir / "images-renamed"
dir_images_renamed.mkdir(exist_ok=True)
file_ext = ".npy"
files = list(dir_images.glob(f"*{file_ext}"))
print(f"Found {len(files)} files")


def _image_sort_key(path):
    """Return ``(image number, file name)`` for an image file.

    The image number is the integer that follows ``image_`` in the file
    name. The file name is included as a tie-breaker so that files sharing
    an image number are ordered deterministically instead of in whatever
    order ``glob`` happened to yield them.

    Raises:
        ValueError: if the file name contains no ``image_<number>`` part.
    """
    match = re.search(r"image_(\d+)", path.name)
    if match is None:
        raise ValueError(f"Cannot find 'image_<number>' in file name: {path.name}")
    return int(match.group(1)), path.name


# sort numerically by image number (so image_9 comes before image_10)
files.sort(key=_image_sort_key)
# print sorted filenames
print("Sorted files:")
for file in files:
    print(file.name)

for index, file in enumerate(files):
    # output name is image_<index>, numbered from 0 in sorted order
    new_name = f"image_{index}"
    new_name_with_ext = f"{new_name}{file_ext}"
    new_file = dir_images_renamed / new_name_with_ext
    print(f"Renaming {file.name} to {new_name}")
    # the array file itself is copied unchanged under its new name
    shutil.copy(file, new_file)

    # also write a PNG rendering of the array (viridis colormap)
    arr = np.load(file)
    png_file = dir_images_renamed / f"{new_name}.png"
    plt.imsave(png_file, arr, cmap="viridis")
