# Trainable Data Preparation

In [1]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
from tifffile import TiffFile
from tqdm import tqdm

In [2]:
def prepare_npy_data_from_tif(mask_path):
    """Read a TIFF file and return its contents as an array.

    Despite the parameter name, this notebook calls the function for both
    image and mask TIFFs.

    Args:
        mask_path: Path of the TIFF file to read.

    Returns:
        Whatever ``TiffFile.asarray()`` yields for the file.
    """
    # The context manager closes the file handle before the array is returned.
    with TiffFile(mask_path) as tif_handle:
        pixel_array = tif_handle.asarray()
    return pixel_array

def prepare_data(tif_directory, destination_directory):
    """Convert every ``.tif`` file in a directory into a ``.npy`` file.

    Each TIFF is read with ``prepare_npy_data_from_tif`` and written to
    ``<destination_directory>/<id>.npy``. ``<id>`` is the part of the file
    name before the first dot, reduced to the text after its last
    underscore (a name like ``foo_bar_3.tif`` becomes ``3.npy``). Two
    inputs that map to the same ``<id>`` overwrite each other.

    Args:
        tif_directory: Directory searched (non-recursively) for ``*.tif``
            files.
        destination_directory: Directory that receives the ``.npy`` files.
            It is created, including parents, when it does not exist.
    """
    # Without this the first open(..., "wb") below raises FileNotFoundError
    # whenever the output folder has not been created by hand.
    os.makedirs(destination_directory, exist_ok=True)

    # "*.tif" (with the dot) avoids picking up names that merely end in
    # "tif"; sorting makes the processing order reproducible across runs.
    files = sorted(glob(f"{tif_directory}/*.tif"))
    for path in tqdm(files, desc=f"Writing files from {tif_directory} to {destination_directory}"):
        np_data = prepare_npy_data_from_tif(path)
        # glob can return "/" and "\\" separators mixed on Windows, so
        # normalise before taking the final path component.
        file_name = path.replace("\\", "/").rsplit("/", 1)[-1]
        op_file_name = file_name.split(".")[0].split("_")[-1]
        with open(f"{destination_directory}/{op_file_name}.npy", "wb") as f:
            np.save(f, np_data)

In [3]:
# (source TIFF directory, destination .npy directory) pairs, converted in order.
for tif_dir, npy_dir in [
    ("E:/solafune/train/s2_image", "E:/solafune/data/train/images"),
    ("E:/solafune/evaluation", "E:/solafune/data/eval/images"),
    ("E:/solafune/train/mask", "E:/solafune/data/train/masks"),
    ("E:/solafune/sample", "E:/solafune/data/eval/masks"),
]:
    prepare_data(tif_dir, npy_dir)

Writing files from E:/solafune/train/s2_image to E:/solafune/data/train/images:   0%|          | 0/2066 [00:00<?, ?it/s]

Writing files from E:/solafune/train/s2_image to E:/solafune/data/train/images: 100%|██████████| 2066/2066 [00:20<00:00, 103.10it/s]
Writing files from E:/solafune/evaluation to E:/solafune/data/eval/images: 100%|██████████| 2066/2066 [00:20<00:00, 101.40it/s]
Writing files from E:/solafune/train/mask to E:/solafune/data/train/masks: 100%|██████████| 2066/2066 [00:18<00:00, 113.14it/s]
Writing files from E:/solafune/sample to E:/solafune/data/eval/masks: 100%|██████████| 2066/2066 [00:04<00:00, 422.17it/s]


In [5]:
# Spot-check a single converted file: load one training image from disk.
sample_image_np_data_path = "E:/solafune/data/train/images/3.npy"

data = np.load(sample_image_np_data_path)
# Bare last expression so the notebook renders the array's shape as the cell output.
data.shape

(24, 24, 12)

In [6]:
# Survey the distinct array shapes stored in each converted data folder.
for folder in ["train", "eval"]:
    for folder_type in ["images", "masks"]:
        # The eval/masks folder is left out of the survey.
        if (folder, folder_type) == ("eval", "masks"):
            continue
        # NOTE: this pattern is relative, so it is resolved against the
        # current working directory (the conversion cell used absolute paths).
        file_paths = glob(f"data/{folder}/{folder_type}/*.npy")
        shape_set = set()
        for fp in tqdm(file_paths):
            buffer = np.load(fp)
            shape_set.add(buffer.shape)
        print(f"Different shapes at data/{folder}/{folder_type} : {shape_set}")

100%|██████████| 2066/2066 [00:13<00:00, 154.80it/s]


Different shapes at data/train/images : {(24, 23, 12), (24, 24, 12), (23, 22, 12), (22, 23, 12), (25, 24, 12), (26, 25, 12), (25, 26, 12), (22, 22, 12), (23, 23, 12), (24, 25, 12), (23, 24, 12), (25, 25, 12), (26, 26, 12)}


100%|██████████| 2066/2066 [00:08<00:00, 240.36it/s]


Different shapes at data/train/masks : {(23, 23), (24, 23), (25, 25), (22, 23), (26, 26), (23, 22), (24, 25), (25, 24), (26, 25), (23, 24), (22, 22), (25, 26), (24, 24)}


100%|██████████| 2066/2066 [00:12<00:00, 167.64it/s]

Different shapes at data/eval/images : {(24, 23, 12), (24, 24, 12), (23, 22, 12), (22, 23, 12), (25, 24, 12), (26, 25, 12), (25, 26, 12), (22, 22, 12), (23, 23, 12), (24, 25, 12), (23, 24, 12), (25, 25, 12), (26, 26, 12)}



