In [3]:
import os
from tifffile import tifffile
import numpy as np
import albumentations as A
import cv2
from tqdm import tqdm

def normalize_clip(img, vmin, vmax):
    """Clip ``img`` to ``[vmin, vmax]`` and rescale that range linearly to ``[0, 1]``.

    Parameters
    ----------
    img : array-like
        Values to rescale.
    vmin, vmax : scalar or array-like
        Lower / upper bound of the range mapped to 0 / 1.

    Returns
    -------
    numpy.ndarray
        ``(clip(img) - vmin) / (vmax - vmin)``. When ``vmin`` and ``vmax`` are
        scalars describing a zero-width range, an all-zero float array is
        returned instead of the NaNs that 0/0 would produce.
    """
    shifted = np.clip(img, vmin, vmax) - vmin
    span = vmax - vmin
    # A zero-width range collapses every clipped value onto vmin, so the
    # numerator is already 0; dividing would only turn it into NaN.
    if np.ndim(span) == 0 and span == 0:
        return np.zeros_like(shifted, dtype=float)
    return shifted / span

def standardization(img):
    """Return ``img`` shifted to zero mean and scaled to unit standard deviation.

    Parameters
    ----------
    img : numpy.ndarray
        Array to standardise; mean and standard deviation are taken over all
        elements.

    Returns
    -------
    numpy.ndarray
        ``(img - img.mean()) / img.std()``. A constant input (standard
        deviation exactly 0) yields an all-zero array rather than NaN.
    """
    centered = img - img.mean()
    sigma = img.std()
    # std == 0 means every element equals the mean, so the z-score is
    # undefined (0/0); report "no deviation" as 0 instead of NaN.
    if sigma == 0:
        return np.zeros_like(centered)
    return centered / sigma

def stand_norm(img, img_minmax):
    """Standardise ``img``, then clip and rescale it with ``img_minmax``.

    ``img_minmax`` is indexed as ``[lower, upper]``; the standardised values
    are clipped to that interval and mapped onto ``[0, 1]`` by
    ``normalize_clip``.
    """
    z_scored = standardization(img)
    lower, upper = img_minmax[0], img_minmax[1]
    return normalize_clip(z_scored, lower, upper)
        
def produce_images(original_dir, main_dir, data_folders=("1", "2", "3", "4", "5"), train_idx=(1, 2, 3), val_test_idx=(0, 4),
                   preprocess=True, img_n=100, img_size=256):
    """Write augmented training crops (and normalised val/test copies) of multi-channel TIFFs.

    Parameters
    ----------
    original_dir : str
        Directory containing the source sub-folders named in ``data_folders``.
    main_dir : str
        Directory receiving the output; it uses the same sub-folder names.
        Missing output folders are created.
    data_folders : sequence of str
        Sub-folder names, addressed by ``train_idx`` / ``val_test_idx``.
    train_idx : sequence of int
        Indices into ``data_folders`` of the folders that are augmented. Only
        these folders contribute to the normalisation percentiles.
    val_test_idx : sequence of int
        Indices into ``data_folders`` of the folders that are normalised and
        written unchanged in size. They are processed only when ``preprocess``
        is true; with ``preprocess=False`` nothing is written for them.
    preprocess : bool
        When true, every channel is standardised and rescaled to ``[0, 1]``
        between the 0.1 and 99.9 percentiles pooled over the training images.
    img_n : int
        Number of augmented crops written per training image.
    img_size : int
        Height and width of each crop.

    Raises
    ------
    ValueError
        If ``preprocess`` is true and the training folders hold no images.
    """
    train_names = [data_folders[k] for k in train_idx]
    holdout_names = [data_folders[k] for k in val_test_idx]

    def _list_images(folder):
        # os.listdir order is unspecified; sorting keeps the mapping between
        # the seeded augmentation stream and the files reproducible.
        return sorted(os.listdir(folder))

    def _normalize_channels(image, bounds):
        # Normalised values are floats in [0, 1]. Writing them back into an
        # integer array would truncate them to 0/1, so integer inputs get a
        # float32 destination; float inputs keep their dtype.
        out_dtype = image.dtype if np.issubdtype(image.dtype, np.floating) else np.float32
        normalized = np.empty(image.shape, dtype=out_dtype)
        for ch in range(image.shape[-1]):
            normalized[..., ch] = stand_norm(image[..., ch], bounds[ch])
        return normalized

    img_minmax = None
    if preprocess:
        # Pool the standardised pixels of each channel over all training
        # images. Per-channel lists (rather than one big ndarray) also work
        # when the images differ in size.
        per_channel = []
        for name in train_names:
            folder = os.path.join(original_dir, name)
            for fname in _list_images(folder):
                image = tifffile.imread(os.path.join(folder, fname))
                if not per_channel:
                    per_channel = [[] for _ in range(image.shape[-1])]
                for ch in range(image.shape[-1]):
                    per_channel[ch].append(standardization(image[..., ch]).ravel())
        if not per_channel:
            raise ValueError("no training images found to measure normalisation percentiles")
        img_minmax = []
        for values in per_channel:
            pooled = np.concatenate(values)
            img_minmax.append([np.percentile(pooled, 0.1), np.percentile(pooled, 99.9)])
        print("norm_measure done")

    # "image3" is declared as an extra target but is never supplied below.
    train_aug = A.Compose(
        [
            A.ShiftScaleRotate(
                shift_limit=0.2, scale_limit=0.2,
                rotate_limit=30, p=0.7,
                border_mode=cv2.BORDER_REFLECT_101
            ),
            A.CropNonEmptyMaskIfExists(
                height=img_size,
                width=img_size,
                p=1.0
            ),
        ],
        additional_targets={"image1": "image",
                            "image2": "image",
                            "image3": "image"},
        strict=True,
        seed=137,
    )

    for name in train_names:
        src_folder = os.path.join(original_dir, name)
        dst_folder = os.path.join(main_dir, name)
        os.makedirs(dst_folder, exist_ok=True)
        for fname in _list_images(src_folder):
            image = tifffile.imread(os.path.join(src_folder, fname))
            if preprocess:
                image = _normalize_channels(image, img_minmax)
            phase1, phase2, mito = image[..., 0], image[..., 1], image[..., 2]
            # Positive pixels of the third channel form the mask target that
            # CropNonEmptyMaskIfExists receives.
            mask = (mito > 0).astype(np.float32)
            base_name, ext = os.path.splitext(fname)
            for k in range(img_n):
                augmented = train_aug(image=phase1, image1=phase2, image2=mito, mask=mask)
                image_crop = np.stack([augmented['image'], augmented['image1'], augmented['image2']], axis=-1)
                save_path = os.path.join(dst_folder, f"{base_name}_{k}{ext}")
                tifffile.imwrite(save_path, image_crop)
        print(f"folder {dst_folder} done")

    if preprocess:
        # Validation / test folders are normalised with the training
        # percentiles and written at full size, without augmentation.
        for name in holdout_names:
            src_folder = os.path.join(original_dir, name)
            dst_folder = os.path.join(main_dir, name)
            os.makedirs(dst_folder, exist_ok=True)
            for fname in _list_images(src_folder):
                image = tifffile.imread(os.path.join(src_folder, fname))
                image = _normalize_channels(image, img_minmax)
                tifffile.imwrite(os.path.join(dst_folder, fname), image)
            print(f"folder {dst_folder} done")


def produce_images_2(original_dir, preprocess=True, img_n=100, img_size=256):
    """Normalise folders ``1``-``5`` in place and write augmented crops to ``c1``-``c5``.

    WARNING: with ``preprocess=True`` every TIFF under ``original_dir/1`` ...
    ``original_dir/5`` is overwritten by its normalised version. The operation
    is therefore not idempotent: a second run measures percentiles on, and
    re-normalises, data that was already normalised. Keep a backup of the raw
    files.

    Parameters
    ----------
    original_dir : str
        Directory containing the source folders ``1``-``5``; the crop folders
        ``c1``-``c5`` are written (and created if missing) in the same place.
    preprocess : bool
        When true, every channel is standardised and rescaled to ``[0, 1]``
        between the 0.1 and 99.9 percentiles pooled over the images of all
        five folders, and the source files are overwritten with the result.
    img_n : int
        Number of augmented crops written per source image.
    img_size : int
        Height and width of each crop.

    Raises
    ------
    ValueError
        If ``preprocess`` is true and the source folders hold no images.
    """
    main_dir = original_dir
    source_names = ['1', '2', '3', '4', '5']
    crop_names = ['c1', 'c2', 'c3', 'c4', 'c5']

    def _list_images(folder):
        # os.listdir order is unspecified; sorting keeps the mapping between
        # the seeded augmentation stream and the files reproducible.
        return sorted(os.listdir(folder))

    def _normalize_channels(image, bounds):
        # Normalised values are floats in [0, 1]. Writing them back into an
        # integer array would truncate them to 0/1 (and that truncated data
        # would then replace the source file), so integer inputs get a
        # float32 destination; float inputs keep their dtype.
        out_dtype = image.dtype if np.issubdtype(image.dtype, np.floating) else np.float32
        normalized = np.empty(image.shape, dtype=out_dtype)
        for ch in range(image.shape[-1]):
            normalized[..., ch] = stand_norm(image[..., ch], bounds[ch])
        return normalized

    img_minmax = None
    if preprocess:
        # Pool the standardised pixels of each channel over every image.
        # Per-channel lists (rather than one big ndarray) also work when the
        # images differ in size.
        per_channel = []
        for name in source_names:
            folder = os.path.join(original_dir, name)
            for fname in _list_images(folder):
                image = tifffile.imread(os.path.join(folder, fname))
                if not per_channel:
                    per_channel = [[] for _ in range(image.shape[-1])]
                for ch in range(image.shape[-1]):
                    per_channel[ch].append(standardization(image[..., ch]).ravel())
        if not per_channel:
            raise ValueError("no images found to measure normalisation percentiles")
        img_minmax = []
        for values in per_channel:
            pooled = np.concatenate(values)
            img_minmax.append([np.percentile(pooled, 0.1), np.percentile(pooled, 99.9)])
        print("norm_measure done")

    # "image3" is declared as an extra target but is never supplied below.
    train_aug = A.Compose(
        [
            A.ShiftScaleRotate(
                shift_limit=0.2, scale_limit=0.2,
                rotate_limit=30, p=0.7,
                border_mode=cv2.BORDER_REFLECT_101
            ),
            A.CropNonEmptyMaskIfExists(
                height=img_size,
                width=img_size,
                p=1.0
            ),
        ],
        additional_targets={"image1": "image",
                            "image2": "image",
                            "image3": "image"},
        strict=True,
        seed=137,
    )

    for src_name, dst_name in zip(source_names, crop_names):
        src_folder = os.path.join(original_dir, src_name)
        dst_folder = os.path.join(main_dir, dst_name)
        os.makedirs(dst_folder, exist_ok=True)
        for fname in tqdm(_list_images(src_folder)):
            ori_pth = os.path.join(src_folder, fname)
            image = tifffile.imread(ori_pth)
            if preprocess:
                image = _normalize_channels(image, img_minmax)
                # Replace the source file with its normalised version. Without
                # preprocessing the pixels are unchanged, so the file is left
                # alone instead of being rewritten.
                tifffile.imwrite(ori_pth, image)
            phase1, phase2, mito = image[..., 0], image[..., 1], image[..., 2]
            # Positive pixels of the third channel form the mask target that
            # CropNonEmptyMaskIfExists receives.
            mask = (mito > 0).astype(np.float32)
            base_name, ext = os.path.splitext(fname)
            for k in range(img_n):
                augmented = train_aug(image=phase1, image1=phase2, image2=mito, mask=mask)
                image_crop = np.stack([augmented['image'], augmented['image1'], augmented['image2']], axis=-1)
                save_path = os.path.join(dst_folder, f"{base_name}_{k}{ext}")
                tifffile.imwrite(save_path, image_crop)
        print(f"folder {dst_folder} done")

In [4]:
# Destructive: with the default preprocess=True this overwrites the TIFFs in
# folders 1-5 of this directory with their normalised versions, then writes
# img_n (default 100) crops per image into c1-c5. Do not re-run on the same data.
produce_images_2(
    original_dir=r"D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans",
)

  original_init(self, **validated_kwargs)


norm_measure done


100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


folder D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans\c1 done


100%|██████████| 50/50 [02:50<00:00,  3.40s/it]


folder D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans\c2 done


100%|██████████| 50/50 [03:27<00:00,  4.16s/it]


folder D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans\c3 done


100%|██████████| 50/50 [03:43<00:00,  4.47s/it]


folder D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans\c4 done


100%|██████████| 50/50 [02:34<00:00,  3.09s/it]


folder D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans\c5 done


In [5]:
# Destructive: with the default preprocess=True this overwrites the TIFFs in
# folders 1-5 of this directory with their normalised versions, then writes
# img_n (default 100) crops per image into c1-c5. Do not re-run on the same data.
produce_images_2(
    original_dir=r"D:\Matsusaka\data_mito\HeLa_Su9-mSG_kmeans",
)

norm_measure done


100%|██████████| 50/50 [02:01<00:00,  2.43s/it]


folder D:\Matsusaka\data_mito\HeLa_Su9-mSG_kmeans\c1 done


100%|██████████| 50/50 [02:40<00:00,  3.21s/it]


folder D:\Matsusaka\data_mito\HeLa_Su9-mSG_kmeans\c2 done


100%|██████████| 50/50 [02:54<00:00,  3.49s/it]


folder D:\Matsusaka\data_mito\HeLa_Su9-mSG_kmeans\c3 done


100%|██████████| 50/50 [03:02<00:00,  3.64s/it]


folder D:\Matsusaka\data_mito\HeLa_Su9-mSG_kmeans\c4 done


100%|██████████| 50/50 [02:16<00:00,  2.73s/it]


folder D:\Matsusaka\data_mito\HeLa_Su9-mSG_kmeans\c5 done


In [None]:
# Folder "1" is the training fold, "2" and "3" are validation/test.
# With preprocess=False only the training crops are written; the val/test
# folders are not copied to the output directory.
produce_images(
    original_dir="D:\\Matsusaka\\data_mito\\COS7_KDEL-mSG_UNetPipeline",
    main_dir="D:\\Matsusaka\\data_mito\\COS7_KDEL-mSG_UNetPipeline_crop",
    data_folders=["1", "2", "3"],
    train_idx=[0],
    val_test_idx=[1, 2],
    preprocess=False,
)
# With preprocess=True all three folders are normalised using percentiles
# measured on folder "1"; val/test images are written at full size.
produce_images(
    original_dir="D:\\Matsusaka\\data_mito\\COS7_KDEL-mSG",
    main_dir="D:\\Matsusaka\\data_mito\\COS7_KDEL-mSG_original_crop",
    data_folders=["1", "2", "3"],
    train_idx=[0],
    val_test_idx=[1, 2],
    preprocess=True,
)

In [2]:
# Show which source (1-5) and crop (c1-c5) folders exist in the dataset directory.
os.listdir(
    path=r"D:\Matsusaka\data_mito\COS7_KDEL-mSG_kmeans",
)

['1', '2', '3', '4', '5', 'c1', 'c2', 'c3', 'c4', 'c5']