In [1]:
from prototypes.deeplearning.dataloader.IsicDataLoader import IsicDataLoaderFolders, over_under_sample, load_val_images, create_folds, AugmentationWrapper
from prototypes.utility.data import ProjectConfiguration
import pandas as pd

In [2]:
# Load the project configuration. The path is relative, so it is resolved
# against the kernel's current working directory.
config = ProjectConfiguration("../config.json")

In [3]:
# Inspect the configuration; presumably lists the keys that can be passed to
# config.get_value() — TODO confirm against ProjectConfiguration.
config.get_keys()

In [4]:
# Read the training metadata table from the path stored under TRAIN_METADATA.
metadata_df = pd.read_csv(config.get_value("TRAIN_METADATA"), engine="python")

# METADATA_COLUMNS is stored as a single tab-separated string of column names.
columns = config.get_value("METADATA_COLUMNS").split("\t")

# Pull out the three parallel arrays consumed by create_folds():
# image identifiers, the selected metadata columns, and the target labels.
isic_id = metadata_df["isic_id"].values
metadata_array = metadata_df[columns].values
labels = metadata_df["target"].values

In [5]:
# Build the cross-validation folds. Later cells index the result as
# folds_config_dict[fold]["train" | "val"]["isic_id" | "target"].
folds_config_dict = create_folds(
    isic_id=isic_id,
    metadata=metadata_array,
    labels=labels,
    config=config,
)

In [6]:
# Show how many images each fold holds per split instead of dumping the raw
# nested dict of id/target arrays, which is noisy and hard to read.
# Comprehension variables are local, so no notebook-level name is rebound.
{
    fold_key: {
        split_name: len(folds_config_dict[fold_key][split_name]["isic_id"])
        for split_name in ("train", "val")
    }
    for fold_key in folds_config_dict
}

# K-Folds

In [7]:
from PIL import Image
import os
import copy
import shutil
from tqdm.auto import tqdm
import albumentations as A
import numpy as np


# Augmentation steps passed to over_under_sample() as its augmentation_transform.
oversampling_steps = [
    # Rotation by any angle, applied to 90% of samples. crop_border is left
    # disabled here; author's note: cropping the border avoids multiple
    # instances appearing in the rotated image.
    A.Rotate(limit=(-360, 360), p=0.9, crop_border=False),
    # Independent vertical / horizontal mirroring, each with probability 0.5.
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    # Brightness/contrast jitter is always applied.
    A.RandomBrightnessContrast(p=1.0),
    # Author's note: gives a water-reflection-like effect that locally
    # magnifies and shrinks parts of the image.
    A.GridDistortion(p=0.5),
    # Author's note: the resize is only required when crop_border=True;
    # it is kept in the pipeline regardless.
    A.Resize(height=config.get_value("IMAGE_HEIGHT"), width=config.get_value("IMAGE_WIDTH")),
]

augmentation_oversampling = A.Compose(oversampling_steps)

# Materialise every fold on disk as
#   <DATASET_PATH>/splits/fold_<k>/{train,val}/{0,1}/
dataset_path = config.get_value("DATASET_PATH")
splits_root = os.path.join(dataset_path, "splits")

# Start from a clean slate: any previously generated splits are deleted.
# ignore_errors=True makes this a no-op when the folder does not exist yet.
shutil.rmtree(splits_root, ignore_errors=True)

# NOTE(review): these lists are never filled in this cell; kept in case a
# later cell relies on them.
train_metadata = []
val_metadata = []

image_source_path = config.get_value("TRAIN_IMAGES_PATH")
for fold_index in tqdm(folds_config_dict.keys()):
    print(f"Fold: {fold_index}")

    # Create one folder per split and per class label.
    fold_root = os.path.join(splits_root, f"fold_{fold_index}")
    for split_name in ("train", "val"):
        for class_name in ("0", "1"):
            os.makedirs(os.path.join(fold_root, split_name, class_name), exist_ok=True)

    train_split = folds_config_dict[fold_index]["train"]
    val_split = folds_config_dict[fold_index]["val"]

    # Separate the training ids by label (0 = normal, 1 = anomaly).
    normal_images_ids = train_split["isic_id"][np.where(train_split["target"] == 0)]
    anomaly_images_ids = train_split["isic_id"][np.where(train_split["target"] == 1)]

    print(f"total images: {len(normal_images_ids) + len(anomaly_images_ids)}")
    print("Over and under sampling...")
    # Presumably writes the resampled training images below fold_root
    # (train/0 and train/1) — TODO confirm against over_under_sample().
    over_under_sample(normal_images_ids=normal_images_ids,
                      anomaly_images_ids=anomaly_images_ids,
                      augmentation_transform=augmentation_oversampling,
                      root_path=fold_root,
                      config=config)

    print("Saving validation images to disk...")
    # The loop variables are deliberately NOT called `isic_id`: that name is
    # bound to the full id array in the metadata cell and is passed to
    # create_folds(); rebinding it here would break re-running that cell.
    for val_isic_id, val_target in tqdm(zip(val_split["isic_id"], val_split["target"]),
                                        total=len(val_split["isic_id"])):
        source_file = os.path.join(image_source_path, f"{val_isic_id}.jpg")
        destination_file = os.path.join(fold_root, "val", f"{int(val_target)}", f"{val_isic_id}.jpg")
        # Image.open() keeps the underlying file open; the context manager
        # releases the handle after each image instead of leaking one per file.
        with Image.open(source_file) as img:
            img.save(destination_file)

In [8]:
import glob
import os

In [9]:
# Fold whose training folder is inspected below.
fold_index = 1

dataset_path = config.get_value("DATASET_PATH")

root = os.path.join(dataset_path, "splits", f"fold_{fold_index}", "train")

# The split cell creates the class sub-folders train/0 and train/1, so a flat
# "train/*.jpg" pattern does not reach images stored inside them. A recursive
# "**" pattern matches images at any depth (including directly under root);
# sorting keeps the preview reproducible because glob order is arbitrary.
file_list = sorted(glob.glob(os.path.join(root, "**", "*.jpg"), recursive=True))

In [10]:
# List the entries directly under the fold's training folder; the class
# folders "0" and "1" created in the split cell should appear here.
os.listdir(root)

In [11]:
# Preview the first five image paths found by the glob above.
file_list[:5]

In [12]:
# Build the project data loader over the fold's training folder.
# Presumably expects one sub-folder per class under `root` — TODO confirm
# against IsicDataLoaderFolders.
dataloader = IsicDataLoaderFolders(root=root)

In [13]:
# Smoke test: fetch and display the first item the loader yields.
next(iter(dataloader))