In [1]:
import sys
sys.path.append('../../../')
sys.path.append('/home/sheida.rahnamai/GIT/HDN/')
import torch
import pickle
import numpy as np
from tqdm import tqdm
from lib.dataloader import CustomDataset, CombinedCustomDataset
import tifffile as tiff
from sklearn.utils import shuffle
import os
from boilerplate import boilerplate

In [2]:
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Notebook-wide constants.
patch_size, centre_size = 64, 4
sample_size = 350
n_channel = 32
hierarchy_level = 3

# Half of the size difference between a patch and its centre region (30 here).
pad_size = (patch_size - centre_size) // 2

In [3]:
# Class names; every class has one `10_percent_<class>.pickle` file under
# both the train/ and the val/ directory.
classes = ['uncategorized', 'nucleus', 'granule', 'mitochondria']

# Flat lists that accumulate the unpickled entries of every class, per split.
train_labeled_indices, val_labeled_indices = [], []

for cls in classes:
    train_pickle = f'/group/jug/Sheida/pancreatic beta cells/download/train/10_percent_{cls}.pickle'
    val_pickle = f'/group/jug/Sheida/pancreatic beta cells/download/val/10_percent_{cls}.pickle'
    # For each class the train file is read first, then the val file.
    # NOTE(review): pickle.load executes whatever the file encodes — only
    # point this at files you produced yourself.
    for pickle_path, collected in ((train_pickle, train_labeled_indices),
                                   (val_pickle, val_labeled_indices)):
        with open(pickle_path, 'rb') as handle:
            collected.extend(pickle.load(handle))

In [4]:
# Load the train and validation image / label volumes for every key.

data_dir = "/group/jug/Sheida/pancreatic beta cells/download/"
keys = ['high_c1', 'high_c2', 'high_c3']

# File layout: <data_dir>/<split>/<key>/<key>_source.tif (image)
#              <data_dir>/<split>/<key>/<key>_gt.tif     (label)
# The components are passed to os.path.join separately so that it is the one
# inserting the separators; joining a single pre-concatenated string is a no-op.
train_img_paths = [os.path.join(data_dir, 'train', key, f"{key}_source.tif") for key in keys]
train_lbl_paths = [os.path.join(data_dir, 'train', key, f"{key}_gt.tif") for key in keys]
val_img_paths = [os.path.join(data_dir, 'val', key, f"{key}_source.tif") for key in keys]
val_lbl_paths = [os.path.join(data_dir, 'val', key, f"{key}_gt.tif") for key in keys]

# Each dict maps a key to the array tiff.imread returns for the matching file.
train_images = {key: tiff.imread(path) for key, path in zip(keys, train_img_paths)}
train_labels = {key: tiff.imread(path) for key, path in zip(keys, train_lbl_paths)}

val_images = {key: tiff.imread(path) for key, path in zip(keys, val_img_paths)}
val_labels = {key: tiff.imread(path) for key, path in zip(keys, val_lbl_paths)}

# Pass the (image, label) pair of every key through boilerplate._filter_slices,
# train split first and val split second, and store the returned pair back
# under the same key.
# NOTE(review): judging by the progress-bar label this presumably removes
# slices lying outside the cell — confirm against boilerplate._filter_slices.
for key in tqdm(keys, desc='filtering out outside of the cell'):
    for images, labels in ((train_images, train_labels), (val_images, val_labels)):
        filtered_image, filtered_label = boilerplate._filter_slices(images[key], labels[key])
        images[key], labels[key] = filtered_image, filtered_label

# Global intensity statistics, computed over the training images only
# (all keys flattened and concatenated into one 1-D array).
flattened_train = [train_images[key].flatten() for key in keys]
all_elements = np.concatenate(flattened_train)
data_mean, data_std = all_elements.mean(), all_elements.std()

# Standardise train and val images with the same training-set statistics.
for key in tqdm(keys, desc='Normalizing data'):
    for images in (train_images, val_images):
        images[key] = (images[key] - data_mean) / data_std

# Wrap each split together with its list of labeled indices.
train_set = CombinedCustomDataset(train_images, train_labels, train_labeled_indices)
val_set = CombinedCustomDataset(val_images, val_labels, val_labeled_indices)


filtering out outside of the cell: 100%|██████████| 3/3 [00:00<00:00,  4.65it/s]
Normalizing data: 100%|██████████| 3/3 [00:02<00:00,  1.34it/s]
Extracting patches from high_c1: 965it [00:02, 340.90it/s]
Extracting patches from high_c2: 889it [00:02, 338.36it/s]
Extracting patches from high_c3: 907it [00:02, 350.74it/s]
Extracting patches from high_c1: 109it [00:00, 365.69it/s]
Extracting patches from high_c2: 101it [00:00, 403.80it/s]
Extracting patches from high_c3: 103it [00:00, 348.61it/s]
