In [28]:
import os
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
import json
from sklearn.model_selection import GroupKFold
import albumentations as A
import cv2
import pandas as pd
from PIL import Image

# Seed NumPy's global RNG once, up front, so np.random calls made further down
# draw from a fixed stream on a fresh kernel.
SEED = 42
np.random.seed(SEED)

# Format Data

In [2]:
def permute(image):
    """Move the last axis of a 3-D array to the front: (d0, d1, d2) -> (d2, d0, d1).

    The data takes a round trip through ``torch.Tensor``, so the result comes
    back in torch's default tensor dtype (float32 unless the default was
    changed), whatever dtype went in.

    Args:
        image: 3-D array-like accepted by ``torch.Tensor``.

    Returns:
        numpy.ndarray with the axes reordered as (2, 0, 1).
    """
    as_tensor = torch.Tensor(image)
    reordered = as_tensor.permute(2, 0, 1)
    return reordered.numpy()

In [3]:
# Input / output locations, all relative to the notebook's working directory.
DATA_PATH = '../data/HMT_dataset/raw_data/train/'   # raw <id>.tif images
OUT_PATH = '../data/HMT_dataset/processed_data/'    # <id>_image.npy / <id>_label.npy dumps
TABLE_PATH = '../data/split_tables/HMT/'            # JSON split tables

# Create both output directories up front: the conversion loop np.save()s into
# OUT_PATH and would fail on a fresh checkout if the folder did not exist yet.
os.makedirs(OUT_PATH, exist_ok=True)
os.makedirs(TABLE_PATH, exist_ok=True)

# Label table; the 'id' and 'label' columns are read by the cells below.
labels = pd.read_csv('../data/HMT_dataset/raw_data/train_labels.csv')

In [5]:
# Every image id listed in the label table, as a plain Python list.
patient_list = labels.loc[:, 'id'].tolist()

In [18]:
# Convert every raw .tif into two .npy files in OUT_PATH:
#   <id>_image.npy - the decoded pixel array
#   <id>_label.npy - the label as a 0-d array

# Build the id -> label lookup once. Filtering the whole DataFrame for every
# image is quadratic in the number of rows; a dict lookup is constant time.
# setdefault keeps the FIRST row of a duplicated id, which is what
# `labels[labels['id']==patient]['label'].values[0]` returned.
label_by_id = {}
for sample_id, sample_label in zip(labels['id'].values, labels['label'].values):
    label_by_id.setdefault(sample_id, sample_label)

for patient in tqdm(patient_list):

    # The context manager closes the file handle as soon as the pixels are
    # copied into the array, instead of leaving it to garbage collection.
    with Image.open(DATA_PATH+patient+'.tif') as tif:
        image = np.array(tif)

    target = np.array(label_by_id[patient])

    #save
    np.save(os.path.join(OUT_PATH, f'{patient}_label.npy'), target)
    np.save(os.path.join(OUT_PATH, f'{patient}_image.npy'), image)
    
    

100%|██████████| 220025/220025 [1:49:28<00:00, 33.50it/s]


# Prepare split tables

In [31]:
# Collect the processed image files. os.listdir returns entries in arbitrary,
# filesystem-dependent order, so sort first: otherwise the seeded shuffle below
# would still yield different splits on different machines.
# endswith() only accepts names that really end in '_image.npy'
# (find() != -1 also matched the suffix in the middle of a name).
image_files = sorted(name for name in os.listdir(OUT_PATH) if name.endswith('_image.npy'))

# OUT_PATH[1:] drops the leading '.', so stored paths start with './data/...'
# rather than '../data/...'.
# NOTE(review): presumably the split tables are consumed from one directory
# level above this notebook - confirm against the training code.
patient_list = [OUT_PATH[1:]+name for name in image_files]
print(f'Total number of patients: {len(patient_list)}')

patient_arr = np.array(patient_list)

# Shuffle with a dedicated RandomState seeded with 42. On a fresh kernel this is
# the same stream as np.random.seed(42) followed by np.random.permutation, but
# re-running this cell now always reproduces the same order.
patient_arr = np.random.RandomState(42).permutation(patient_arr)

Total number of patients: 220025


In [33]:
#create test
# Every path is passed as its own group, so the 5-fold GroupKFold simply
# partitions the images; only the first fold is used as the held-out test set.
kf = GroupKFold(n_splits=5)
train, test = next(kf.split(patient_arr, patient_arr, patient_arr))

patient_test = patient_arr[test]

#create test
split = {'test': patient_test.tolist()}
with open(f'{TABLE_PATH}test_split_table.json', 'w') as outfile:
    json.dump(split, outfile)

print(f'Number of images, test subset: {patient_test.shape[0]}')

# Keep only the non-test images for the cells below.
# NOTE: this overwrites patient_arr, so running the cell a second time carves
# another fold out of the already-reduced array.
patient_arr = patient_arr[train]

Number of images, test subset: 44005


In [41]:
#create train and validation
# One split table is written per entry of n_patients; the entry scales the
# size of the labelled train subset (see the formula inside the loop).
n_patients = [2,4,8]

n_total = patient_arr.shape[0]

# Validation subset: the last int(n_total*(0.02/70)) entries, identical for
# every table, so it is computed once outside the loop.
# The start index is spelled out because a negative-count slice breaks when the
# count rounds down to 0: patient_arr[-0:] is the WHOLE array, not an empty one.
# NOTE(review): the extra "/70" in both size formulas is not explained anywhere
# in the notebook - confirm the intended fractions.
n_val = int(n_total*(0.02/70))
patient_val = patient_arr[n_total-n_val:]

for i in n_patients:

    # Train subset: the first int(n_total*(i/100/70)) entries.
    patient_train = patient_arr[:int(n_total*(i/100/70))]

    # Pretrain subset: everything that is not in the train subset.
    # np.setdiff1d returns a new, sorted, de-duplicated array, so no copy of
    # patient_arr is needed beforehand.
    # NOTE(review): validation paths are not removed here, so they are part of
    # the pretrain subset - confirm that this is intended.
    patient_petrain = np.setdiff1d(patient_arr, patient_train)

    split = {
            'train': patient_train.tolist(),
            'val': patient_val.tolist(),
            'pretrain': patient_petrain.tolist(),
        }

    with open(f'{TABLE_PATH}{i}_split_table.json', 'w') as outfile:
            json.dump(split, outfile)

    print(f'Number of images, val subset: {patient_val.shape[0]}')
    print(f'Number of images, train subset: {patient_train.shape[0]}')
    print(f'Number of images, pretain subset: {patient_petrain.shape[0]}')
    print('#######################################################')
    print('#######################################################')


    

Number of images, val subset: 50
Number of images, train subset: 50
Number of images, pretain subset: 175970
#######################################################
Number of images, val subset: 50
Number of images, train subset: 100
Number of images, pretain subset: 175920
#######################################################
Number of images, val subset: 50
Number of images, train subset: 201
Number of images, pretain subset: 175819
#######################################################


In [36]:
#create UB
# Split table that uses all remaining images: the first fifth of patient_arr
# becomes validation, the other four fifths become train.
n_val = patient_arr.shape[0]//5
patient_train = patient_arr[n_val:]
patient_val = patient_arr[:n_val]


split = {
        'train': patient_train.tolist(),
        'val': patient_val.tolist(),
    }

# Write next to the other split tables. The file name used to be an f-string
# with no placeholder, so this one table landed in the current working
# directory instead of TABLE_PATH.
with open(f'{TABLE_PATH}UB_split_table.json', 'w') as outfile:
        json.dump(split, outfile)

print(f'Number of images, val subset: {patient_val.shape[0]}')
print(f'Number of images, train subset: {patient_train.shape[0]}')
print('#######################################################')

Number of images, val subset: 35204
Number of images, train subset: 140816
#######################################################


# Review augmentations

In [None]:
# Visual sanity check of the augmentation pipeline on a single array.
# NOTE(review): `temp` must already exist in the kernel - it is not assigned in
# any cell visible in this part of the notebook.
prob = 1  # every transform fires on every call

transforms = [
    A.HorizontalFlip(p=prob),
    A.VerticalFlip(p=prob),
    A.Rotate(limit=170, p=prob),
    A.ElasticTransform(alpha=0.1,p=prob),
    A.RandomSizedCrop(min_max_height=(120, 120), height=300, width=300, p=prob),
    A.RandomGamma(gamma_limit=(80, 120), p=prob),
]
augs = A.Compose(transforms)

# Axis 0 is moved to the end before augmenting and back to the front afterwards
# (presumably channels-first <-> channels-last; confirm against how `temp` is built).
temp = temp.astype(np.float32).transpose(1, 2, 0)
augmented = augs(image=temp)
temp = temp.astype(np.float32).transpose(2, 0, 1)
image = augmented['image'].astype(np.float32).transpose(2, 0, 1)

# First slice of the un-augmented input, for comparison with the next cell.
plt.imshow(temp[0], cmap='gray')

In [None]:
# First slice of the augmented result.
plt.imshow(image[0], cmap='gray')

In [None]:
import numpy as np
import cv2

def rotate_image(image, angle):
    """Rotate an image about its centre, keeping the original canvas size.

    Args:
        image: 3-D array whose axis 0 is moved to the end before warping, i.e.
            laid out as (C, H, W) from OpenCV's point of view.
        angle: rotation angle in degrees, passed straight to
            ``cv2.getRotationMatrix2D`` (scale fixed at 1.0).

    Returns:
        float32 array with the same (C, H, W) layout and size as the input.
        The output canvas is not enlarged, so content rotated outside the
        original frame is cut off.
    """
    # OpenCV works on (H, W, C) arrays.
    image = np.transpose(image.astype(np.float32), (1, 2, 0))
    height, width = image.shape[:2]

    # OpenCV expects the centre and the output size as (x, y) / (width, height).
    rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    result = cv2.warpAffine(image, rot_mat, (width, height), flags=cv2.INTER_LINEAR)

    # warpAffine returns a 2-D array for single-channel input; restore the
    # channel axis so the transpose below also works for (1, H, W) images.
    if result.ndim == 2:
        result = result[:, :, np.newaxis]

    return np.transpose(result.astype(np.float32), (2, 0, 1))

In [None]:
# Rotate the augmented image by 90 degrees and display its third slice.
y = rotate_image(image, angle=90)
plt.imshow(y[2], cmap='gray')