In [5]:
import os
import cv2
import glob
import shutil
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt

**DESCRIPTIVE STATISTICS**

**TEST IMAGES**

In [2]:
# Source folder for the test set. The copy loop further down picks out files
# whose names contain 'Test' (images) or 'mask' (masks) from this folder.
path_to_bus = '/external_drive/BUS_Deep_Learning/data/test_images/STU-Hospital/Hospital'

In [3]:
# Destination folders for the test images and their masks; both are created
# up front (exist_ok=True, so an already-existing folder is not an error).
path_to_test_imgs = '/external_drive/BUS_Deep_Learning/data/seg_dir_test_images'
path_to_test_masks = '/external_drive/BUS_Deep_Learning/data/seg_dir_test_masks'
for test_dir in (path_to_test_imgs, path_to_test_masks):
    os.makedirs(test_dir, exist_ok=True)

In [4]:
# Split the test folder into separate image / mask folders.
# Files whose name contains 'Test' are copied unchanged into path_to_test_imgs.
# Files whose name contains 'mask' are copied into path_to_test_masks with
# 'mask' replaced by 'Test_Image' in the file name.
# The two checks are independent, exactly as before: a name containing both
# substrings is copied to both folders.
for img in os.listdir(path_to_bus):
    src = os.path.join(path_to_bus, img)
    if 'Test' in img:
        shutil.copy(src, path_to_test_imgs)
    if 'mask' in img:
        # Copy straight to the final name instead of copy + os.rename:
        # os.rename raises on Windows when the target already exists, so the
        # two-step version could not be re-run there. shutil.copy overwrites.
        renamed = img.replace('mask', 'Test_Image')
        shutil.copy(src, os.path.join(path_to_test_masks, renamed))

**TRAINING/VAL IMAGES**

In [6]:
# NOTE: path_to_bus is rebound here. From this point on it is the root of the
# training/validation dataset; the cells below treat each sub-folder name as
# the lesion_state label of the files inside it.
path_to_bus = '/external_drive/BUS_Deep_Learning/data/Dataset_BUSI_with_GT'

In [7]:
# Walk the dataset root one level deep. Every file whose name contains 'mask'
# goes into `masks`; every other file goes into `images`, and the name of the
# folder it was found in is recorded as its lesion_state label.
images = []
masks = []
lesion_state = []
# os.listdir returns entries in arbitrary order. Sorting both levels makes the
# row order deterministic, and with it the seeded train/val split built from
# these lists, regardless of filesystem.
for condition in sorted(os.listdir(path_to_bus)):
    condition_dir = os.path.join(path_to_bus, condition)
    if not os.path.isdir(condition_dir):
        # Stray files at the root (e.g. OS metadata files) are not class folders.
        continue
    for bus in sorted(os.listdir(condition_dir)):
        if 'mask' in bus:
            masks.append(bus)
        else:
            images.append(bus)
            lesion_state.append(condition)

In [8]:
# One row per image: the file name and the class folder it was found in.
fmatrix = pd.DataFrame({'images': images, 'lesion_state': lesion_state})

In [9]:
# Number of images per lesion_state label in the full table.
fmatrix['lesion_state'].value_counts()

benign       437
malignant    210
normal       133
Name: lesion_state, dtype: int64

In [None]:
# Drop the 'normal' rows and shuffle what is left.
# The shuffle is seeded (same seed as the train/val split below) so that a
# Restart & Run All always produces the same row order and therefore the same
# split; an unseeded shuffle would change the split on every run.
# NOTE(review): the value_counts outputs saved further down still list
# 'normal', so they appear to come from a run in which this cell was skipped.
# Confirm whether the filter is intended.
fmatrix = fmatrix.loc[fmatrix['lesion_state'] != 'normal'].sample(frac=1, random_state=9)

In [None]:
# Quick look at the first rows of the table.
fmatrix.head()

In [10]:
# 80/20 train/validation split, stratified on lesion_state so both parts keep
# the same class proportions; the seed is fixed at 9.
train_set, val_set = train_test_split(
    fmatrix,
    test_size=0.2,
    random_state=9,
    stratify=fmatrix['lesion_state'],
)

In [11]:
# Number of images per lesion_state label in the training split.
train_set['lesion_state'].value_counts()

benign       350
malignant    168
normal       106
Name: lesion_state, dtype: int64

In [12]:
# Number of images per lesion_state label in the validation split.
val_set['lesion_state'].value_counts()

benign       87
malignant    42
normal       27
Name: lesion_state, dtype: int64

In [None]:
# Output folders for the train/val images and masks. All four are created here
# (exist_ok=True, so an already-existing folder is not an error).
path_to_train_images = '/external_drive/BUS_Deep_Learning/data/seg_dir_unet/train_images'
path_to_val_images = '/external_drive/BUS_Deep_Learning/data/seg_dir_unet/val_images'
path_to_train_masks = '/external_drive/BUS_Deep_Learning/data/seg_dir_unet/train_masks'
path_to_val_masks = '/external_drive/BUS_Deep_Learning/data/seg_dir_unet/val_masks'

for out_dir in (path_to_train_images, path_to_val_images,
                path_to_train_masks, path_to_val_masks):
    os.makedirs(out_dir, exist_ok=True)

**CREATE DIR FOR TRAINING MASK R-CNN** (copies each image together with its individual, unmerged mask files)

In [None]:
# Copy every image, and every individual mask file belonging to it, into the
# train or val folder according to the split. Mask files keep their original
# names and are NOT merged here. Files whose image is in neither split are
# skipped.
# NOTE(review): the destinations are the same seg_dir_unet/* folders that the
# merged-mask export cell writes to, so running both cells leaves per-lesion
# and merged masks side by side. Confirm that this is intended.

# Build the membership sets once, instead of scanning the whole column for
# every file.
train_names = set(train_set['images'])
val_names = set(val_set['images'])

for condition in os.listdir(path_to_bus):
    condition_dir = os.path.join(path_to_bus, condition)
    if not os.path.isdir(condition_dir):
        # Stray files at the dataset root are not class folders.
        continue
    for bus in os.listdir(condition_dir):
        is_mask = 'mask' in bus
        # A mask belongs to the image whose name precedes '_mask'. Splitting on
        # '_mask' rather than on '_' keeps image names that contain an
        # underscore intact.
        image_name = (bus.split('_mask')[0] + '.png') if is_mask else bus
        if image_name in train_names:
            dst_dir = path_to_train_masks if is_mask else path_to_train_images
        elif image_name in val_names:
            dst_dir = path_to_val_masks if is_mask else path_to_val_images
        else:
            continue
        shutil.copy(os.path.join(condition_dir, bus), os.path.join(dst_dir, bus))

**CREATE DIR FOR TRAINING U-NET** (copies each image and writes one merged binary mask per image)

In [None]:
def merge_masks(arr):
    """Collapse a 1-D slice of mask values into a single binary value.

    Returns 255.0 when at least one element of ``arr`` is non-zero and 0.0
    otherwise. The cell below applies it along axis 0 of the stacked masks,
    i.e. once per pixel position.
    """
    has_foreground = np.any(arr != 0.0)
    return 255.0 if has_foreground else 0.0

def _merge_mask_files(mask_paths):
    """Read the given mask files and OR them into one binary mask.

    Each file is read as a single-channel grayscale image. The result is a
    uint8 array that is 255 wherever any of the masks is non-zero and 0
    elsewhere.

    Raises:
        IOError: if a mask file cannot be read (cv2.imread returned None).
        ValueError: if the masks do not share one shape (raised by np.stack).
    """
    layers = []
    for mask_path in mask_paths:
        layer = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if layer is None:
            raise IOError('could not read mask file: ' + mask_path)
        layers.append(layer)
    # Vectorised replacement for calling a Python function once per pixel.
    foreground = np.any(np.stack(layers) != 0, axis=0)
    # Write an explicit 8-bit image rather than a float64 array.
    return np.where(foreground, 255, 0).astype(np.uint8)


def _export_unet_split(split, dataset_root, images_dir, masks_dir):
    """Copy every image of ``split`` and write its merged mask.

    For each row, the image is located at
    ``dataset_root/<lesion_state>/<images>``. All files matching
    ``<image stem>_mask*`` are merged into one binary mask, which is saved in
    ``masks_dir`` under the image's own file name. The image itself is copied
    to ``images_dir``.

    Raises:
        FileNotFoundError: if an image has no mask files.
        IOError: if a mask cannot be read or the merged mask cannot be written.
    """
    for _, row in split.iterrows():
        path_to_image = os.path.join(dataset_root, row['lesion_state'], row['images'])
        print(path_to_image)
        # splitext only touches the real extension (str.replace would also hit
        # a '.png' elsewhere in the path). glob.escape keeps any '[', '?' or
        # '*' in the path literal.
        stem = os.path.splitext(path_to_image)[0]
        path_to_masks = glob.glob(glob.escape(stem) + '_mask*')
        print('number of masks-->', len(path_to_masks))
        if not path_to_masks:
            raise FileNotFoundError('no mask files found for ' + path_to_image)
        mask = _merge_mask_files(path_to_masks)
        shutil.copy(path_to_image, os.path.join(images_dir, row['images']))
        mask_out = os.path.join(masks_dir, row['images'])
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(mask_out, mask):
            raise IOError('could not write merged mask: ' + mask_out)


#Train Set
print('Train Set')
_export_unet_split(train_set, path_to_bus, path_to_train_images, path_to_train_masks)

print('')

#Validation Set
print('Val Set')
_export_unet_split(val_set, path_to_bus, path_to_val_images, path_to_val_masks)