### U-Net Implementation of Knee Bony Image Segmentation: Training Setup
#### 3/17/20
##### Reference: https://towardsdatascience.com/a-keras-pipeline-for-image-segmentation-part-1-6515a421157d

In [12]:
import os
import random
import re
from PIL import Image
import zipfile as zf

##### Location of data directory

In [2]:
# Root folder of the dataset.
DATA_PATH = r'C:\Users\vazir\Desktop/data_dir'
# Sub-folders holding the raw frames and masks. Both end with a slash so that a
# file name can be appended to them directly.
FRAME_PATH = DATA_PATH + '/frames/'
MASK_PATH = DATA_PATH + '/masks/'

##### Create folders for storing images and masks


In [3]:
# One output folder per split (train / val / test) and kind (frames / masks),
# all created directly under DATA_PATH.
folders = ['train_frames', 'train_masks', 'val_frames', 'val_masks', 'test_frames', 'test_masks']

for folder in folders:
  # exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
  # FileExistsError as soon as the folders exist from a previous run.
  os.makedirs(os.path.join(DATA_PATH, folder), exist_ok=True)

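##### Randomize the split into train, val, and test sets
##### List the files, sort them, and then shuffle them. `os.listdir` returns names in arbitrary order, so sorting first makes the seeded shuffle reproducible.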

In [4]:
# File names found in the raw frame and mask folders (names only, no directory part).
all_frames, all_masks = os.listdir(FRAME_PATH), os.listdir(MASK_PATH)

In [5]:
def _natural_key(file_name):
  """Sort key that orders embedded numbers numerically ('img2' before 'img10').

  The name is split into runs of digits and single non-digit characters.
  Every token is tagged -- (0, number) or (1, character) -- so that a number
  is never compared with a string directly. An untagged ``[int, str, ...]``
  key raises TypeError in Python 3 whenever two names have a digit run and a
  character at the same position (e.g. 'a1.png' vs 'ab.png'). For names whose
  untagged keys were comparable, the resulting order is unchanged.
  """
  return [(0, int(token)) if token.isdigit() else (1, token)
          for token in re.findall(r'[^0-9]|[0-9]+', file_name)]


# Bring both listings into a deterministic order before shuffling, so that the
# fixed seed below produces the same permutation on every run.
all_frames.sort(key=_natural_key)
all_masks.sort(key=_natural_key)

# Only the frame list is shuffled here; all_masks stays in sorted order.
random.seed(230)
random.shuffle(all_frames)

##### Generate train, val, and test sets for frames
##### Using a 70–20–10 ratio for train, val, and test sets

In [6]:
# Cut points of the 70/20/10 split, expressed as indices into all_frames.
frame_count = len(all_frames)
train_split = int(0.7 * frame_count)
val_split = int(0.9 * frame_count)

# Three consecutive, non-overlapping slices:
# [0, train_split), [train_split, val_split) and [val_split, end).
train_frames, val_frames, test_frames = (
    all_frames[:train_split],
    all_frames[train_split:val_split],
    all_frames[val_split:],
)

##### Create the corresponding mask sets (a mask belongs to a split when its file name matches a frame in that split)

In [7]:
def _masks_for(frame_names):
  """Return the entries of all_masks whose file name also appears in frame_names.

  The result keeps the order of all_masks. The frame names are put into a set
  first so that each membership test is a constant-time lookup instead of a
  scan over the whole frame list.
  """
  wanted = set(frame_names)
  return [mask_name for mask_name in all_masks if mask_name in wanted]


# A mask is assigned to a split when a frame with the identical file name is in it.
train_masks = _masks_for(train_frames)
val_masks = _masks_for(val_frames)
test_masks = _masks_for(test_frames)

##### Save the images from these lists to their corresponding folders

In [8]:
def add_frames(dir_name, image):
  """Save one frame from FRAME_PATH into the folder ``DATA_PATH/<dir_name>/``.

  The file is opened with Pillow and saved again under the same file name, so
  it is decoded and re-encoded rather than copied byte for byte.

  Args:
    dir_name: Name of the destination sub-folder of DATA_PATH.
    image: File name of the frame inside FRAME_PATH.
  """
  source = FRAME_PATH + image
  target = '{}/{}/{}'.format(DATA_PATH, dir_name, image)
  # The context manager closes the source file handle as soon as the image is
  # saved instead of leaving that to garbage collection.
  with Image.open(source) as img:
    img.save(target)
  
def add_masks(dir_name, image):
  """Save one mask from MASK_PATH into the folder ``DATA_PATH/<dir_name>/``.

  The file is opened with Pillow and saved again under the same file name, so
  it is decoded and re-encoded rather than copied byte for byte.

  Args:
    dir_name: Name of the destination sub-folder of DATA_PATH.
    image: File name of the mask inside MASK_PATH.
  """
  source = MASK_PATH + image
  target = '{}/{}/{}'.format(DATA_PATH, dir_name, image)
  # The context manager closes the source file handle as soon as the image is
  # saved instead of leaving that to garbage collection.
  with Image.open(source) as img:
    img.save(target)


In [9]:
# Pair every list of file names with the name of the folder it is written to.
# Each entry is a (file_names, folder_name) tuple.
frame_folders = list(zip(
    (train_frames, val_frames, test_frames),
    ('train_frames', 'val_frames', 'test_frames'),
))
mask_folders = list(zip(
    (train_masks, val_masks, test_masks),
    ('train_masks', 'val_masks', 'test_masks'),
))

##### Apply add_frames() to every frame and add_masks() to every mask, so each file is written to the folder of its split

In [10]:
# Write every frame, then every mask, into the folder of the split it belongs to.
# Plain loops are used because add_frames() / add_masks() are called only for
# their side effect (saving a file); there is no return value worth collecting.
for frame_names, dir_name in frame_folders:
  for frame_name in frame_names:
    add_frames(dir_name, frame_name)

for mask_names, dir_name in mask_folders:
  for mask_name in mask_names:
    add_masks(dir_name, mask_name)