In [25]:
# Fixed random seed, passed as random_state to train_test_split so the split is repeatable.
SEED = 9

import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
import glob
import shutil

# JSRT dataset

## Split into Train (70%), Val (15%), Test (15%) -> Quick Preprocess -> Save to Set Dirs

In [21]:
# Collect the bare file name of every raw image.
# glob returns matches in an arbitrary, filesystem-dependent order, so the list
# is sorted to keep the seeded split identical across machines and re-runs;
# os.path.basename strips the directory regardless of the path separator.
all_examples = sorted(os.path.basename(f) for f in glob.glob('All_Images/*.IMG'))
all_examples[:5], len(all_examples)

(['JPCLN001.IMG',
  'JPCLN002.IMG',
  'JPCLN003.IMG',
  'JPCLN004.IMG',
  'JPCLN005.IMG'],
 246)

In [22]:
# Two-stage split: hold out 30% of the examples first, then cut that hold-out
# pool in half to obtain the validation and test subsets (70/15/15 overall).
train, val_test = train_test_split(
    all_examples,
    test_size=0.3,
    random_state=SEED,
)
val, test = train_test_split(
    val_test,
    test_size=0.5,
    random_state=SEED,
)

# Show the size of each subset.
tuple(len(subset) for subset in (train, val, test))

(172, 37, 37)

In [23]:
# Leakage check: the three subsets must be pairwise disjoint (the original
# check never compared val against test) and together account for every example.
assert set(train).isdisjoint(val), "train and val overlap"
assert set(train).isdisjoint(test), "train and test overlap"
assert set(val).isdisjoint(test), "val and test overlap"
assert len(train) + len(val) + len(test) == len(all_examples), "split lost or duplicated examples"

In [24]:
# Persist each subset as a text file with one image file name per line.
for list_path, names in (
    ('train_files.txt', train),
    ('val_files.txt', val),
    ('test_files.txt', test),
):
    with open(list_path, 'w') as f:
        f.writelines(f"{name}\n" for name in names)

In [38]:
def preprocess(set_, flist, verbose=True):
    """Convert raw images and copy their annotations into one split directory.

    For every file name in ``flist`` this

    * reads ``All_Images/<name>`` as a headerless 2048x2048 raster of
      big-endian unsigned 16-bit samples, inverts it (``1 - value / 4096``),
      resizes it to 1024x1024 and writes it as an 8-bit PNG to
      ``<set_>/Images``;
    * copies ``All_Masks/<stem>.npy`` to ``<set_>/Masks``;
    * copies ``All_Landmarks/<stem>.npy`` to ``<set_>/Landmarks``.

    Parameters
    ----------
    set_ : str
        Output directory of the split (e.g. ``'Train'``). The directory and
        its three sub-directories are created when missing.
    flist : iterable of str
        Raw image file names, relative to ``All_Images``.
    verbose : bool, default True
        Print the unique values stored in each mask as a sanity check.
        Pass ``False`` to skip the extra mask load and the per-file output.

    Raises
    ------
    OSError
        If an output directory cannot be created, a source file is missing,
        or the PNG cannot be written.
    ValueError
        If a raw image does not hold exactly 2048*2048 samples.
    """
    # Loop-invariant settings, hoisted out of the per-file loop.
    raw_h, raw_w = 2048, 2048
    out_size = (1024, 1024)
    raw_dtype = np.dtype('>u2')

    image_dir = os.path.join(set_, 'Images')
    mask_dir = os.path.join(set_, 'Masks')
    landmark_dir = os.path.join(set_, 'Landmarks')

    # makedirs(exist_ok=True) creates the parent too and tolerates existing
    # directories, while real failures (e.g. permissions) still surface
    # instead of being swallowed by a bare except.
    for out_dir in (image_dir, mask_dir, landmark_dir):
        os.makedirs(out_dir, exist_ok=True)

    for f in flist:
        # Strip the extension once; every output shares this stem.
        stem = os.path.splitext(f)[0]

        # preprocess images
        with open(os.path.join('All_Images', f), 'rb') as raw_file:
            img = np.fromfile(raw_file, dtype=raw_dtype).reshape((raw_h, raw_w))

        img = 1 - img.astype('float') / 4096
        img = cv2.resize(img, out_size)
        # Clip before the cast so an out-of-range sample cannot wrap around in uint8.
        img = np.clip(img * 255, 0, 255).astype('uint8')

        png_path = os.path.join(image_dir, stem + '.png')
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(png_path, img):
            raise OSError(f"could not write {png_path}")

        # preprocess masks
        npy_name = stem + '.npy'
        mask_src = os.path.join('All_Masks', npy_name)
        shutil.copy(mask_src, os.path.join(mask_dir, npy_name))
        if verbose:
            print(np.unique(np.load(mask_src)))

        # preprocess landmarks
        shutil.copy(
            os.path.join('All_Landmarks', npy_name),
            os.path.join(landmark_dir, npy_name),
        )

In [39]:
# Reload the saved training file names (closing the file handle) and build the Train split.
with open('train_files.txt', 'r') as f:
    trainlist = f.read().splitlines()
preprocess('Train', trainlist)

[0. 1. 2.]
[0. 1. 2.]
[0. 1. 2.]
[0. 1. 2.]
[0. 1. 2.]
[0. 1. 2.]


KeyboardInterrupt: 

In [32]:
# Reload the saved validation file names (closing the file handle) and build the Val split.
with open('val_files.txt', 'r') as f:
    vallist = f.read().splitlines()
preprocess('Val', vallist)

In [33]:
# Reload the saved test file names (closing the file handle) and build the Test split.
with open('test_files.txt', 'r') as f:
    testlist = f.read().splitlines()
preprocess('Test', testlist)