In [None]:
import os, glob
from PIL import Image
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import cv2

# BEFORE
'''
root_dir
    -- test
        --x
    -- train
        --x
        --y
'''

# Before running the cells below, create the directory structure shown under AFTER.
#AFTER
'''
root_dir
    -- test
        --input1
        --input2
        --x
    -- train
        --input1
        --input2
        --binary_mask
        --x
        --y
        --val
            --input1
            --input2
            --binary_mask

'''

In [None]:
# Dataset root; every path below is derived from it.
root = './'

# Directory prefixes, kept with a trailing slash so a bare filename can be appended.
train_img_paths = f'{root}train/x/'
train_mask_paths = f'{root}train/y/'
test_img_paths = f'{root}test/x/'

# Full paths of every PNG currently found under train/x and test/x.
train_file_paths = glob.glob(os.path.join(root, 'train', 'x', '*.png'))
test_file_paths = glob.glob(os.path.join(root, 'test', 'x', '*.png'))

# Random seed, passed to KFold when the validation split is made.
seed = 1024

# Accumulates the paths of training samples rejected by the checks that follow.
except_filepath_list = []

# Collect the training samples that must be discarded before the left/right split.
# Each path is recorded at most once, so the deletion step never tries to remove
# the same file twice.
for path in tqdm(train_file_paths):
    filename = os.path.basename(path)
    img_path = train_img_paths + filename
    mask_path = train_mask_paths + filename
    # Context managers release the file handles as soon as the pixels are copied.
    with Image.open(img_path) as img_file:
        img = np.array(img_file)
    with Image.open(mask_path) as mask_file:
        mask = np.array(mask_file)

    # Exclude anything that is not 754x1508 (rows x columns). The width is checked
    # as well as the height because the later split at column 754 relies on it.
    if img.shape[:2] != (754, 1508) or mask.shape[:2] != (754, 1508):
        except_filepath_list.append(path)
        continue  # a wrong-sized mask cannot be meaningfully sliced below

    # Exclude masks that carry new-construction/renewal labels (1, 3) on the left
    # side or the demolition label (2) on the right side. A 10-pixel band on each
    # side of the centre column (754) is ignored.
    mask_left = mask[:, :754 - 10]
    mask_right = mask[:, 754 + 10:]
    # Vectorised presence test; avoids the slow (and overflow-prone) builtin
    # sum over uint8 elements.
    if np.isin(mask_left, (1, 3)).any() or (mask_right == 2).any():
        except_filepath_list.append(path)

# Record the list of files to delete (order preserved, duplicates dropped so
# that no file is scheduled for removal twice).
unique_except_paths = list(dict.fromkeys(except_filepath_list))
with open('./except_list.txt', 'w') as f:
    for path in unique_except_paths:
        f.write(path + '\n')


# Delete the listed files from both train/x and train/y.
with open('./except_list.txt', 'r') as f:
    listed_paths = [line.strip() for line in f if line.strip()]

removed_filenames = set()
for file in tqdm(listed_paths):
    filename = os.path.basename(file)
    for directory in (train_img_paths, train_mask_paths):
        target = directory + filename
        # Tolerate files that are already gone so the cell can be re-run safely.
        if os.path.exists(target):
            os.remove(target)
    removed_filenames.add(filename)

# Keep the in-memory listing in sync with the disk so that later cells do not
# iterate over files that no longer exist.
train_file_paths = [
    p for p in train_file_paths if os.path.basename(p) not in removed_filenames
]

In [None]:
# Split every train x image at column 754: columns [0, 754) are saved to
# train/input1 and columns [754, end) to train/input2, under the same filename.
os.makedirs(root + 'train/input1/', exist_ok=True)
os.makedirs(root + 'train/input2/', exist_ok=True)
for path in tqdm(train_file_paths):
    filename = os.path.basename(path)
    src_path = train_img_paths + filename
    if not os.path.isfile(src_path):
        # train_file_paths may have been globbed before the filtering step
        # deleted some files; skip entries that are no longer on disk.
        continue
    # Load the pixels (the original passed the path string itself to np.array,
    # which yields a 0-d string array and fails on slicing).
    with Image.open(src_path) as src:
        img = np.array(src)
    # Slicing only the first two axes works for both HxW and HxWxC arrays.
    left = Image.fromarray(img[:, :754])
    right = Image.fromarray(img[:, 754:])
    left.save(root +'train/input1/'+ filename)
    right.save(root + 'train/input2/'+ filename)

# Split every train y mask at column 754 and merge the two halves into a single
# binary mask: a pixel is 1 if it is non-zero in either half, otherwise 0.
mask_binary_dir = root+'train/binary_mask/'
os.makedirs(mask_binary_dir, exist_ok=True)
for path in tqdm(train_file_paths):
    filename = os.path.basename(path)
    mask_path = train_mask_paths+filename
    if not os.path.isfile(mask_path):
        # train_file_paths may have been globbed before the filtering step
        # deleted some files; skip entries that are no longer on disk.
        continue
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        raise IOError('could not read mask: ' + mask_path)
    mask_left = mask[:,:754]
    mask_right = mask[:,754:]
    # Logical OR instead of uint8 addition, which could wrap around to 0.
    new_mask = ((mask_left > 0) | (mask_right > 0)).astype(np.uint8)
    cv2.imwrite(mask_binary_dir+filename, new_mask)

# Split every test x image at column 754: columns [0, 754) are saved to
# test/input1 and columns [754, end) to test/input2, under the same filename.
os.makedirs(root + 'test/input1/', exist_ok=True)
os.makedirs(root + 'test/input2/', exist_ok=True)
for path in tqdm(test_file_paths):
    filename = os.path.basename(path)
    # Load the pixels (the original passed the path string itself to np.array,
    # which yields a 0-d string array and fails on slicing).
    with Image.open(test_img_paths + filename) as src:
        img = np.array(src)
    # Slicing only the first two axes works for both HxW and HxWxC arrays.
    left = Image.fromarray(img[:, :754])
    right = Image.fromarray(img[:, 754:])
    left.save(root +'test/input1/'+ filename)
    right.save(root + 'test/input2/'+ filename)


In [None]:
# train 검증파일 분할
from sklearn.model_selection import KFold
import shutil

# Move one fold (1/k_fold of the split training samples) into train/val.
# NOTE: this cell moves files, so running it again moves a further fold out of
# the remaining training set.

# Sorted so that the seeded shuffle picks the same files regardless of the
# order in which the filesystem happens to list them.
binary_train_file_paths = sorted(glob.glob(os.path.join(root, 'train', 'input1', '*.png')))
k_fold = 5

splits=KFold(n_splits=k_fold,shuffle=True,random_state=seed)

# Only the first fold is used. The indices are computed over the same list they
# are applied to (the original split a different, possibly longer, list).
train_idx, val_idx = next(splits.split(binary_train_file_paths))

val_subdirs = ('input1', 'input2', 'binary_mask')
for subdir in val_subdirs:
    os.makedirs(root + 'train/val/' + subdir, exist_ok=True)

for i in val_idx:
    filename = os.path.basename(binary_train_file_paths[i])
    # Keep both image halves and the binary mask of a sample together.
    for subdir in val_subdirs:
        shutil.move(root + 'train/' + subdir + '/' + filename,
                    root + 'train/val/' + subdir + '/' + filename)