In [1]:
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy.io import loadmat, savemat
from sklearn.model_selection import train_test_split

import os
import shutil
import random

In [2]:
folder_path = os.path.dirname(os.path.dirname(os.getcwd()))
folder_path

'C:\\Users\\samko\\Desktop\\Files\\UNI\\DP'

In [2]:
def roi_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    equalized = cv2.equalizeHist(gray)
    _, binary = cv2.threshold(equalized, 240, 255, cv2.THRESH_BINARY)
    contours, x = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)
    
    return x, y, w, h

In [3]:
 def crop_image(image, crop_perc_height, crop_perc_width):
        '''
        Crops the image, cutting from height and width depending on crop_perc.
        '''
        height, width = image.shape[:2]
        h_crop = int((crop_perc_height/100*height)//2)
        w_crop = int((crop_perc_width/100*width)//2)
        return image[h_crop:(height-h_crop), w_crop:(width-w_crop)]

In [48]:
directory_path_labels = r''+folder_path+r'\Data\Labels'
targer_dir_path_labels = r''+folder_path+r'\Data\LabelsCropped'
filenames_labels = os.listdir(directory_path_labels)

In [49]:
directory_path_images = r''+folder_path+r'\Data\Images'
targer_dir_path_images = r''+folder_path+r'\Data\ImagesCropped'
filenames_images = os.listdir(directory_path_images)

In [50]:
source_folder = r''+folder_path+r'\Data'

train_image_folder = targer_dir_path_images + "\Train"
train_label_folder = targer_dir_path_labels + "\Train"

valid_image_folder = targer_dir_path_images + "\Valid"
valid_label_folder = targer_dir_path_labels + "\Valid"

In [51]:
os.makedirs(train_image_folder, exist_ok=True)
os.makedirs(valid_image_folder, exist_ok=True)

In [52]:
os.makedirs(train_label_folder, exist_ok=True)
os.makedirs(valid_label_folder, exist_ok=True)

In [9]:
for filename_img, filename_lab in zip(filenames_images, filenames_labels):
    src_image = directory_path_images +'\/'+ filename_img
    trg_image = targer_dir_path_images + '\/' + filename_img

    src_label = directory_path_labels +'\/'+ filename_lab
    trg_label = targer_dir_path_labels + '\/' + filename_lab

    #print(src_label + "\n" + src_image + "\n")
    image = cv2.imread(src_image)
    label = loadmat(src_label).get('mask')

    # image = crop_image(image, 20, 40)
    # label = crop_image(label, 20, 40)
    
    x, y, w, h = roi_image(image)
    if h>w:
        cv2.imwrite(trg_image, image[y:y+h, x:x+h])
        savemat(trg_label, {'mask':label[y:y+h, x:x+h]})
    if w>=h:
        cv2.imwrite(trg_image, image[y:y+w, x:x+w])
        savemat(trg_label, {'mask':label[y:y+w, x:x+w]})

In [53]:
file_image_list = os.listdir(targer_dir_path_images)
file_label_list = os.listdir(targer_dir_path_labels)

In [8]:
# total_files = len(file_image_list)

# train_size = int(0.75 * total_files)
# valid_size = total_files - train_size - 10

In [54]:
file_label_list = file_label_list[:-2]

In [55]:
file_image_list = file_image_list[:-2]

In [57]:
file_image_list[-1]

'2484.jpg'

In [58]:
X_train, X_valid, y_train, y_valid = train_test_split(file_image_list, file_image_list, test_size=0.2, random_state=42)

In [59]:
for file_name in X_train:
    src_image_path = os.path.join(targer_dir_path_images, file_name)
    src_label_path = os.path.join(targer_dir_path_labels, file_name.replace("jpg", "mat"))

    dst_image_path = os.path.join(train_image_folder, file_name)
    dst_label_path = os.path.join(train_label_folder, file_name.replace("jpg", "mat"))
    
    shutil.copy(src_image_path, dst_image_path)
    shutil.copy(src_label_path, dst_label_path)

In [60]:
for file_name in X_valid:
    src_image_path = os.path.join(targer_dir_path_images, file_name)
    src_label_path = os.path.join(targer_dir_path_labels, file_name.replace("jpg", "mat"))

    dst_image_path = os.path.join(valid_image_folder, file_name)
    dst_label_path = os.path.join(valid_label_folder, file_name.replace("jpg", "mat"))
    
    shutil.copy(src_image_path, dst_image_path)
    shutil.copy(src_label_path, dst_label_path)

In [9]:
# for file_name in file_image_list[:train_size]:
#     src_image_path = os.path.join(targer_dir_path_images, file_name)
#     src_label_path = os.path.join(targer_dir_path_labels, file_name.replace("jpg", "mat"))
    
#     dst_image_path = os.path.join(train_image_folder, file_name)
#     dst_label_path = os.path.join(train_label_folder, file_name.replace("jpg", "mat"))
    
#     shutil.copy(src_image_path, dst_image_path)
#     shutil.copy(src_label_path, dst_label_path)

In [10]:
for file_name in file_image_list[train_size:train_size + valid_size]:
    src_image_path = os.path.join(targer_dir_path_images, file_name)
    src_label_path = os.path.join(targer_dir_path_labels, file_name.replace("jpg", "mat"))
    
    dst_image_path = os.path.join(valid_image_folder, file_name)
    dst_label_path = os.path.join(valid_label_folder, file_name.replace("jpg", "mat"))
    
    shutil.copy(src_image_path, dst_image_path)
    shutil.copy(src_label_path, dst_label_path)

In [65]:
for file_name in file_image_list[train_size + valid_size:]:
    src_image_path = os.path.join(targer_dir_path_images, file_name)
    src_label_path = os.path.join(targer_dir_path_labels, file_name.replace("jpg", "mat"))
    
    dst_image_path = os.path.join(test_image_folder, file_name)
    dst_label_path = os.path.join(test_label_folder, file_name.replace("jpg", "mat"))
    
    shutil.copy(src_image_path, dst_image_path)
    shutil.copy(src_label_path, dst_label_path)

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\samko\\Desktop\\Files\\UNI\\DP\\Datasets\\ORIGA\\ImagesCropped\\Train'