In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from paths import dataset_paths, dataset_test_paths, dataset_test_paths_tests, combined_list, combined_list_excluded, dataset_test_paths_excluded
import random
import os
from PIL import Image, ImageOps, ImageDraw
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import numpy as np

In [3]:
def create_image_pairs(height, width, path_anchors, path_positives, dataset_size, rgb=True):
    """
    Create image pairs and save them as JPEG files to folders on disk.
    Both crops of a pair are taken from the same image, which is a random file
    out of a random folder of the module-level ``dataset_paths``.

    Only images wider than 500 px are used. Each image is scaled to ``height``
    (aspect ratio kept) and two ``height`` x ``width`` windows are cut out at
    random horizontal offsets. Files that cannot be decoded and images that are
    narrower than ``width`` after scaling are skipped.

    Note: the loop only ends once ``dataset_size`` pairs were written, so
    ``dataset_paths`` has to contain enough usable images.

    Arguments:
        height: desired height of the cropped images - integer
        width: desired width of the cropped images - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    # zero padding of the running number used as file name
    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"

    i = 0
    while i < dataset_size:
        # select random folder and image
        folder = random.choice(dataset_paths)
        file_name = random.choice(os.listdir(folder))

        try:
            with Image.open(os.path.join(folder, file_name)) as source:
                if source.size[0] <= 500:
                    continue
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - draw a new one
            continue

        x_max = pixels.shape[1] - width
        if x_max < 0:
            # the scaled image is too narrow to hold a full crop
            continue

        # randint excludes the upper bound, +1 keeps the right-most window reachable
        x_1 = np.random.randint(0, x_max + 1)
        x_2 = np.random.randint(0, x_max + 1)
        anchor = Image.fromarray(pixels[0:height, x_1:x_1 + width])
        positive = Image.fromarray(pixels[0:height, x_2:x_2 + width])

        # save the crops on disk
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))

        i += 1

In [4]:
%%time
# Generate 20,000 RGB pairs of 224x224 px crops; both crops of a pair come from the same source image.
create_image_pairs(height=224, width=224, path_anchors="npz_datasets/pairs_20k/anchor", path_positives="npz_datasets/pairs_20k/positive", dataset_size=20000, rgb=True)

CPU times: user 2min 30s, sys: 43.8 s, total: 3min 14s
Wall time: 3min 15s


In [13]:
def create_image_pairs_new(height, width, path_anchors, path_positives, dataset_size, rgb=True):
    """
    Create image pairs and save them as JPEG files to folders on disk.
    The two crops of a pair are taken from two different images of the same
    folder; the folder is drawn at random from the module-level ``dataset_paths``.

    Only images wider than 500 px are used. Each image is scaled to ``height``
    (aspect ratio kept) and one ``height`` x ``width`` window is cut out at a
    random horizontal offset. Pairs containing a file that cannot be decoded or
    that is narrower than ``width`` after scaling are skipped.

    Note: the loop only ends once ``dataset_size`` pairs were written, so
    ``dataset_paths`` has to contain enough usable images.

    Arguments:
        height: desired height of the cropped images - integer
        width: desired width of the cropped images - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"

    def random_crop(image_path):
        """Return a random height x width crop of the scaled image, or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= 500:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        x_max = pixels.shape[1] - width
        if x_max < 0:
            # the scaled image is too narrow to hold a full crop
            return None
        # randint excludes the upper bound, +1 keeps the right-most window reachable
        x = np.random.randint(0, x_max + 1)
        return Image.fromarray(pixels[0:height, x:x + width])

    i = 0
    while i < dataset_size:
        folder = random.choice(dataset_paths)
        name_1, name_2 = random.sample(os.listdir(folder), 2)

        anchor = random_crop(os.path.join(folder, name_1))
        if anchor is None:
            continue
        positive = random_crop(os.path.join(folder, name_2))
        if positive is None:
            continue

        # saving is deliberately not guarded: a write error must stop the run
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))
        i += 1

In [14]:
%%time
# Generate 150,000 RGB pairs of 224x224 px crops; the two crops of a pair come from different images of one folder.
create_image_pairs_new(height=224, width=224, path_anchors="npz_datasets/pairs_150k_224_224/anchor", path_positives="npz_datasets/pairs_150k_224_224/positive", dataset_size=150000, rgb=True)

CPU times: user 25min 26s, sys: 5min 30s, total: 30min 56s
Wall time: 31min 13s


In [108]:
def create_image_pairs_rows(height, width, path_anchors, path_positives, dataset_size, rgb=True):
    """
    Create image pairs on row level and save them as JPEG files to folders on disk.
    The two rows of a pair are two different images of the same folder; the
    folder is drawn at random from the module-level ``dataset_paths``.

    Each row is scaled to ``height`` (aspect ratio kept) and the left-most
    ``width`` columns are kept. A row is only used if it is wider than
    ``width`` before scaling and at least ``width`` wide after scaling, so
    every saved crop is exactly ``height`` x ``width``. Files that cannot be
    decoded are skipped.

    Note: the loop only ends once ``dataset_size`` pairs were written, so
    ``dataset_paths`` has to contain enough usable images.

    Arguments:
        height: desired height of the cropped images - integer
        width: desired width of the cropped images - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"

    def left_crop(image_path):
        """Return the left-most height x width part of the scaled row, or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= width:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < width:
                    # after scaling the row is too short to fill a full crop
                    return None
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        return Image.fromarray(pixels[0:height, 0:width])

    i = 0
    while i < dataset_size:
        folder = random.choice(dataset_paths)
        name_1, name_2 = random.sample(os.listdir(folder), 2)

        anchor = left_crop(os.path.join(folder, name_1))
        if anchor is None:
            continue
        positive = left_crop(os.path.join(folder, name_2))
        if positive is None:
            continue

        # saving is deliberately not guarded: a write error must stop the run
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))
        i += 1

In [118]:
%%time
# Generate 5,000 RGB row pairs: rows are scaled to 113 px height and cropped from the left edge with width=1000.
create_image_pairs_rows(height=113, width=1000, path_anchors="npz_datasets/test_pairs_113_1000/anchor", path_positives="npz_datasets/test_pairs_113_1000/positive", dataset_size=5000, rgb=True)

CPU times: user 1min 6s, sys: 11.9 s, total: 1min 18s
Wall time: 1min 18s


In [3]:
def create_image_pairs_rows_fixed_size(height, width, crop_width, path_anchors, path_positives, dataset_size, rgb=True):
    """
    Create image pairs on row level and save them as JPEG files to folders on disk.
    The two rows of a pair are two different images of the same folder; the
    folder is drawn at random from the module-level ``dataset_paths``.

    Each row is scaled to ``height`` (aspect ratio kept), a horizontally centred
    window of ``crop_width`` columns is cut out and that window is resized to
    ``width`` x ``height``. A row is only used if it is wider than
    ``crop_width`` before scaling and at least ``crop_width`` wide after
    scaling; otherwise the window would not fit. Files that cannot be decoded
    are skipped.

    Note: the loop only ends once ``dataset_size`` pairs were written, so
    ``dataset_paths`` has to contain enough usable images.

    Arguments:
        height: desired height of the saved images - integer
        width: desired width of the saved images - integer
        crop_width: width of the centred crop taken from the scaled row, before resizing to width - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"

    def centered_row(image_path):
        """Return the centred crop resized to (width, height), or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= crop_width:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < crop_width:
                    # after scaling the row cannot hold the crop window any more
                    return None
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        # left edge of the window centred on the middle of the row
        x = int(int(resized_width / 2) - (crop_width / 2))
        return Image.fromarray(pixels[0:height, x:x + crop_width]).resize((width, height))

    i = 0
    while i < dataset_size:
        folder = random.choice(dataset_paths)
        name_1, name_2 = random.sample(os.listdir(folder), 2)

        anchor = centered_row(os.path.join(folder, name_1))
        if anchor is None:
            continue
        positive = centered_row(os.path.join(folder, name_2))
        if positive is None:
            continue

        # saving is deliberately not guarded: a write error must stop the run
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))
        i += 1

In [None]:
%%time
# Generate 1,000,000 RGB row pairs: rows scaled to 224 px height, centre crop of 1000 px, resized to 224x224.
create_image_pairs_rows_fixed_size(height=224, width=224, crop_width=1000, path_anchors="npz_datasets/pairs_1m_224_224_rows_1000/anchor", path_positives="npz_datasets/pairs_1m_224_224_rows_1000/positive", dataset_size=1000000, rgb=True)

In [60]:
def create_cvl_image_pairs_rows_fixed_size(height, width, crop_width, path_anchors, path_positives, dataset_size, rgb=True, cvl_paths="CVL_Database/lines"):
    """
    Create image pairs on row level from the CVL line images and save them as
    JPEG files to folders on disk.
    The two rows of a pair are two different images of the same sub-folder of
    ``cvl_paths``; the sub-folder is drawn at random for every pair.

    Each row is scaled to ``height`` (aspect ratio kept), a horizontally centred
    window of ``crop_width`` columns is cut out and that window is resized to
    ``width`` x ``height``. A row is only used if it is wider than
    ``crop_width`` before scaling and at least ``crop_width`` wide after
    scaling; otherwise the window would not fit. Files that cannot be decoded
    are skipped.

    Note: the loop only ends once ``dataset_size`` pairs were written, so
    ``cvl_paths`` has to contain enough usable images.

    Arguments:
        height: desired height of the saved images - integer
        width: desired width of the saved images - integer
        crop_width: width of the centred crop taken from the scaled row, before resizing to width - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
        cvl_paths: folder that holds one sub-folder of line images per group,
            default "CVL_Database/lines" - string
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"
    # the list of sub-folders does not change while sampling
    sub_folders = os.listdir(cvl_paths)

    def centered_row(image_path):
        """Return the centred crop resized to (width, height), or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= crop_width:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < crop_width:
                    # after scaling the row cannot hold the crop window any more
                    return None
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        # left edge of the window centred on the middle of the row
        x = int(int(resized_width / 2) - (crop_width / 2))
        return Image.fromarray(pixels[0:height, x:x + crop_width]).resize((width, height))

    i = 0
    while i < dataset_size:
        full_path = os.path.join(cvl_paths, random.choice(sub_folders))
        name_1, name_2 = random.sample(os.listdir(full_path), 2)

        anchor = centered_row(os.path.join(full_path, name_1))
        if anchor is None:
            continue
        positive = centered_row(os.path.join(full_path, name_2))
        if positive is None:
            continue

        # saving is deliberately not guarded: a write error must stop the run
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))
        i += 1

In [77]:
%%time
# Generate 10,000 RGB row pairs from the CVL line images: 224 px height, centre crop of 1000 px, resized to 224x224.
create_cvl_image_pairs_rows_fixed_size(height=224, width=224, crop_width=1000, path_anchors="npz_datasets/test_pairs_cvl_224_224_rows_1000/anchor", path_positives="npz_datasets/test_pairs_cvl_224_224_rows_1000/positive", dataset_size=10000, rgb=True)

CPU times: user 3min 12s, sys: 1.97 s, total: 3min 14s
Wall time: 3min 14s


In [38]:
%%time
# Collect, for every image in dataset_paths that is at least 500 px wide, the
# width it has after being scaled to `height` px with the aspect ratio kept.
# NOTE(review): `height` is not assigned in this cell, so it has to be present
# in the kernel already - confirm which value was used. If it is undefined, the
# NameError raised below is swallowed by the bare `except` and widths_list
# silently stays empty.
widths_list = []
for i in range(len(dataset_paths)):
    image_list = os.listdir(dataset_paths[i])
    for j in range(len(image_list)):
        # files that cannot be opened as an image are skipped
        try:
            image = Image.open(dataset_paths[i] + "/" + image_list[j])
        except:
            continue
        try:
            if image.size[0] < 500:
                pass
            else:
                # same scaling as in the pair-creation functions: fixed height, proportional width
                height_precent = (height / float(image.size[1]))
                resized_width = int((float(image.size[0]) * float(height_precent)))
                image = image.resize((resized_width, height), Image.NEAREST)
                widths_list.append(image.size[0])
        except:
            continue

CPU times: user 9min 41s, sys: 7.14 s, total: 9min 49s
Wall time: 10min 4s


In [48]:
# Summary of the resized widths collected in widths_list.
# The comparisons are strict, so widths of exactly 1000 px / 1500 px are counted
# in neither the "greater" nor the "smaller" bucket.
# An empty widths_list makes the first line fail with ZeroDivisionError.
print(f"average sentence length in px: {(int(sum(widths_list) / len(widths_list)))}")
print(f"max sentence length in px: {(int(max(widths_list)))}")
print(f"min sentence length in px: {(int(min(widths_list)))}")
print(f"how many greater then 1000px: {(int(sum(i > 1000 for i in widths_list)))}")
print(f"how many smaller then 1000px: {(int(sum(i < 1000 for i in widths_list)))}")
print(f"how many greater then 1500px: {(int(sum(i > 1500 for i in widths_list)))}")
print(f"how many smaller then 1500px: {(int(sum(i < 1500 for i in widths_list)))}")

average sentence length in px: 1915
max sentence length in px: 7808
min sentence length in px: 414
how many greater then 1000px: 293764
how many smaller then 1000px: 3166
how many greater then 1500px: 193724
how many smaller then 1500px: 102951


In [5]:
def create_image_pairs_rows_fixed_size_ccl213_ccl_214(height, width, crop_width, data_path, save_path):
    """
    Convert every usable row image of one folder into a fixed-size image and
    save it as JPEG.

    Each row is scaled to ``height`` (aspect ratio kept), a horizontally centred
    window of ``crop_width`` columns is cut out and that window is resized to
    ``width`` x ``height``. A row is only used if it is wider than
    ``crop_width`` before scaling and at least ``crop_width`` wide after
    scaling; otherwise the window would not fit. Files that cannot be decoded
    are skipped.

    The saved file is named after the position of the source file in
    ``os.listdir(data_path)``, so skipped files leave gaps in the numbering.

    Arguments:
        height: desired height of the saved images - integer
        width: desired width of the saved images - integer
        crop_width: width of the centred crop taken from the scaled row, before resizing to width - integer
        data_path: folder that holds the source row images - string
        save_path: folder the processed images are saved in (created if missing) - string
    """
    os.makedirs(save_path, exist_ok=True)

    for i, file_name in enumerate(os.listdir(data_path)):
        try:
            with Image.open(os.path.join(data_path, file_name)) as source:
                if source.size[0] <= crop_width:
                    continue
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < crop_width:
                    # after scaling the row cannot hold the crop window any more
                    continue
                pixels = np.array(source.resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - go on with the next one
            continue

        # left edge of the window centred on the middle of the row
        x = int(int(resized_width / 2) - (crop_width / 2))
        row = Image.fromarray(pixels[0:height, x:x + crop_width]).resize((width, height))
        row.save(os.path.join(save_path, f"{i}.jpg"))

In [12]:
%%time
# Convert all rows of the "hand B 259" folder into 224x224 images (centre crop of 1000 px) stored under npz_datasets/handB_259.
data_path = "Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_259/rows/hand B 259"
save_path = "npz_datasets/handB_259"
create_image_pairs_rows_fixed_size_ccl213_ccl_214(height=224, width=224, crop_width=1000, data_path=data_path, save_path=save_path)

CPU times: user 1min 14s, sys: 945 ms, total: 1min 15s
Wall time: 1min 17s


In [83]:
def create_test_image_pairs(height, width, crop_width, path_anchors, path_positives, dataset_size, dataset_path, rgb=True):
    """
    Create image pairs on row level from grouped folders and save them as JPEG
    files to folders on disk.

    ``dataset_path`` is a list of groups, each group being a list of at least
    two folders. For every pair a group is drawn at random, two different
    folders are sampled from it and one random image is taken from each.

    Each row is scaled to ``height`` (aspect ratio kept), a horizontally centred
    window of ``crop_width`` columns is cut out and that window is resized to
    ``width`` x ``height``. A row is only used if it is wider than
    ``crop_width`` before scaling and at least ``crop_width`` wide after
    scaling; otherwise the window would not fit. Files that cannot be decoded
    are skipped.

    Note: the loop only ends once ``dataset_size`` pairs were written, so
    ``dataset_path`` has to contain enough usable images.

    Arguments:
        height: desired height of the saved images - integer
        width: desired width of the saved images - integer
        crop_width: width of the centred crop taken from the scaled row, before resizing to width - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        dataset_path: groups of folders to sample from - list of lists of strings
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"

    def centered_row(image_path):
        """Return the centred crop resized to (width, height), or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= crop_width:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < crop_width:
                    # after scaling the row cannot hold the crop window any more
                    return None
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        # left edge of the window centred on the middle of the row
        x = int(int(resized_width / 2) - (crop_width / 2))
        return Image.fromarray(pixels[0:height, x:x + crop_width]).resize((width, height))

    i = 0
    while i < dataset_size:
        hand_choice = random.choice(dataset_path)
        hand_path_1, hand_path_2 = random.sample(hand_choice, 2)
        name_1 = random.choice(os.listdir(hand_path_1))
        name_2 = random.choice(os.listdir(hand_path_2))

        # both branches (rgb / grayscale) load the second row from the second folder
        anchor = centered_row(os.path.join(hand_path_1, name_1))
        if anchor is None:
            continue
        positive = centered_row(os.path.join(hand_path_2, name_2))
        if positive is None:
            continue

        # saving is deliberately not guarded: a write error must stop the run
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))
        i += 1

In [158]:
def create_test_image_pairs_2(height, width, crop_width, path_anchors, path_positives, dataset_size, dataset_path, rgb=True):
    """
    Create image pairs on row level from grouped folders and save them as JPEG
    files to folders on disk.

    ``dataset_path`` is a list of groups, each group being a list of folders.
    For every pair a group is drawn at random. If the group holds a single
    folder, two different images of that folder form the pair; otherwise two
    different folders are sampled and one random image is taken from each.

    Each row is scaled to ``height`` (aspect ratio kept), a horizontally centred
    window of ``crop_width`` columns is cut out and that window is resized to
    ``width`` x ``height``. A row is only used if it is wider than
    ``crop_width`` before scaling and at least ``crop_width`` wide after
    scaling; otherwise the window would not fit. Files that cannot be decoded
    are skipped.

    Note: the groups are read from the ``dataset_path`` argument, not from a
    module-level list. The loop only ends once ``dataset_size`` pairs were
    written, so ``dataset_path`` has to contain enough usable images.

    Arguments:
        height: desired height of the saved images - integer
        width: desired width of the saved images - integer
        crop_width: width of the centred crop taken from the scaled row, before resizing to width - integer
        path_anchors: folder the anchor images are saved in (created if missing) - string
        path_positives: folder the positive images are saved in (created if missing) - string
        dataset_size: how many image pairs are generated and saved on disk - integer
        dataset_path: groups of folders to sample from - list of lists of strings
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    os.makedirs(path_anchors, exist_ok=True)
    os.makedirs(path_positives, exist_ok=True)

    padding = len(str(dataset_size))
    mode = "RGB" if rgb else "L"

    def centered_row(image_path):
        """Return the centred crop resized to (width, height), or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= crop_width:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < crop_width:
                    # after scaling the row cannot hold the crop window any more
                    return None
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        # left edge of the window centred on the middle of the row
        x = int(int(resized_width / 2) - (crop_width / 2))
        return Image.fromarray(pixels[0:height, x:x + crop_width]).resize((width, height))

    i = 0
    while i < dataset_size:
        hand_choice = random.choice(dataset_path)
        if len(hand_choice) <= 1:
            # only one folder in the group: take two different images of it
            hand_path_1 = hand_path_2 = hand_choice[0]
            name_1, name_2 = random.sample(os.listdir(hand_path_1), 2)
        else:
            hand_path_1, hand_path_2 = random.sample(hand_choice, 2)
            name_1 = random.choice(os.listdir(hand_path_1))
            name_2 = random.choice(os.listdir(hand_path_2))

        # both branches (rgb / grayscale) load the second row from the second folder
        anchor = centered_row(os.path.join(hand_path_1, name_1))
        if anchor is None:
            continue
        positive = centered_row(os.path.join(hand_path_2, name_2))
        if positive is None:
            continue

        # saving is deliberately not guarded: a write error must stop the run
        file_name = f"{i:0{padding}}.jpg"
        anchor.save(os.path.join(path_anchors, file_name))
        positive.save(os.path.join(path_positives, file_name))
        i += 1

In [159]:
%%time
# Generate 250,000 RGB row pairs (224 px height, centre crop of 1000 px, resized to 224x224) into pairs_250k_224_224_rows_1000_test.
create_test_image_pairs_2(height=224, width=224, crop_width=1000, path_anchors="npz_datasets/pairs_250k_224_224_rows_1000_test/anchor", path_positives="npz_datasets/pairs_250k_224_224_rows_1000_test/positive", dataset_size=250000, dataset_path=dataset_test_paths, rgb=True)

CPU times: user 1h 19min 9s, sys: 10min 45s, total: 1h 29min 54s
Wall time: 1h 30min 45s


In [3]:
def create_test_image_pairs_combined_list_excluded(height, width, crop_width, ground_path,path_anchors, path_positives, examples_per_pair, dataset_path_positives, dataset_path_negatives, rgb=True):
    """
    Create a labelled set of image pairs on row level and save it to disk.

    First, ``examples_per_pair`` pairs are written for every entry of
    ``dataset_path_positives`` (label 1): one random image from the first
    folder of the entry and one from the second. Afterwards the same number of
    pairs is written with label 0: two different groups are sampled from
    ``dataset_path_negatives``, one folder is drawn from each group and one
    random image from each folder. Both kinds of pairs share one running
    number; the labels are written in that order, one per line, to
    ``labels.txt`` inside ``ground_path``.

    Each row is scaled to ``height`` (aspect ratio kept), a horizontally centred
    window of ``crop_width`` columns is cut out and that window is resized to
    ``width`` x ``height``. A row is only used if it is wider than
    ``crop_width`` before scaling and at least ``crop_width`` wide after
    scaling; otherwise the window would not fit. Files that cannot be decoded
    are skipped.

    Note: sampling is repeated until the requested number of pairs exists, so
    every folder has to contain usable images.

    Arguments:
        height: desired height of the saved images - integer
        width: desired width of the saved images - integer
        crop_width: width of the centred crop taken from the scaled row, before resizing to width - integer
        ground_path: folder labels.txt is written to (created if missing) - string
        path_anchors: folder the first image of each pair is saved in (created if missing) - string
        path_positives: folder the second image of each pair is saved in (created if missing) - string
        examples_per_pair: number of label-1 pairs per entry of dataset_path_positives - integer
        dataset_path_positives: entries of two folders each, used for the label-1 pairs - list of tuples of strings
        dataset_path_negatives: groups of folders, used for the label-0 pairs - list of lists of strings
        rgb: save RGB crops if True, 8-bit grayscale crops otherwise, default rgb=True
    """
    for directory in (ground_path, path_anchors, path_positives):
        os.makedirs(directory, exist_ok=True)

    mode = "RGB" if rgb else "L"
    # as many label-0 pairs as label-1 pairs are written, hence the factor 2
    padding = len(str(len(dataset_path_positives) * examples_per_pair * 2))
    labels = []

    def centered_row(image_path):
        """Return the centred crop resized to (width, height), or None if the file is unusable."""
        try:
            with Image.open(image_path) as source:
                if source.size[0] <= crop_width:
                    return None
                # resize to the requested height, keeping the aspect ratio
                scale = height / float(source.size[1])
                resized_width = int(float(source.size[0]) * scale)
                if resized_width < crop_width:
                    # after scaling the row cannot hold the crop window any more
                    return None
                pixels = np.array(source.convert(mode).resize((resized_width, height), Image.NEAREST))
        except (OSError, ValueError, SyntaxError):
            # unreadable or corrupt file - the caller draws a new pair
            return None
        # left edge of the window centred on the middle of the row
        x = int(int(resized_width / 2) - (crop_width / 2))
        return Image.fromarray(pixels[0:height, x:x + crop_width]).resize((width, height))

    def save_pair(folder_1, folder_2, label):
        """Draw one image per folder and save the pair; return False if a row is unusable."""
        first = centered_row(os.path.join(folder_1, random.choice(os.listdir(folder_1))))
        if first is None:
            return False
        second = centered_row(os.path.join(folder_2, random.choice(os.listdir(folder_2))))
        if second is None:
            return False
        # one label is appended per saved pair, so len(labels) is the running number
        file_name = f"{len(labels):0{padding}}.jpg"
        first.save(os.path.join(path_anchors, file_name))
        second.save(os.path.join(path_positives, file_name))
        labels.append(label)
        return True

    # label 1: both images come from the two folders of one entry
    for folders in dataset_path_positives:
        produced = 0
        while produced < examples_per_pair:
            if save_pair(folders[0], folders[1], 1):
                produced += 1

    # label 0: the images come from folders of two different groups
    n_positives = len(labels)
    produced = 0
    while produced < n_positives:
        group_1, group_2 = random.sample(dataset_path_negatives, 2)
        if save_pair(random.choice(group_1), random.choice(group_2), 0):
            produced += 1

    with open(os.path.join(ground_path, 'labels.txt'), 'w') as f:
        for item in labels:
            f.write("%s\n" % item)

In [5]:
%%time
# Build the labelled set: pairs drawn from the entries of combined_list get label 1, pairs drawn across groups of dataset_test_paths get label 0; labels.txt is written to ground_path.
create_test_image_pairs_combined_list_excluded(height=224, width=224, crop_width=1000, ground_path="npz_datasets/test_dataset_A215_B20_not_excluded_test",  path_anchors="npz_datasets/test_dataset_A215_B20_not_excluded_test/anchor", path_positives="npz_datasets/test_dataset_A215_B20_not_excluded_test/positive", dataset_path_positives=combined_list, dataset_path_negatives=dataset_test_paths,examples_per_pair=50, rgb=True)

CPU times: user 3min 43s, sys: 1min 23s, total: 5min 6s
Wall time: 5min 11s


In [3]:
combined_list

[('Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_31/rows/hand A 30'),
 ('Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_197/rows/hand A 30'),
 ('Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_206/rows/hand A 30'),
 ('Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_226/rows/hand A 30'),
 ('Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_246/rows/hand A 30'),
 ('Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_256/rows/hand A 30'),
 ('Dataset_22092021_lina/SCID

In [4]:
dataset_test_paths

[['Dataset_22092021_lina/SCID/Data_A/processed/rgb_rows/ccl_30/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_31/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_197/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_206/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_226/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_246/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_256/rows/hand A 30',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_257/rows/hand A 30'],
 ['Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_209/rows/hand A 259',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_259/rows/hand A 259',
  'Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_949/rows/hand A 259'],
 ['Dataset_22092021_lina/SCID/Data_B/processed/rgb_rows/ccl_259/rows/hand B 259',
  'Dataset_22092021_lina