In [3]:
import os
import glob
import random
from shutil import copyfile

## Find image paths

In [4]:
# Root directory of the source dataset. The cells below read the
# "carvansara/" and "other/" image folders and the "yolo_labels/" folder under it.
dataset_base_path = "../dataset/carvansara_600x600"

In [5]:
# Private, seeded RNG: the shuffle order drives the train/validation split built
# in the following cells, so it must be identical on every "Restart & Run All".
shuffle_rng = random.Random(42)

# glob returns matches in arbitrary (filesystem-dependent) order; sorting first
# gives the seeded shuffle the same starting sequence on every machine.
carvansara_images = sorted(glob.glob(os.path.join(dataset_base_path, "carvansara/*.png")))
shuffle_rng.shuffle(carvansara_images)

other_images = sorted(glob.glob(os.path.join(dataset_base_path, "other/*.png")))
shuffle_rng.shuffle(other_images)

num_carvansara_images = len(carvansara_images)
num_other_images = len(other_images)
num_total = num_carvansara_images + num_other_images

# Fail with a clear message instead of a ZeroDivisionError in the percentages
# below when the dataset path is wrong or the folders are empty.
if num_total == 0:
    raise FileNotFoundError(f"No .png images found under {dataset_base_path!r}")

print(f"Number of carvansara images: {num_carvansara_images} [{100*num_carvansara_images/num_total:0.2f}%]")
print(f"Number of other images:      {num_other_images} [{100*num_other_images/num_total:0.2f}%]")

Number of carvansara images: 319 [29.87%]
Number of other images:      749 [70.13%]


## Split images into validation (20%) and train (80%) sets

In [4]:
# Hold out 20% of the carvansara images for validation. At most half of that
# quota is taken from each image source ("bing" / "google").
num_val_carvansara = 20*num_carvansara_images//100
max_per_source = num_val_carvansara//2
num_bing_carvansara = 0
num_google_carvansara = 0
valid_carvansara = []
# Paths with the source suffix stripped, one entry per image already placed in
# validation; used to avoid picking both the bing and the google image that
# share the same base name.
names = []

for image in carvansara_images:
    bing_name = image.replace("_bing_600x600.png","")
    google_name = image.replace("_google_600x600.png","")
    if ("bing" in image) and (bing_name not in names) and (num_bing_carvansara < max_per_source):
        num_bing_carvansara +=1
        # Store the same stripped name that the membership checks compare
        # against (the suffix here previously said 300x300, so the full path
        # was stored and the google branch never saw the duplicate).
        names.append(bing_name)
        valid_carvansara.append(image)
    elif ("google" in image) and (google_name not in names) and (num_google_carvansara < max_per_source):
        names.append(google_name)
        valid_carvansara.append(image)
        num_google_carvansara +=1

# Everything not selected for validation goes to train; a set keeps the
# membership test O(1) per image.
valid_carvansara_set = set(valid_carvansara)
train_carvansara = [image for image in carvansara_images if image not in valid_carvansara_set]
print(f"Number of carvansara images in validation set: {len(valid_carvansara)}")
print(f"Number of carvansara images in train set:      {len(train_carvansara)}")

Number of carvansara images in validation set: 62
Number of carvansara images in train set:      257


In [5]:
# Hold out 20% of the "other" images for validation. At most half of that
# quota is taken from each image source ("bing" / "google").
num_val_other = 20*num_other_images//100
max_per_source = num_val_other//2
num_bing_other = 0
num_google_other = 0
valid_other = []
# Paths with the source suffix stripped, one entry per image already placed in
# validation; used to avoid picking both the bing and the google image that
# share the same base name.
names = []

for image in other_images:
    bing_name = image.replace("_bing_600x600.png","")
    google_name = image.replace("_google_600x600.png","")
    if ("bing" in image) and (bing_name not in names) and (num_bing_other < max_per_source):
        num_bing_other +=1
        # Store the same stripped name that the membership checks compare
        # against (the suffix here previously said 300x300, so the full path
        # was stored and the google branch never saw the duplicate).
        names.append(bing_name)
        valid_other.append(image)
    elif ("google" in image) and (google_name not in names) and (num_google_other < max_per_source):
        names.append(google_name)
        valid_other.append(image)
        num_google_other +=1

# Everything not selected for validation goes to train; a set keeps the
# membership test O(1) per image.
valid_other_set = set(valid_other)
train_other = [image for image in other_images if image not in valid_other_set]
print(f"Number of other images in validation set: {len(valid_other)}")
print(f"Number of other images in train set:      {len(train_other)}")

Number of other images in validation set: 148
Number of other images in train set:      601


In [6]:
# Output root of the YOLO-style dataset. The validation split built above is
# written under "test" by the copy cells below.
yolo_base_path = "../dataset/carvansara_yolo"
train_path = os.path.join(yolo_base_path, "train")
test_path = os.path.join(yolo_base_path, "test")

# Create <split>/images and <split>/labels. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of a separate os.path.exists test.
for path in [train_path, test_path]:
    for subdir in ["images", "labels"]:
        os.makedirs(os.path.join(path, subdir), exist_ok=True)

In [7]:
# "Other" training images: write an empty label file for each image, then copy
# the image into the train split.
for src_image in train_other:
    file_name = os.path.basename(src_image)
    stem = file_name[:-4]  # drop the 4-character ".png" extension
    with open(f"{train_path}/labels/{stem}.txt", "w") as label_file:
        label_file.write("")
    image_target = os.path.join(train_path, "images", file_name)
    copyfile(src_image, image_target)

In [9]:
# "Other" validation images: write an empty label file for each image, then
# copy the image into the test split.
for src_image in valid_other:
    file_name = os.path.basename(src_image)
    stem = file_name[:-4]  # drop the 4-character ".png" extension
    with open(f"{test_path}/labels/{stem}.txt", "w") as label_file:
        label_file.write("")
    image_target = os.path.join(test_path, "images", file_name)
    copyfile(src_image, image_target)

In [10]:
# Carvansara training images: copy each image's label file from "yolo_labels"
# and the image itself into the train split.
for src_image in train_carvansara:
    file_name = os.path.basename(src_image)
    label_file_name = f"{file_name[:-4]}.txt"  # same stem, ".png" replaced by ".txt"
    label_source = os.path.join(dataset_base_path, "yolo_labels", label_file_name)
    label_target = os.path.join(train_path, "labels", label_file_name)
    copyfile(label_source, label_target)
    copyfile(src_image, os.path.join(train_path, "images", file_name))

In [11]:
# Carvansara validation images: copy each image's label file from "yolo_labels"
# and the image itself into the test split.
for src_image in valid_carvansara:
    file_name = os.path.basename(src_image)
    label_file_name = f"{file_name[:-4]}.txt"  # same stem, ".png" replaced by ".txt"
    label_source = os.path.join(dataset_base_path, "yolo_labels", label_file_name)
    label_target = os.path.join(test_path, "labels", label_file_name)
    copyfile(label_source, label_target)
    copyfile(src_image, os.path.join(test_path, "images", file_name))