In [1]:
import os
import glob
import random
from shutil import copyfile

## Find images path

In [2]:
carvansara_images = []
for file in glob.glob("../dataset/carvansara_600x600/carvansara/*.png"):
    carvansara_images.append(file)
random.shuffle(carvansara_images)

other_images = []
for file in glob.glob("../dataset/carvansara_600x600/other/*.png"):
    other_images.append(file)
random.shuffle(other_images)

num_carvansara_images = len(carvansara_images)
num_other_images = len(other_images)
num_total = num_carvansara_images + num_other_images

print(f"Number of carvansara images: {num_carvansara_images} [{100*num_carvansara_images/num_total:0.2f}%]")
print(f"Number of other images:      {num_other_images} [{100*num_other_images/num_total:0.2f}%]")

Number of carvansara images: 319 [29.87%]
Number of other images:      749 [70.13%]


## Split images to Validation[20%] and Train[80%] 

In [3]:
num_val_carvansara = 20*num_carvansara_images//100
valid_carvansara = []

for image_path in carvansara_images[:]:
    image_name = os.path.basename(image_path)
    base_name = image_name.replace("_bing_600x600.png","") if "bing"  in image_name  else image_name.replace("_google_600x600.png","")
    valid_carvansara += [image for image in carvansara_images if base_name in image]
    if len(valid_carvansara) >  num_val_carvansara:
        break
train_carvansara = [image for image in carvansara_images if image not in valid_carvansara]
print(f"Number of carvansara images in validation set: {len(valid_carvansara)}")
print(f"Number of carvansara images in train set:      {len(train_carvansara)}")

Number of carvansara images in validation set: 65
Number of carvansara images in train set:      256


In [4]:
num_val_other = 20*num_other_images//100
valid_other = []


for image_path in other_images[:]:
    image_name = os.path.basename(image_path)
    base_name = image_name.replace("_bing_600x600.png","") if "bing"  in image_name  else image_name.replace("_google_600x600.png","")
    valid_other += [image for image in other_images if base_name in image]
    if len(valid_other) >  num_val_other:
        break
train_other = [image for image in other_images if image not in valid_other]
print(f"Number of other images in validation set: {len(valid_other)}")
print(f"Number of other images in train set:      {len(train_other)}")

Number of other images in validation set: 150
Number of other images in train set:      611


In [5]:
train_path = os.path.join("../dataset/carvansara_yolo", "train")
test_path = os.path.join("../dataset/carvansara_yolo", "test")

for path in [train_path,test_path]:
    for subdir in ["images", "labels"]:
        
        if not os.path.exists(os.path.join(path, subdir)):
            os.makedirs(os.path.join(path, subdir))

In [6]:
for image in train_other:
    image_name = os.path.basename(image)[:-4]
    with open(f"{train_path}/labels/{image_name}.txt", "w") as f:
            f.write("")
    copyfile(image, os.path.join(train_path, "images", os.path.basename(image)))

In [7]:
for image in valid_other:
    image_name = os.path.basename(image)[:-4]
    with open(f"{test_path}/labels/{image_name}.txt", "w") as f:
            f.write("")
    copyfile(image, os.path.join(test_path, "images", os.path.basename(image)))

In [8]:
for image in train_carvansara:
    image_name = os.path.basename(image)[:-4]
    label_path = os.path.join("../dataset/carvansara_600x600", "yolo_labels", f"{image_name}.txt")
    copyfile(label_path, os.path.join(train_path, "labels", f"{image_name}.txt"))
    
    copyfile(image, os.path.join(train_path, "images", os.path.basename(image)))

In [9]:
for image in valid_carvansara:
    image_name = os.path.basename(image)[:-4]
    label_path = os.path.join("../dataset/carvansara_600x600", "yolo_labels", f"{image_name}.txt")
    copyfile(label_path, os.path.join(test_path, "labels", f"{image_name}.txt"))
    
    copyfile(image, os.path.join(test_path, "images", os.path.basename(image)))