In [1]:
import os
import glob
import random
from shutil import copyfile

## Find images path

In [2]:
carvansara_images = []
for file in glob.glob("../dataset/carvansara_600x600/carvansara/*.png"):
    carvansara_images.append(file)
random.shuffle(carvansara_images)

other_images = []
for file in glob.glob("../dataset/carvansara_600x600/other/*.png"):
    other_images.append(file)
random.shuffle(other_images)

num_carvansara_images = len(carvansara_images)
num_other_images = len(other_images)
num_total = num_carvansara_images + num_other_images

print(f"Number of carvansara images: {num_carvansara_images} [{100*num_carvansara_images/num_total:0.2f}%]")
print(f"Number of other images:      {num_other_images} [{100*num_other_images/num_total:0.2f}%]")

Number of carvansara images: 319 [29.87%]
Number of other images:      749 [70.13%]


## Split images to Validation[20%] and Train[80%] 

In [3]:
num_val_carvansara = 20*num_carvansara_images//100
valid_carvansara = []

for image_path in carvansara_images[:]:
    image_name = os.path.basename(image_path)
    base_name = image_name.replace("_bing_600x600.png","") if "bing"  in image_name  else image_name.replace("_google_600x600.png","")
    valid_carvansara += [image for image in carvansara_images if base_name in image]
    if len(valid_carvansara) >  num_val_carvansara:
        break
train_carvansara = [image for image in carvansara_images if image not in valid_carvansara]
print(f"Number of carvansara images in validation set: {len(valid_carvansara)}")
print(f"Number of carvansara images in train set:      {len(train_carvansara)}")

Number of carvansara images in validation set: 64
Number of carvansara images in train set:      258


In [4]:
valid_carvansara.sort()

In [5]:
valid_carvansara[:10]

['../dataset/carvansara_600x600/carvansara/001_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/001_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/003_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/003_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/007_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/007_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/008_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/008_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/020_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/020_google_600x600.png']

In [6]:
num_val_other = 20*num_other_images//100
valid_other = []


for image_path in other_images[:]:
    image_name = os.path.basename(image_path)
    base_name = image_name.replace("_bing_600x600.png","") if "bing"  in image_name  else image_name.replace("_google_600x600.png","")
    valid_other += [image for image in other_images if base_name in image]
    if len(valid_other) >  num_val_other:
        break
train_other = [image for image in other_images if image not in valid_other]
print(f"Number of other images in validation set: {len(valid_other)}")
print(f"Number of other images in train set:      {len(train_other)}")

Number of other images in validation set: 150
Number of other images in train set:      605


In [7]:
train_other.sort()

In [8]:
valid_carvansara[:10]

['../dataset/carvansara_600x600/carvansara/001_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/001_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/003_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/003_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/007_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/007_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/008_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/008_google_600x600.png',
 '../dataset/carvansara_600x600/carvansara/020_bing_600x600.png',
 '../dataset/carvansara_600x600/carvansara/020_google_600x600.png']

In [9]:
caransarea_train_path = os.path.join("../dataset/carvansara_dataset", "train", "carvansara")
caransarea_val_path = os.path.join("../dataset/carvansara_dataset", "valid", "carvansara")
other_train_path = os.path.join("../dataset/carvansara_dataset", "train", "other")
other_val_path = os.path.join("../dataset/carvansara_dataset", "valid", "other")

for path in [caransarea_train_path, caransarea_val_path, other_train_path, other_val_path]:
    if not os.path.exists(path):
        os.makedirs(path)

In [10]:
for image in train_other:
    copyfile(image, os.path.join(other_train_path, os.path.basename(image)))

In [11]:
for image in train_carvansara:
    copyfile(image, os.path.join(caransarea_train_path, os.path.basename(image)))

In [12]:
for image in valid_other:
    copyfile(image, os.path.join(other_val_path, os.path.basename(image)))

In [13]:
for image in valid_carvansara:
    copyfile(image, os.path.join(caransarea_val_path, os.path.basename(image)))