In [1]:
import cv2
import numpy as np
import os
from tqdm import tqdm
from IPython.display import clear_output
import random
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import shutil
import yaml

In [2]:
from spiral_tools.utils import path_sequence

#### YAML dosyası formatı

train: images/train
val: images/val
test: images/test

names:
  0: class_one
  1: class_two
  2: class_three

In [3]:
def split_dataset(image_file_list,label_file_list,test_size=0.1,val_size=0.1):
    """Split parallel image/label lists into train, test and validation parts.

    The split is done in two passes with a fixed ``random_state`` of 42:
    first ``test_size`` of the input is set aside as the test part, then
    ``val_size`` of what is left is set aside as the validation part. Note
    that ``val_size`` is therefore a fraction of the remainder, not of the
    full input.

    Both lists are passed to ``train_test_split`` together, so
    ``image_file_list[i]`` and ``label_file_list[i]`` are expected to
    describe the same sample.

    Args:
        image_file_list: Sequence of image entries.
        label_file_list: Sequence of label entries, parallel to the images.
        test_size: Fraction handed to the first split as ``test_size``.
        val_size: Fraction handed to the second split as ``test_size``.

    Returns:
        A 6-tuple in this order: train images, test images, validation
        images, train labels, test labels, validation labels.
    """
    # Pass 1: carve the test part out of the full input.
    images_rest, images_test, labels_rest, labels_test = train_test_split(
        image_file_list,
        label_file_list,
        test_size=test_size,
        random_state=42,
    )

    # Pass 2: carve the validation part out of what remains.
    images_train, images_val, labels_train, labels_val = train_test_split(
        images_rest,
        labels_rest,
        test_size=val_size,
        random_state=42,
    )

    return (
        images_train,
        images_test,
        images_val,
        labels_train,
        labels_test,
        labels_val,
    )

#### YOLO veri seti formatında dosyaları oluşturalım

In [4]:
# Interactive cell: asks for a dataset name and the class labels, then creates
# the empty YOLO-style folder tree
#   ./merged/<index>_<name>/detect/{images,labels}/{test,train,val}
# together with its detect.yaml. `new_file_path` is left in the namespace for
# the cells that follow.

# Dataset name: trimmed, with every space replaced by an underscore.
print("Dataset İsmi Girin")
new_ds_name = str(input("> "))
new_ds_name = new_ds_name.strip().replace(" ", "_")
clear_output()
print("> ",new_ds_name)

# Contents of detect.yaml; `names` maps class index -> class name.
detect_yaml = {
    'train': 'images/train',
    'val': 'images/val',
    'test': 'images/test',
    'names': {}
}

print("Sırası ile etiketleri girin")
print("Bitirmek için bitir yazın")
counter = 0
while True:
    # Strip the answer so that "bitir " still ends the loop and class names
    # are stored without stray whitespace.
    k = input(f"{counter} -> Değer:").strip()
    if k == "bitir":
        break
    if not k:
        # An empty line is not a usable class name; ask again for this index.
        continue
    detect_yaml["names"][counter] = k
    counter += 1
print(detect_yaml)

# Create the folders. The numeric prefix is the number of entries already
# present in ./merged, so the folder is created first if it is missing
# (os.listdir would otherwise raise FileNotFoundError on the first run).
merged_root = "./merged"
os.makedirs(merged_root, exist_ok=True)
ds_count = len(os.listdir(merged_root))
new_file_path = f"{merged_root}/{ds_count}_{new_ds_name}"

if not os.path.exists(new_file_path):
    # makedirs creates new_file_path and "detect" as intermediate folders.
    # NOTE(review): assumes path_sequence joins its arguments into a regular
    # filesystem path, as the printed paths in later cells suggest.
    for kind in ["images","labels"]:
        for part in ["test","train","val"]:
            os.makedirs(path_sequence(new_file_path,"detect",kind,part))
    # utf-8 + allow_unicode keep non-ASCII class names readable in the file
    # instead of being written as escape sequences.
    with open(path_sequence(new_file_path,"detect","detect.yaml"),"w",encoding="utf-8") as f:
        yaml.dump(detect_yaml,f, default_flow_style=False, allow_unicode=True)
    print(new_ds_name,"oluşturuldu")
else:
    print("Bu dosya zaten var")

>  SpirAl_Vision_V4_edited
Sırası ile etiketleri girin
Bitirmek için bitir yazın
0 -> Değer:tasit
1 -> Değer:insan
2 -> Değer:UAP
3 -> Değer:UAI
4 -> Değer:bitir
{'train': 'images/train', 'val': 'images/val', 'test': 'images/test', 'names': {0: 'tasit', 1: 'insan', 2: 'UAP', 3: 'UAI'}}
SpirAl_Vision_V4_edited oluşturuldu


# İŞLEM

In [5]:
# Merge every dataset under ./datasets into the folder created earlier
# (`new_file_path` must already be defined by the scaffold cell). Each source
# part (val/test/train) is re-split with split_dataset and the files are
# copied into the matching images/<split> and labels/<split> folders, with the
# source dataset name prefixed to the file name to avoid collisions.


def _pair_by_stem(image_names, label_names):
    """Return (images, labels) lists where entry i of both share a file stem.

    os.listdir gives no ordering guarantee, so pairing two listings by
    position can attach a label to the wrong image. Matching on the name
    without extension avoids that, and sorting makes the seeded split
    reproducible between runs. Files without a partner are left out.
    """
    labels_by_stem = {os.path.splitext(name)[0]: name for name in label_names}
    paired_images, paired_labels = [], []
    for image_name in sorted(image_names):
        label_name = labels_by_stem.get(os.path.splitext(image_name)[0])
        if label_name is not None:
            paired_images.append(image_name)
            paired_labels.append(label_name)
    return paired_images, paired_labels


def _copy_split(image_names, label_names, images_dir, labels_dir, target_root, split, prefix):
    """Copy paired image/label files into the `split` folders of target_root."""
    for im_file, lbl_file in zip(image_names, label_names):
        im_source_path = path_sequence(images_dir, im_file)
        im_target_path = path_sequence(target_root, "detect", "images", split, f"{prefix}_{im_file}")
        shutil.copyfile(im_source_path, im_target_path)

        lbl_source_path = path_sequence(labels_dir, lbl_file)
        lbl_target_path = path_sequence(target_root, "detect", "labels", split, f"{prefix}_{lbl_file}")
        shutil.copyfile(lbl_source_path, lbl_target_path)


root =  "./datasets"
ds_names_list = os.listdir(root)
for ds_name in tqdm(ds_names_list):

    clear_output()
    print("> işleniyor:",ds_name)
    # Iterating the list directly (no enumerate) lets tqdm know the total.
    for part in tqdm(["val","test","train"]):
        images_file_path = path_sequence(root,ds_name,"detect","images",part)
        labels_file_path = path_sequence(root,ds_name,"detect","labels",part)
        print(">> ",images_file_path,labels_file_path)

        # A stray file in ./datasets or a dataset without this part would make
        # os.listdir raise; report it and move on instead.
        if not (os.path.isdir(images_file_path) and os.path.isdir(labels_file_path)):
            print(">>> Bu dosyada veri bulunmuyor: ",ds_name," ",part)
            continue

        image_names_list = os.listdir(images_file_path)
        label_names_list = os.listdir(labels_file_path)
        print(">> Frame Dosyası Sayısı ",len(image_names_list)," Etiket Dosyası Sayısı >",len(label_names_list),"\n")

        paired_images, paired_labels = _pair_by_stem(image_names_list, label_names_list)
        skipped_images = len(image_names_list) - len(paired_images)
        skipped_labels = len(label_names_list) - len(set(paired_labels))
        if skipped_images or skipped_labels:
            print(">>> Eşi bulunmayan dosyalar atlandı: görüntü",skipped_images,"etiket",skipped_labels)

        # Same threshold as before: more than 3 samples are needed for the
        # two-stage split to be attempted.
        if len(paired_images) > 3:
            images_train, images_test, images_val, labels_train, labels_test, labels_val = split_dataset(paired_images,paired_labels)

            # Copy each split's images and labels into the merged dataset.
            for split, split_images, split_labels in (
                ("train", images_train, labels_train),
                ("test", images_test, labels_test),
                ("val", images_val, labels_val),
            ):
                _copy_split(split_images, split_labels, images_file_path, labels_file_path, new_file_path, split, ds_name)
        else:
            print(">>> Bu dosyada veri bulunmuyor: ",ds_name," ",part)



> işleniyor: Visdrone_distil



0it [00:00, ?it/s][A

>>  ./datasets/Visdrone_distil/detect/images/val ./datasets/Visdrone_distil/detect/labels/val
>> Frame Dosyası Sayısı  105  Etiket Dosyası Sayısı > 105 




1it [00:00,  2.50it/s][A

>>  ./datasets/Visdrone_distil/detect/images/test ./datasets/Visdrone_distil/detect/labels/test
>> Frame Dosyası Sayısı  917  Etiket Dosyası Sayısı > 917 




2it [00:01,  1.07s/it][A

>>  ./datasets/Visdrone_distil/detect/images/train ./datasets/Visdrone_distil/detect/labels/train
>> Frame Dosyası Sayısı  2566  Etiket Dosyası Sayısı > 2566 




3it [00:06,  2.03s/it][A
100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [00:51<00:00,  4.70s/it]


Verileri Saydırmak

In [6]:
# Count the image files of every dataset under ./datasets, per part
# (val/test/train), and accumulate the grand total in `sum_`.
root =  "./datasets"
ds_names_list = os.listdir(root)

sum_ = 0
for ds_name in tqdm(ds_names_list):

    print("> işleniyor:",ds_name)
    # No inner progress bar: a 3-item loop only adds "3it" noise to the output.
    for i,part in enumerate(["val","test","train"]):
        images_file_path = path_sequence(root,ds_name,"detect","images",part)
        print(i,images_file_path)
        # List the directory once and reuse the count for both the printout
        # and the running total.
        image_count = len(os.listdir(images_file_path))
        print(image_count)
        sum_ += image_count
        

  0%|                                                                                            | 0/9 [00:00<?, ?it/s]

> işleniyor: Aerial_Maritime



3it [00:00, 333.36it/s]


0 ./datasets/Aerial_Maritime/detect/images/val
19
1 ./datasets/Aerial_Maritime/detect/images/test
14
2 ./datasets/Aerial_Maritime/detect/images/train
70
> işleniyor: Augmanted_frames_ds



0it [00:00, ?it/s][A
3it [00:00, 19.54it/s][A
 22%|██████████████████▋                                                                 | 2/9 [00:00<00:00, 11.94it/s]

0 ./datasets/Augmanted_frames_ds/detect/images/val
396
1 ./datasets/Augmanted_frames_ds/detect/images/test
464
2 ./datasets/Augmanted_frames_ds/detect/images/train
3496
> işleniyor: UYZ_2021_etiketli



0it [00:00, ?it/s][A

0 ./datasets/UYZ_2021_etiketli/detect/images/val
0
1 ./datasets/UYZ_2021_etiketli/detect/images/test
0
2 ./datasets/UYZ_2021_etiketli/detect/images/train


3it [00:00, 32.45it/s]


3279
> işleniyor: UYZ_2022_OTURUM2_1



3it [00:00, 76.19it/s]
 44%|█████████████████████████████████████▎                                              | 4/9 [00:00<00:00, 13.38it/s]

0 ./datasets/UYZ_2022_OTURUM2_1/detect/images/val
0
1 ./datasets/UYZ_2022_OTURUM2_1/detect/images/test
0
2 ./datasets/UYZ_2022_OTURUM2_1/detect/images/train
1046
> işleniyor: UYZ_2022_OTURUM2_2



3it [00:00, 673.86it/s]


0 ./datasets/UYZ_2022_OTURUM2_2/detect/images/val
0
1 ./datasets/UYZ_2022_OTURUM2_2/detect/images/test
0
2 ./datasets/UYZ_2022_OTURUM2_2/detect/images/train
549
> işleniyor: UYZ_2022_OTURUM2_3



3it [00:00, 1499.75it/s]


0 ./datasets/UYZ_2022_OTURUM2_3/detect/images/val
0
1 ./datasets/UYZ_2022_OTURUM2_3/detect/images/test
0
2 ./datasets/UYZ_2022_OTURUM2_3/detect/images/train
541
> işleniyor: UYZ_2022_OTURUM2_4



3it [00:00, 416.25it/s]


0 ./datasets/UYZ_2022_OTURUM2_4/detect/images/val
0
1 ./datasets/UYZ_2022_OTURUM2_4/detect/images/test
0
2 ./datasets/UYZ_2022_OTURUM2_4/detect/images/train
724
> işleniyor: UYZ_2022_OTURUM2_5



3it [00:00, 748.76it/s]


0 ./datasets/UYZ_2022_OTURUM2_5/detect/images/val
0
1 ./datasets/UYZ_2022_OTURUM2_5/detect/images/test
0
2 ./datasets/UYZ_2022_OTURUM2_5/detect/images/train
1163
> işleniyor: visdrone



3it [00:00, 93.78it/s]

0 ./datasets/visdrone/detect/images/val
548
1 ./datasets/visdrone/detect/images/test
1610
2 ./datasets/visdrone/detect/images/train
6471



100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 24.65it/s]


In [7]:
# Display the total number of image files accumulated by the counting cell above.
sum_

20390