In [19]:
import shutil
import os
import numpy as np
from PIL import Image
#import argparse

#files = os.listdir(path)
#list_images = np.asarray(files)

In [162]:
'''
Split the files in AOLP dataset in test-train-validate subsets
'''

#defining the necessary directories
sub_directory = "RP"
directory = r'D:\datasets\AOLP\Subset_{}\Image'.format(sub_directory)
annotation_directory = r'D:\datasets\AOLP\Subset_{}\yolo_annotation'.format(sub_directory)
save_directory = r'D:\datasets\test-Train\{}'.format(sub_directory)


#calculate the number of files for test-train-validate split
num_total_files = len([entry for entry in os.listdir(directory) if os.path.isfile(os.path.join(directory, entry))]) - 1
num_train_files = int(num_total_files * 0.7)
num_val_files = int(num_total_files * 0.2)
num_test_files = num_total_files - num_train_files - num_val_files

#lets randomise the list so that consecutive images don't end up in same category
total_files = list(range(1,num_total_files + 1))
np.random.shuffle(total_files)

train_files = total_files[0:num_train_files]
val_files = total_files[num_train_files: num_train_files + num_val_files]
test_files = total_files[num_train_files + num_val_files:]



for image in os.listdir(directory):
    if image.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
        imagename = int(image[:-4]) #removing the extention
        src = os.path.join(directory, image)
        annotation_file = image[:-3] + "txt"
        src_annotation = os.path.join(annotation_directory, annotation_file)
        
        if imagename in train_files: 
            dst = os.path.join(save_directory, "train", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "train", "{}_".format(sub_directory) + annotation_file)
        if imagename in val_files:
            dst = os.path.join(save_directory, "val", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "val", "{}_".format(sub_directory) + annotation_file)
        if imagename in test_files:
            dst = os.path.join(save_directory, "test", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "test", "{}_".format(sub_directory) + annotation_file)
           

        shutil.copy(src, dst)
        shutil.copy(src_annotation, dst_annotation)
    

In [114]:
'''
Create yolo annotations for AOLP dataset
'''


base_directory = r'D:\datasets\AOLP\Subset_LE'
annotation_directory = os.path.join(base_directory, 'groundtruth_localization')
image_directory = os.path.join(base_directory, 'Image')
yolo_annotation_directory = os.path.join(base_directory, 'yolo_annotation')

for file in os.listdir(annotation_directory):
    
    if '_' in file:
        #because images having multiple license plates have multiple txt files with 
        #associated location
        image_name = file.split("_",1)[0] + ".jpg"
    else:
        image_name = file[:-3] + "jpg"
        
    #all images do not have a standard size
    image_path = os.path.join(image_directory, image_name)
    im = Image.open(image_path)
    w = im.size[0]
    h = im.size[1]
    
    txt_path = os.path.join(annotation_directory, file)
    with open(txt_path) as f:
        lines = f.readlines()
    
    bbox = None 
    for i in range(len(lines)):
        lines[i] = lines[i].replace("\n", "")
        if len(lines[i]) < 5:
            lines[i] = int(lines[i])
        elif 'e' in lines[i]:
            #some annotations have exponential instead of direct int as location for some reason
            #1.1100e+2 = 111  1.1100e+1 = 11
            exp = lines[i].split("+",1)[1]
            exp = int(exp)
            
            lines[i] = lines[i][0:4]
            if exp == 1:
                lines_float = float(lines[i]) * 10
                lines[i] = int(lines_float)
            elif exp == 2:
                lines_float = float(lines[i]) * 100
                lines[i] = int(lines_float)
            else:
                print("Error with file: {}".format(file))
        else:
            #some annotation files also have float instead of simple integers
            lines[i] = lines[i][0:4]
            lines_float = float(lines[i]) * 100
            lines[i] = int(lines_float)
        
            
    x1 = lines[0]    
    y1 = lines[1]
    x2 = lines[2]
    y2 = lines[3]
    
    if len(lines) > 4:
        print(file)
    
    x_center = ((x1 + x2) / 2) / w
    y_center = ((y1 + y2) / 2) / h
    width = abs((x2 - x1) / w)
    height = abs((y2 - y1) / h)
    
    if '_' in file:
        #if there are multiple license plates, we append the file instead of making a new one
        file = file.split("_",1)[0] + ".txt"
        yolo_file = os.path.join(yolo_annotation_directory, file)
        with open(yolo_file, 'a') as f:
            f.write("\n0 {} {} {} {}".format(x_center, y_center, width, height))
    else:
        yolo_file = os.path.join(yolo_annotation_directory, file)
        with open(yolo_file, 'w') as f:
            f.write("0 {} {} {} {}".format(x_center, y_center, width, height))
    
  

In [196]:
'''
create yolo annotation files for UCSD dataset
from training_set.txt and test_set.txt
'''

#define necessary directories
txt_path = r'D:\datasets\UCSD\data\cars\stills\test_set.txt'
image_directory = r'D:\datasets\UCSD\data\cars\stills\images'
yolo_annotation_directory = r'D:\datasets\UCSD\data\cars\stills\yolo_annotations'

#read the test_set.txt or training_set.txt
with open(txt_path) as f:
    lines = f.readlines()

#the info for one image is contained in 2 lines
#there step = 2
for i in range (0, len(lines), 2):
    #first line contains the relative address of image
    imagename = lines[i]
    imagename = imagename.split("/",2)[2] 
    imagename = imagename.split(".",1)[0] 

    #second line contains the location of license plate
    j = i+1
    annotation = lines[j]
    x1 = int(annotation.split(",",3)[0])
    y1 = int(annotation.split(",",3)[1])
    x2 = int(annotation.split(",",3)[2])
    y2 = int(annotation.split(",",3)[3][:-1])
    
    image_path = os.path.join(image_directory, imagename + ".jpg")
    im = Image.open(image_path)
    w = im.size[0]
    h = im.size[1]
    
    x_center = ((x1 + x2) / 2) / w
    y_center = ((y1 + y2) / 2) / h
    width = abs((x2 - x1) / w)
    height = abs((y2 - y1) / h)
    
    yolo_file = os.path.join(yolo_annotation_directory, imagename + ".txt")
    with open(yolo_file, 'a') as f:
        f.write("\n0 {} {} {} {}".format(x_center, y_center, width, height))

In [203]:
'''
renaming a bunch of files
'''

image_path = r'D:\datasets\UCSD\data\cars\stills\images'
anno_path = r'D:\datasets\UCSD\data\cars\stills\yolo_annotations'
new_path = r'D:\datasets\UCSD\data\cars\stills\New folder'

for files in os.listdir(new_path):
    if files.lower().endswith('.txt'):
        src = os.path.join(new_path, files)
        files = files.split("_",1)[0]
        dst = os.path.join(new_path, files + ".txt")
        
        os.rename(src, dst)
        

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'D:\\datasets\\UCSD\\data\\cars\\stills\\New folder\\cars4_002_jpg.rf.7c820685ab83b122c62fd7f033d63f57.txt' -> 'D:\\datasets\\UCSD\\data\\cars\\stills\\New folder\\cars4.txt'

In [223]:
'''
Split the files in UCSD dataset in test-train-validate subsets
'''

#defining the necessary directories
sub_directory = 'UCSD'
directory = r'D:\datasets\UCSD\data\cars\stills\images'
annotation_directory = r'D:\datasets\UCSD\data\cars\stills\yolo_annotations'
save_directory = r'D:\datasets\test-Train\UCSD'


#calculate the number of files for test-train-validate split
num_total_files = len([entry for entry in os.listdir(directory) if os.path.isfile(os.path.join(directory, entry))]) - 1
num_train_files = int(num_total_files * 0.7)
num_val_files = int(num_total_files * 0.2)
num_test_files = num_total_files - num_train_files - num_val_files

#lets create and randomise the list so that consecutive images don't end up in same category
total_files = []
for images in os.walk(directory):
    total_files.append(images)
total_files = total_files[0][2]
np.random.shuffle(total_files)

train_files = total_files[0:num_train_files]
val_files = total_files[num_train_files: num_train_files + num_val_files]
test_files = total_files[num_train_files + num_val_files:]



for image in os.listdir(directory):
    if image.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
        imagename = image[:-4] #removing the extention
        src = os.path.join(directory, image)
        annotation_file = image[:-3] + "txt"
        src_annotation = os.path.join(annotation_directory, annotation_file)
        print(imagename)
        if image in train_files: 
            dst = os.path.join(save_directory, "train", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "train", "{}_".format(sub_directory) + annotation_file)
        if image in val_files:
            dst = os.path.join(save_directory, "val", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "val", "{}_".format(sub_directory) + annotation_file)
        if image in test_files:
            dst = os.path.join(save_directory, "test", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "test", "{}_".format(sub_directory) + annotation_file)
           

        shutil.copy(src, dst)
        shutil.copy(src_annotation, dst_annotation)

cars001
cars002
cars003
cars004
cars005
cars006
cars007
cars008
cars009
cars010
cars011
cars012
cars013
cars014
cars015
cars016
cars017
cars018
cars019
cars020
cars021
cars022
cars023
cars024
cars025
cars026
cars027
cars028
cars029
cars030
cars031
cars032
cars033
cars036
cars039
cars040
cars042
cars043
cars044
cars045
cars049
cars050
cars051
cars052
cars053
cars055
cars056
cars057
cars058
cars060
cars061
cars062
cars063
cars064
cars065
cars066
cars067
cars068
cars069
cars070
cars071
cars072
cars073
cars074
cars075
cars076
cars077
cars2_001
cars2_002
cars2_003
cars2_004
cars2_005
cars2_006
cars2_007
cars2_008
cars2_009
cars2_010
cars2_011
cars2_012
cars2_013
cars2_014
cars2_015
cars2_016
cars2_017
cars2_018
cars2_019
cars2_020
cars2_021
cars2_022
cars2_023
cars2_024
cars2_025
cars2_026
cars2_027
cars2_028
cars2_029
cars2_030
cars2_031
cars2_032
cars2_033
cars2_034
cars2_035
cars2_036
cars2_037
cars2_038
cars2_039
cars2_040
cars2_041
cars2_042
cars2_043
cars2_044
cars2_045
cars2_046
cars

In [224]:
'''
Split the files in OID dataset in test-train-validate subsets
'''

#defining the necessary directories
sub_directory = 'OID'
directory = r'D:\datasets\OID\New folder\image'
annotation_directory = r'D:\datasets\OID\New folder\annotation'
save_directory = r'D:\datasets\test-Train\OID'


#calculate the number of files for test-train-validate split
num_total_files = len([entry for entry in os.listdir(directory) if os.path.isfile(os.path.join(directory, entry))]) - 1
num_train_files = int(num_total_files * 0.7)
num_val_files = int(num_total_files * 0.2)
num_test_files = num_total_files - num_train_files - num_val_files

#lets create and randomise the list so that consecutive images don't end up in same category
total_files = []
for images in os.walk(directory):
    total_files.append(images)
total_files = total_files[0][2]
np.random.shuffle(total_files)

train_files = total_files[0:num_train_files]
val_files = total_files[num_train_files: num_train_files + num_val_files]
test_files = total_files[num_train_files + num_val_files:]



for image in os.listdir(directory):
    if image.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
        imagename = image[:-4] #removing the extention
        src = os.path.join(directory, image)
        annotation_file = image[:-3] + "txt"
        src_annotation = os.path.join(annotation_directory, annotation_file)
        print(imagename)
        if image in train_files: 
            dst = os.path.join(save_directory, "train", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "train", "{}_".format(sub_directory) + annotation_file)
        if image in val_files:
            dst = os.path.join(save_directory, "val", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "val", "{}_".format(sub_directory) + annotation_file)
        if image in test_files:
            dst = os.path.join(save_directory, "test", "{}_".format(sub_directory) + image)
            dst_annotation = os.path.join(save_directory, "test", "{}_".format(sub_directory) + annotation_file)
           

        shutil.copy(src, dst)
        shutil.copy(src_annotation, dst_annotation)

0006dc0977056410
0007870724402b51
000812dcf304a8e7
000883b46adeca7e
000f52302c1341eb
00103aa8df33633a
00192c1070c51ae3
001bebecea382500
001ca84e28488f7a
001ddeda193f5789
0021af0b921af690
0022fdfc1880d432
002519f868563098
002521102ecfac4c
002635f5f3dfd5b9
00268794e47eba14
002b11030ee89858
002f860e692757f7
002fbf2de58da8b5
0039ac4bfb8bd69d
003a5aaf6d17c917
003a9e99a2753141
003df8cf2effae50
00403b390556e56d
00438dad193f0c28
0043ea91600e3cb8
0045d88407bab692
004846b7de6995f2
00492e9dbaf3c2a1
0049ed70c1b2d480
004abd1e49165b1c
004e03d45963728e
004fddebf0ac943c
00541a1a33358591
005706e3eb3d276f
005932632ad92371
005ced5a9813fb0f
0063cc2c5f7d82ed
00723dac8201a83e
007909bbfb014a75
0080f94f60085954
00831662d2ba731a
008637722500f239
0090a11fa8b2b69d
0092a20b63799744
009824ffd4586a33
00a7d31c6cc6b7f3
00a8d9ab7c3b9c0e
00bffc29492dbc26
00d1a3fa14b9c5e1
00d202cef5442753
00d366b6a4bf4e15
00d5963c8f8e5381
00d763761e47f723
00d90448dcea9140
00d9a751d73ee232
00d9db3d2c186504
00e4cd60e84c0463
01121f7b338535

In [50]:
'''
create train.txt and val.txt in format "image_path1 x1,y1,x2,y2,id x1,y1,x2,y2,id x1,y1,x2,y2,id ..."
from yolo annotations
'''

#define necessary directories
path = r'D:\datasets\test-Train\train'
dst_path = r'D:\Python\PyTorch\yolov4\data'

for file in os.listdir(path):
    if file.lower().endswith(('.txt')):
        annotation_file = os.path.join(path, file)
        
        #read the yolo annotation file
        with open(annotation_file) as f:
            lines = f.readlines()
            
        #if len(lines) = 1, it causes an error. maybe because there isn' "\n" to replace
        for j in range(len(lines)):
            if "\n" in lines[j]:
                lines[j] = lines[j].replace("\n", "")

        #get the info about image
        image_path = os.path.join(path, file[:-3] + "jpg")
        im = Image.open(image_path)
        w = im.size[0]
        h = im.size[1]

        #declare a list to convert the annotations in format "image_path1 x1,y1,x2,y2,id x1,y1,x2,y2,id x1,y1,x2,y2,id ..."
        lst = [image_path]
        
        #for each object in the image
        for i in range(len(lines)):
            #because in some annotation files the first line is blank
            if lines[i]!="":
                #assign the respective variables
                #not using class_id cause I may have mistakenly labelled a class as LP instead of 0
                class_id = float(lines[i].split(" ",4)[0])
                x_center = float(lines[i].split(" ",4)[1])
                y_center = float(lines[i].split(" ",4)[2])
                width = float(lines[i].split(" ",4)[3])
                height = float(lines[i].split(" ",4)[4])

                #convert the location of object from respective to absolute
                float_x_center = w * x_center
                float_y_center = h * y_center
                float_width = w * width
                float_height = h * height

                #calculate the values of x1,x2,y1,y2
                x1 = int(float_x_center - (float_width/2))
                x2 = int(float_x_center + (float_width/2))
                y1 = int(float_y_center - (float_height/2))
                y2 = int(float_y_center + (float_height/2))

                #assign the location of one object to a temporary variable
                temp = '{},{},{},{},0'.format(x1,x2,y1,y2)

                lst.append(temp)

            
        annotation = ' '.join(lst)
        annotation = "{}\n".format(annotation)
        print(annotation)
        
        dst_file = 'train.txt'
        dst = os.path.join(dst_path,dst_file)
        with open(dst, 'a') as f:
            f.write(annotation)

 

D:\datasets\test-Train\train\AC_10.jpg 115,192,193,216,0

D:\datasets\test-Train\train\AC_100.jpg 14,93,159,179,0

D:\datasets\test-Train\train\AC_102.jpg 118,196,123,149,0

D:\datasets\test-Train\train\AC_103.jpg 216,292,130,153,0

D:\datasets\test-Train\train\AC_104.jpg 62,132,150,167,0

D:\datasets\test-Train\train\AC_106.jpg 39,111,147,168,0

D:\datasets\test-Train\train\AC_108.jpg 132,211,177,199,0

D:\datasets\test-Train\train\AC_11.jpg 130,204,149,173,0

D:\datasets\test-Train\train\AC_111.jpg 139,216,168,187,0

D:\datasets\test-Train\train\AC_115.jpg 97,170,187,209,0

D:\datasets\test-Train\train\AC_117.jpg 164,247,191,217,0

D:\datasets\test-Train\train\AC_118.jpg 240,314,155,183,0

D:\datasets\test-Train\train\AC_119.jpg 227,303,160,182,0

D:\datasets\test-Train\train\AC_12.jpg 59,131,157,175,0

D:\datasets\test-Train\train\AC_120.jpg 79,154,181,208,0

D:\datasets\test-Train\train\AC_122.jpg 258,334,105,124,0

D:\datasets\test-Train\train\AC_123.jpg 37,108,129,154,0

D:\datas

In [14]:
'''
some txt files had blank first lines
so we removed them to avoid any further errors
'''

import os

path = r'D:\Python\PyTorch\yolov4\labels\val'
new_path = r'D:\Python\PyTorch\yolov4\labels'
#creating a list of files
defected = []
for files in os.listdir(path):
    filepath = os.path.join(path, files)
    with open(filepath) as f:
        lines = f.readlines()
    for j in range(len(lines)):
        if "\n" in lines[j]:
            lines[j] = lines[j].replace("\n", "")
        #checking if any line is blank
        if lines[j]=="":
            defected.append(files)
            
for i in range(len(defected)):
    oldfilepath = os.path.join(path,defected[i])
    newfilepath = os.path.join(new_path,defected[i])
    with open(oldfilepath) as f:
        lines = f.readlines()
    newlines = []
    for j in range(len(lines)):
        #copying the contents of defective file to new file without the blank lines
        if lines[j]!="\n":
            newlines.append(lines[j])
    printlines = " ".join(newlines)
    with open(newfilepath, 'a') as f:
            f.write(printlines)
    
#     print(newfilepath)


In [None]:
import os

image_files = []
os.chdir(os.path.join("data", "test"))
for filename in os.listdir(os.getcwd()):
    if filename.endswith(".jpg"):
        image_files.append("data/test/" + filename)
os.chdir("..")
with open("test.txt", "w") as outfile:
    for image in image_files:
        outfile.write(image)
        outfile.write("\n")
    outfile.close()
os.chdir("..")