In [None]:
# import the os module
import os
import shutil
import random

# Remember the working directory at start-up and print it; the helper functions
# below read this module-level `path` when they build dataset locations.
path = os.getcwd()  
print ("The current working directory is %s" % path)  

## CREATE FILESTRUCTURE


Foldername
<ul>
<li>images
<ul><li>val</li><li>train</li></ul></li>
<li>labels
<ul><li>val</li><li>train</li></ul></li>
</ul>

<b>USE</b>: createFilestructure(<i>foldername</i>)

In [None]:
def createDirectory(path):
    """Create the single directory `path` and report the outcome on stdout.

    Any OSError raised by os.mkdir (directory already exists, missing parent,
    no permission, ...) is caught and reported as a failure instead of being
    propagated. Nothing is returned.
    """
    try:
        os.mkdir(path)
    except OSError:
        print("Creation of the directory %s failed" % path)
        return
    print("Successfully created the directory %s " % path)
    
            

In [None]:
def createFilestructure(foldername, baseDir=None):
    """Create the dataset skeleton <baseDir>/<foldername>/{images,labels}/{val,train}.

    Parameters
    ----------
    foldername : str
        Name of the dataset folder. May contain sub-folders
        (e.g. 'custom/syntheticData'); missing parents are created.
    baseDir : str, optional
        Directory the dataset folder is created in. Defaults to the
        module-level `path` (the working directory recorded at start-up).

    The directories are created directly at their final location. Building
    them in the current working directory and moving them afterwards would
    also relocate any unrelated, pre-existing `images`/`labels` folder and
    fails as soon as the target already contains them.
    Calling the function again for an existing structure is a no-op.
    """
    root = path if baseDir is None else baseDir
    newPath = os.path.join(root, foldername)

    for dir1 in ("images", "labels"):
        for dir2 in ("val", "train"):
            # exist_ok makes re-runs harmless; real failures (e.g. permissions)
            # still raise OSError instead of being silently swallowed.
            os.makedirs(os.path.join(newPath, dir1, dir2), exist_ok=True)

    print("Successfully created the directory %s " % newPath)

## CREATE DATASET


In [None]:
def moveLabelFiles(dir):
    """Copy the annotation file of every split image into the matching label folder.

    For each `.jpg` (extension matched case-insensitively) found in
    `<dir>/images/val` and `<dir>/images/train`, the file
    `<dir>/annotations/<name>.txt` is copied to `<dir>/labels/val` or
    `<dir>/labels/train` respectively. Despite the function name the
    annotation files are copied, not moved.
    """
    def jpgStems(folder):
        # File names without their 4-character '.jpg' suffix.
        stems = []
        for name in os.listdir(folder):
            if name[-4:].lower() == '.jpg':
                stems.append(name[:-4])
        return stems

    # Both folders are listed before anything is copied.
    valStems = jpgStems('%s/images/val' % dir)
    trainStems = jpgStems('%s/images/train' % dir)

    for split, stems in (('val', valStems), ('train', trainStems)):
        target = '%s/labels/%s' % (dir, split)
        for stem in stems:
            shutil.copy('%s/annotations/%s.txt' % (dir, stem), target)


In [None]:
def moveFiles(imageDir, labelDir, outputDir):
    """Split the images in `imageDir` and copy them into the dataset folders.

    The split comes from splitData(imageDir). For the train and val parts the
    image `<imageDir>/<name>.jpg` and its label `<labelDir>/<name>.txt` are
    copied to `<outputDir>/images/<split>` and `<outputDir>/labels/<split>`.
    Test images are copied to `<path>/testData`, where `path` is the
    module-level working directory; their label files are not copied.

    All destination directories are created if missing. Without that,
    shutil.copy would treat a missing directory name as a *file* name and
    every copy would overwrite the previous one.
    Source files are copied, not moved.
    """
    train, val, test = splitData(imageDir)

    testDir = '%s/testData' % (path)
    destinations = {
        split: ('%s/images/%s' % (outputDir, split), '%s/labels/%s' % (outputDir, split))
        for split in ('val', 'train')
    }
    for imgDst, lblDst in destinations.values():
        os.makedirs(imgDst, exist_ok=True)
        os.makedirs(lblDst, exist_ok=True)
    os.makedirs(testDir, exist_ok=True)

    for split, names in (('val', val), ('train', train)):
        imgDst, lblDst = destinations[split]
        for name in names:
            shutil.copy('%s/%s.jpg' % (imageDir, name), imgDst)
            shutil.copy('%s/%s.txt' % (labelDir, name), lblDst)

    for name in test:
        shutil.copy('%s/%s.jpg' % (imageDir, name), testDir)


In [None]:
def splitData(imageDir):
    """Randomly split the `.jpg` images of `imageDir` into train, val and test.

    90 % of the images (rounded down) form the train+val pool and the rest
    is the test set; 90 % of the pool (rounded down) becomes the training
    set and the remainder the validation set.

    Returns
    -------
    list
        `[train, val, test]`, each a list of file names without the
        `.jpg` extension (extension matched case-insensitively).

    The names are sorted before sampling because os.listdir returns them in
    arbitrary order; with sorting, calling random.seed() beforehand yields a
    reproducible split.
    """
    files = sorted(f[:-4] for f in os.listdir(imageDir) if f[-4:].lower() == '.jpg')

    # first cut: train+val pool vs. test
    trainval = random.sample(files, int(0.9 * len(files)))
    inTrainval = set(trainval)  # set lookup avoids quadratic list scans
    test = [f for f in files if f not in inTrainval]

    # second cut: train vs. val inside the pool
    train = random.sample(trainval, int(0.9 * len(trainval)))
    inTrain = set(train)
    val = [f for f in trainval if f not in inTrain]
    return [train, val, test]

#### USAGE:
createFilestructure(datasetName)

imageDir = 'path/to/images'

labelDir = 'path/to/labels'

outputDir = '%s/datasetName'%(path)

moveFiles(imageDir, labelDir, outputDir)

## CREATE LOCATION FILE
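<b>createLocationFile</b>: Writes the file <i>&lt;type&gt;Locations.txt</i> (type is <i>val</i> or <i>train</i>), listing the full path of every image of that type in the dataset folder, one path per line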

<b>getLocationFiles</b>: Creates two locationFiles for a folder; one for the validation set and one for the training set. Moves the files into the folder 

In [None]:
def createLocationFile(type, files, foldername):
    """Write `<type>Locations.txt` listing the image path of every entry in `files`.

    Parameters
    ----------
    type : str
        Split name (the callers in this notebook pass 'val' or 'train');
        used both in the output file name and in the image paths.
    files : iterable of str
        Image names without extension.
    foldername : str
        Dataset folder, relative to the module-level `path`.

    The file is created in the current working directory and contains one
    line `<path>/<foldername>/images/<type>/<name>.jpg` per image.
    """
    # `with` guarantees the file is flushed and closed before callers move it.
    with open('%sLocations.txt' % (type), 'w') as list_file:
        for f in files:
            list_file.write('%s/%s/images/%s/%s.jpg' % (path, foldername, type, f))
            list_file.write('\n')

In [None]:
# Dataset folder locations. These are relative paths; the usage examples in
# this notebook combine them with the working directory stored in `path`.
syntheticData = 'custom/syntheticData'
dronebasedData = 'custom/dronebasedData'
combinedData = 'custom/combinedData'
combinedTiledData = 'custom/combinedTiledData'



In [None]:
def getLocationFiles(folder):
    """Create `valLocations.txt` and `trainLocations.txt` for a dataset folder.

    The `.jpg` images (extension matched case-insensitively) in
    `<path>/<folder>/images/val` and `.../images/train` are listed, one
    location file per split is written via createLocationFile, and both files
    are then moved from `<path>` into `<path>/<folder>`. `path` is the
    module-level working directory.

    The move targets the full destination *file* path. Moving into the
    directory instead makes shutil.move raise when a location file from an
    earlier run is already there, so the function could not be re-run.
    """
    datasetDir = '%s/%s' % (path, folder)

    valFiles = [f[:-4] for f in os.listdir('%s/images/val' % datasetDir) if f[-4:].lower() == '.jpg']
    trainFiles = [f[:-4] for f in os.listdir('%s/images/train' % datasetDir) if f[-4:].lower() == '.jpg']

    createLocationFile('val', valFiles, folder)
    createLocationFile('train', trainFiles, folder)

    for listName in ('valLocations.txt', 'trainLocations.txt'):
        # explicit file destination: replaces a stale list from a previous run
        shutil.move('%s/%s' % (path, listName), '%s/%s' % (datasetDir, listName))
    

#### USAGE:
getLocationFiles(inputfolder)

## COUNT IMAGES
<b>countAnnotations</b>: counts annotations in a labelfile 

<b>countImagesAndLabelsOfType</b>: counts images and labels within a type folder

<b>getImagesAndLabelsCount</b>: Prints, for the val and train sets combined, the number of images containing each class, the number of annotations per class, and the total number of annotations.  


In [None]:
def countAnnotations(filepath):
    """Count the annotations per class in one label file.

    Each non-blank line must consist of five whitespace-separated fields:
    `class xcenter ycenter width height`. Only the class field is evaluated.

    Parameters
    ----------
    filepath : str
        Path of the label file.

    Returns
    -------
    list of int
        `[other, can, plastic, plasticBottle]`, i.e. the number of lines
        whose class field is '0', '1', '2' and '3'. Lines with any other
        class value are ignored.

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly five fields.
    """
    classIndex = {'0': 0, '1': 1, '2': 2, '3': 3}
    counts = [0, 0, 0, 0]
    with open(filepath) as fp:
        for lineNo, line in enumerate(fp, 1):
            fields = line.split()
            if not fields:
                # blank or whitespace-only line (e.g. trailing newline at EOF)
                continue
            if len(fields) != 5:
                raise ValueError('%s, line %d: expected 5 fields, got %d'
                                 % (filepath, lineNo, len(fields)))
            idx = classIndex.get(fields[0])
            if idx is not None:
                counts[idx] += 1
    return counts


In [None]:
from operator import add 
def countImagesAndLabelsOfType(imgdir, type):
    """Aggregate the per-class counts over all label files in `<imgdir>/<type>`.

    Every file ending in `.txt` (matched case-insensitively) is passed to
    countAnnotations as `<imgdir>/<type>/<name>.txt`.

    Returns `[labelCount, imageCount]`, two lists with one entry per class:
    labelCount is the total number of annotations of that class, imageCount
    the number of label files containing at least one annotation of it.
    """
    labelCount = [0, 0, 0, 0]
    imageCount = [0, 0, 0, 0]

    for name in os.listdir('%s/%s' % (imgdir, type)):
        if name[-4:].lower() != '.txt':
            continue
        perFile = countAnnotations('%s/%s/%s.txt' % (imgdir, type, name[:-4]))
        # element-wise sums; a class counts once per file for imageCount
        labelCount = [total + n for total, n in zip(labelCount, perFile)]
        imageCount = [total + (1 if n > 0 else 0) for total, n in zip(imageCount, perFile)]

    return [labelCount, imageCount]

In [None]:
def getImagesAndLabelsCount(labelDir):
    """Print the annotation statistics of a label directory.

    `labelDir` must contain the sub-folders `val` and `train`; both are
    evaluated with countImagesAndLabelsOfType and their per-class results
    are added up. Printed are: the directory, the number of label files
    containing each class, the number of annotations per class, and the
    total number of annotations. Nothing is returned.
    """
    print(labelDir)
    # the helper defined in this notebook is countImagesAndLabelsOfType
    valCount, imgvalcount = countImagesAndLabelsOfType(labelDir, 'val')
    trainCount, imgtraincount = countImagesAndLabelsOfType(labelDir, 'train')
    totalCount = list(map(add, valCount, trainCount))
    totalimgCount = list(map(add, imgvalcount, imgtraincount))
    print('Images with annotations: %s'%totalimgCount)
    print('Total annotations: %s'%totalCount)
    print('Annotations: %s' %(sum(totalCount)))


#### USAGE

print('DRONEBASED:')
droneDir = '%s/%s/labels'%(path, dronebasedData)
getImagesAndLabelsCount(droneDir)

print('SYNTHETIC:')
syntDir = '%s/%s/labels'%(path, syntheticData)
getImagesAndLabelsCount(syntDir)

print('COMBINED:')
combDir = '%s/%s/labels'%(path, combinedData)
getImagesAndLabelsCount(combDir)

print('COMBINED TILED:')
combTDir = '%s/%s/labels'%(path, combinedTiledData)
getImagesAndLabelsCount(combTDir)

syntheticData = 'custom/syntheticData'
dronebasedData = 'custom/dronebasedData'
combinedData = 'custom/combinedData'
combinedTiledData = 'custom/combinedTiledData'

