In [4]:
import os
import cv2
import numpy as np
import math
import matplotlib.pyplot as plt
import random
IMAGE_SIZE = (384,384)
import shutil

In [5]:
#This function will remove the first line 
#in all the text files of labels 
#as the first line is YOLO_OBB
def remove_first_line(path):
    """Strip a leading ``YOLO_OBB`` header line from every label file in ``path``.

    Only directory entries whose name ends in ``txt`` are considered. Files that
    are empty or whose first line is not the ``YOLO_OBB`` header are left untouched.

    Parameters
    ----------
    path : str
        Directory (absolute or relative) containing the label text files.
    """
    header = "YOLO_OBB"
    for name in os.listdir(path):
        if not name.endswith("txt"):
            continue

        labelPath = os.path.join(path, name)
        # Read everything up front so the handle is closed before the file is replaced.
        with open(labelPath, 'r') as labelFile:
            all_lines = labelFile.readlines()

        # Empty files have no first line to inspect; skip them instead of crashing.
        if not all_lines or all_lines[0].rstrip("\n") != header:
            continue

        # Write the remaining lines to a sibling temp file, then swap it in.
        # The temp path is built once from the already-joined label path so it
        # stays valid for relative ``path`` values as well.
        tmpPath = labelPath + "1"
        with open(tmpPath, 'w') as tmpFile:
            tmpFile.writelines(all_lines[1:])
        os.replace(tmpPath, labelPath)

In [6]:
#getLabels and plotOBB
#This function will return all the labels in a 2d list
#This function is a helper function for the function plotOBB
def getLabels(txtPath):
    """Parse a label text file into a list of label rows.

    Each non-blank line is split on whitespace; the first field is converted to
    ``int`` and every remaining field to ``float``.

    Parameters
    ----------
    txtPath : str
        Path of the label text file.

    Returns
    -------
    list[list]
        One ``[int, float, float, ...]`` list per non-blank line, in file order.

    Raises
    ------
    ValueError
        If a field cannot be converted to ``int``/``float``.
    """
    all_labels = []
    with open(txtPath, 'r') as txtf:
        for line in txtf:
            fields = line.split()
            # Blank lines (e.g. a trailing newline) carry no label; skip them.
            if not fields:
                continue
            all_labels.append([int(fields[0])] + [float(value) for value in fields[1:]])
    return all_labels

#This function will plot the bounding box
#imgPath and txtPath are provided

def plotOBB(imgPath,txtPath):
    """Draw the oriented bounding boxes of a label file on its image and show it.

    Each label row is read as ``(class, cx, cy, size0, size1, angle)``; fields
    1-2 are passed to ``cv2.boxPoints`` as the box centre, fields 3-4 as its
    size and the negated field 5 as its angle.

    Parameters
    ----------
    imgPath : str
        Path of the image to display.
    txtPath : str
        Path of the label file read through ``getLabels``.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``imgPath``.
    """
    image = cv2.imread(imgPath, 3)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError("Could not read image: " + str(imgPath))

    for label in getLabels(txtPath):
        rect = (tuple(label[1:3]), tuple(label[3:5]), -label[5])
        # np.int0 no longer exists in NumPy 2.x; int32 is what drawContours expects.
        corners = cv2.boxPoints(rect).astype(np.int32)
        # drawContours paints onto ``image`` in place.
        cv2.drawContours(image, [corners], -1, (0, 0, 255), 2)

    # OpenCV stores channels as BGR while matplotlib expects RGB; convert so the
    # picture (and the red boxes) are shown in their true colours. The image is
    # displayed even when the label file contains no boxes.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()

In [7]:
#resizeImageAndCreateNewLabel
#This function determines which of the given image formats, if any, the file path matches
def IsThisAImage(imgFormats,filePath):
    """Return the image format that ``filePath`` ends with, or ``""`` if none.

    Parameters
    ----------
    imgFormats : iterable of str
        Candidate extensions without the leading dot (e.g. ``["jpg", "png"]``).
    filePath : str
        File name or path to classify.

    Returns
    -------
    str
        The first entry of ``imgFormats`` that is the file's extension, or an
        empty string when the file matches none of them.
    """
    for candidate in imgFormats:
        # Compare against the real extension; a plain substring test would also
        # accept names such as "jpg_notes.txt".
        if filePath.endswith("." + candidate):
            return candidate
    return ""

#This will take input of a single image path and 
#resize the image also get the new labels created in a 
#new text file.
def resizeImageAndCreateNewLabel(imgPath,txtPath,imgFormat,newX,newY):
    """Resize one image in place and rescale its label file to match.

    The resized image and rescaled labels are first written to temporary
    ``...cp1c2c.*`` files next to the originals and only then swapped in, so the
    originals survive if writing the new image fails.

    Parameters
    ----------
    imgPath : str
        Path of the image; overwritten with the resized image.
    txtPath : str
        Path of the label file; overwritten with the rescaled labels.
    imgFormat : str
        Extension of the image without the dot (e.g. ``"jpg"``).
    newX, newY : int
        Target width and height in pixels.

    Raises
    ------
    ValueError
        If OpenCV cannot read ``imgPath``.
    """
    imgf = cv2.imread(imgPath, 3)
    if imgf is None:
        # cv2.imread returns None on failure instead of raising.
        raise ValueError("Failed to read image : " + str(imgPath))

    oldY, oldX = imgf.shape[:2]
    scaleX = newX / oldX
    scaleY = newY / oldY

    newImg = cv2.resize(imgf, (newX, newY), interpolation=cv2.INTER_AREA)

    # Converting the old labels into new labels
    all_labels = getLabels(txtPath)
    alpha = 0.2
    for label in all_labels:
        label[1] *= scaleX
        label[2] *= scaleY
        # NOTE(review): field 3 is scaled by min(1, scaleX + alpha) rather than
        # scaleX; kept exactly as in the original - confirm this is intended.
        label[3] *= min(1, (scaleX + alpha))
        label[4] *= scaleY

    newImgPath = imgPath[:-len(imgFormat)-1] + "cp1c2c." + imgFormat
    # Derive the temp label path from the label file's own extension instead of
    # the length of the image extension.
    newTxtPath = os.path.splitext(txtPath)[0] + "cp1c2c.txt"

    # Write the image first: if that fails nothing has been modified yet and the
    # original image/label pair is left intact.
    if not cv2.imwrite(newImgPath, newImg):
        print("Failed to created new image of :", imgPath)
        if os.path.exists(newImgPath):
            os.remove(newImgPath)
        return

    with open(newTxtPath, 'w') as newtxtFile:
        for label in all_labels:
            newtxtFile.write(" ".join(str(x) for x in label) + "\n")

    # os.replace overwrites the destination in one step on every platform.
    os.replace(newImgPath, imgPath)
    os.replace(newTxtPath, txtPath)
    


#This function will take input of the directory which contain all
#the images and text files, and the possible image formats, and 
#the new dimensions of the image
def resizeImagesAndCreateNewLabels(path,imgFormats,newX,newY):
    """Resize every labelled image in ``path`` and rescale its labels.

    For each directory entry recognised by ``IsThisAImage`` the label file with
    the same stem and a ``.txt`` extension is looked up in the same directory.
    Pairs are handed to ``resizeImageAndCreateNewLabel``; images without a label
    file are collected and printed at the end.

    Parameters
    ----------
    path : str
        Directory containing both the images and the label text files.
    imgFormats : iterable of str
        Accepted image extensions without the dot.
    newX, newY : int
        Target width and height in pixels.
    """
    files = os.listdir(path)
    knownFiles = set(files)  # constant-time membership checks
    # This will contain images which do not have corresponding labels
    errList = []
    for file in files:
        imgFormat = IsThisAImage(imgFormats, file)
        if len(imgFormat) == 0:
            continue

        # Build the image path before the label check so the error list records
        # the image that is actually being examined.
        imgPath = os.path.join(path, file)
        txtName = file[:-len(imgFormat)-1] + ".txt"

        if txtName not in knownFiles:
            errList.append(imgPath)
            continue

        resizeImageAndCreateNewLabel(imgPath, os.path.join(path, txtName), imgFormat, newX, newY)

    if len(errList) > 0:
        print("The following images do not have the corresponding labels:")
        for s in errList:
            print(s)


In [8]:
#Preprocessing the labels
#This function will take input of the directory containing all the
#labels. The labels in the directory are expected to be in (class,x,y,h,w,theta)
#format. [Theta is in degrees] The x,y,h,w are not normalized as well.
#This function will normalize the parameters (x,y,h,w) 
#After preprocessing the labels are in the form (class,x',y',h',w',sinTheta)
#All the images are supposed to be of size IMAGE_SIZE
decPlaces=8
def preprocessTheLabels(path, imageSize=None):
    """Normalise every label file in ``path`` in place.

    Each non-blank line is read as ``class v1 v2 v3 v4 angle``. ``v1`` and ``v3``
    are divided by ``imageSize[0]``, ``v2`` and ``v4`` by ``imageSize[1]``, and
    the angle (degrees) is replaced by ``sin(radians(90*flg - angle))`` where
    ``flg`` is -1 for negative angles and 1 otherwise. All results are rounded
    to ``decPlaces`` decimals. Files named ``classes.txt``/``Classes.txt`` and
    entries not ending in ``txt`` are skipped.

    Running this twice on the same directory normalises the values twice.

    Parameters
    ----------
    path : str
        Directory containing the label text files.
    imageSize : tuple, optional
        Two-element size used for normalisation. Defaults to the module-level
        ``IMAGE_SIZE``.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than six fields; the file is left unchanged.
    """
    if imageSize is None:
        imageSize = IMAGE_SIZE

    for file in os.listdir(path):
        if not file.endswith("txt") or file == "classes.txt" or file == "Classes.txt":
            continue

        labelPath = os.path.join(path, file)
        with open(labelPath, 'r') as txtf:
            rawLines = txtf.readlines()

        # Transform everything in memory first so a malformed line cannot leave
        # a half-written temporary file behind.
        newLines = []
        for line in rawLines:
            fields = line.split()
            if not fields:
                continue  # blank lines carry no label
            values = [float(v) for v in fields[1:]]
            for i in range(4):
                # values 0 and 2 use imageSize[0]; values 1 and 3 use imageSize[1]
                values[i] = round(values[i] / imageSize[i % 2], decPlaces)

            #considers angle 90-theta to get alpha of five dim representation
            #can be proven by basic geometry.
            flg = -1 if values[4] < 0 else 1
            theta = math.radians(90*flg - values[4])
            values[4] = round(math.sin(theta), decPlaces)

            newLines.append(" ".join([fields[0]] + [str(v) for v in values]) + "\n")

        newtxtPath = labelPath + "1"
        with open(newtxtPath, 'w') as newtxtf:
            newtxtf.writelines(newLines)
        os.replace(newtxtPath, labelPath)
        

In [9]:
#creating training and validation dataset
def getRandomVal():
    """Return the constant value (0.4) used as the shuffle's random source."""
    return 0.4

def createTrainingAndValidationDataset(srcPath,TrainingPath,ValidationPath):
    """Move image/label pairs from ``srcPath`` into training and validation folders.

    The directory listing is reordered with a Fisher-Yates pass driven by
    ``getRandomVal``. Walking the reordered list, every ``jpg`` file that has a
    same-stem ``txt`` file is counted; pairs whose running count satisfies
    ``count % 5 == 3`` go to ``ValidationPath``, all others to ``TrainingPath``.
    Images without a label file stay in ``srcPath``.

    Parameters
    ----------
    srcPath : str
        Directory holding the ``jpg`` images and their ``txt`` labels.
    TrainingPath : str
        Destination directory for training pairs (created if missing).
    ValidationPath : str
        Destination directory for validation pairs (created if missing).
    """
    files = os.listdir(srcPath)

    # random.shuffle() lost its second ("random") argument in Python 3.11.
    # This loop reproduces what shuffle(files, getRandomVal) did on older
    # interpreters, so the resulting order is unchanged.
    for i in reversed(range(1, len(files))):
        j = int(getRandomVal() * (i + 1))
        files[i], files[j] = files[j], files[i]

    available = set(files)  # constant-time label lookups

    # shutil.move() onto a missing directory would rename the file to that path
    # instead of moving it inside, so make sure both targets exist.
    os.makedirs(TrainingPath, exist_ok=True)
    os.makedirs(ValidationPath, exist_ok=True)

    mult = 5
    rem = 3
    cnt = 0
    for file in files:
        if not file.endswith("jpg"):
            continue

        #only for image files with extension of len 3
        txtName = file[:-3] + "txt"
        if txtName not in available:
            continue

        cnt += 1
        destination = ValidationPath if cnt % mult == rem else TrainingPath
        shutil.move(os.path.join(srcPath, file), destination)
        shutil.move(os.path.join(srcPath, txtName), destination)


In [22]:
def copyFilesToFiveFoldDataset(pathDirectory,destDirectory):
    """Copy every ``jpg`` image that has a same-stem ``txt`` label, plus that label.

    Parameters
    ----------
    pathDirectory : str
        Directory scanned for image/label pairs.
    destDirectory : str
        Directory that receives the copies; files keep their names.
    """
    entries = os.listdir(pathDirectory)
    for imageName in entries:
        if not imageName.endswith("jpg"):
            continue

        labelName = imageName[:-3] + "txt"
        if labelName in entries:
            # Image first, then its label, each under its original name.
            for name in (imageName, labelName):
                shutil.copy(os.path.join(pathDirectory, name), destDirectory + "/" + name)

In [None]:
%run creating5FoldDataset.ipynb

def main(doEnable=False):
    """Prepare the dataset folders and, when enabled, run the full pipeline.

    The five-fold output folder under ``<cwd>/Dataset`` is always created
    (re-running is safe). When ``doEnable`` is true, each class folder under
    ``<cwd>/Dataset/CompleteDataset`` is processed in order: header removal,
    resizing to 384x384, label normalisation and copying into the five-fold
    folder; afterwards the fold-creation helpers are invoked.

    Parameters
    ----------
    doEnable : bool, optional
        Run the processing steps. Defaults to ``False``, in which case only the
        output folder is created.
    """
    CLASSES=['Curved_Mayo_Scissor','Scalpel','Straight_Dissection_Clamp','Babcock_Tissue_Forceps','Mayo_Needle_Holder','Deaver_Retractor','Metzenbaum_Scissor','Microvascular_Needle_Holder']
    curwd = (os.getcwd()).replace("\\","/")
    path = curwd + "/Dataset/CompleteDataset"

    #Creating folder for five folds
    fiveFoldDataset = curwd + "/Dataset/FiveFoldCrossValidationDataset"
    # makedirs(exist_ok=True) keeps the cell re-runnable and also creates the
    # parent "Dataset" folder when it is missing.
    os.makedirs(fiveFoldDataset, exist_ok=True)

    imgFormats=["jpg"]
    if not doEnable:
        return

    for CLASS in CLASSES:
        newpath = path + "/" + CLASS

        remove_first_line(newpath)

        #Update the global variable IMAGE_SIZE when using for Image size other 384x384
        resizeImagesAndCreateNewLabels(newpath,imgFormats,384,384)

        preprocessTheLabels(newpath)

        copyFilesToFiveFoldDataset(newpath, fiveFoldDataset)

    # NOTE(review): these two helpers are not defined in this notebook; they are
    # presumably provided by the %run of creating5FoldDataset.ipynb - confirm.
    create5FoldDataset(fiveFoldDataset)
    createTrainingAndValidationDatasetForFiveFolds(fiveFoldDataset)

if __name__=="__main__":
    main()