In [49]:
import os
import cv2
import random
from functions import *

In [50]:
# Root folders of the two source datasets. The notebook reads the
# `Annotations`, `ImageSets/Main` and `JPEGImages` sub-paths below each root.
ladd_path = '../../../ladd-and-weights/dataset/full_train_ds'
ipsar_path = '../../../ladd-and-weights/dataset/3rd_party/heridal'

# Annotation folder of each dataset, derived from its root.
ladd_anotation_path, ipsar_anotation_path = (
    os.path.join(dataset_root, 'Annotations')
    for dataset_root in (ladd_path, ipsar_path)
)

# Output root; `images/` and `labels/` sub-folders are created underneath it.
target_path = '../../networks/data/'

In [51]:
# Side length, in pixels, of the square crops cut out of the source images.
CROP_SIZE = 608

# Dataset switches: set one to False to leave that dataset out of the export.
INCLUDE_IPSAR = True
INCLUDE_LADD = True

In [52]:
def _xml_annotations(annotation_dir):
    """Map image id (file name without extension) to its XML annotation path.

    Only entries whose extension is `.xml` (case-insensitive) are returned, so
    stray files in the folder (editor backups, checkpoints, thumbnails) are not
    mistaken for annotations and later handed to the XML parser.
    """
    found = {}
    for entry in sorted(os.listdir(annotation_dir)):
        stem, extension = os.path.splitext(entry)
        if extension.lower() == '.xml':
            found[stem] = os.path.join(annotation_dir, entry)
    return found


# image id -> annotation file path, merged over every enabled dataset.
# NOTE: if both datasets contain the same image id, the IPSAR entry wins.
annotation_files = {}
if INCLUDE_LADD:
    annotation_files.update(_xml_annotations(ladd_anotation_path))
if INCLUDE_IPSAR:
    annotation_files.update(_xml_annotations(ipsar_anotation_path))
print('total annotations: ',len(annotation_files))

total annotations:  3203


In [53]:
# Top-level output folders for the cropped images and their label files.
dataset_images_path, dataset_labels_path = (
    os.path.join(target_path, subfolder) for subfolder in ('images', 'labels')
)
os.makedirs(dataset_images_path, exist_ok=True)
os.makedirs(dataset_labels_path, exist_ok=True)

In [54]:
def get_crop_around(img_size, xyxy, crop_size=None):
    """Pick a randomly placed crop window that contains the given box.

    Args:
        img_size: indexable pair; `img_size[0]` bounds the left/right span and
            `img_size[1]` the top/bottom span (presumably (width, height) in
            pixels - confirm against the annotation reader).
        xyxy: box as `[x_min, y_min, x_max, y_max]` in pixel coordinates.
        crop_size: side length of the crop window; defaults to the notebook's
            `CROP_SIZE` when omitted.

    Returns:
        `[left, top, right, bottom]` of the crop window. The window is
        `crop_size` wide/high whenever the image is at least that large;
        otherwise it spans the whole image along that axis.
    """
    if crop_size is None:
        crop_size = CROP_SIZE

    def _span(box_lo, box_hi, limit):
        # Random offset that still keeps the box inside the window. The range
        # is clamped at 0 so a box larger than the window no longer makes
        # random.randint raise ValueError (the window then starts at the box).
        jitter = random.randint(0, max(0, crop_size - box_hi + box_lo))
        lo = max(0, box_lo - jitter)
        hi = min(lo + crop_size, limit)
        if hi - lo < crop_size:
            # Window was cut by the image border: shift it back inside, but
            # never below 0 (image smaller than the window).
            lo = max(0, hi - crop_size)
        return lo, hi

    # Horizontal span first, then vertical, so random draws keep their order.
    left, right = _span(xyxy[0], xyxy[2], img_size[0])
    top, bottom = _span(xyxy[1], xyxy[3], img_size[1])
    return [left, top, right, bottom]
    

In [55]:
def split_image(file: str, target_set: str) -> bool:
    """Cut crops out of one source image and write them with YOLO-style labels.

    For every annotated box one crop (see `get_crop_around`) is written to
    `<target_path>/images/<target_set>/<id>_<n>.jpg` together with a label file
    `<target_path>/labels/<target_set>/<id>_<n>.txt`. The label file lists the
    box itself plus every other box whose centre falls inside the crop, one
    line per box: `0 x_center y_center width height`, normalised by the size
    of the written crop. An image without boxes yields a single random crop
    `<id>_0.jpg` and a label file containing only a newline.

    Args:
        file: path of the source image WITHOUT the `.jpg` extension; its last
            path component is the image id used to look up `annotation_files`.
        target_set: name of the sub-folder under `images/` and `labels/`.

    Returns:
        True if an annotation file was found for the image, False otherwise.
    """
    image_id = os.path.split(file)[-1]
    images_path_target = os.path.join(target_path, 'images', target_set)
    labels_path_target = os.path.join(target_path, 'labels', target_set)
    os.makedirs(images_path_target, exist_ok=True)
    os.makedirs(labels_path_target, exist_ok=True)

    points_xyxy = np.empty((0, 4))
    img_size = None
    is_labels_found = False
    annotation_path = annotation_files.get(image_id)
    if annotation_path is not None and os.path.exists(annotation_path):
        # NOTE(review): despite the helper's name, the boxes are consumed as
        # x_min, y_min, x_max, y_max below - confirm against `functions`.
        img_size, points_xyxy = get_imgSize_and_list_of_yxyx(annotation_path)
        points_xyxy = np.array(points_xyxy).reshape(-1, 4)
        is_labels_found = True

    # Decode the image once instead of once per box; cv2.imread signals a
    # missing/corrupt file by returning None rather than raising.
    image = cv2.imread(file + '.jpg')
    if image is None:
        print(file + '.jpg', 'could not be read, image skipped')
        return is_labels_found
    img_h, img_w = image.shape[:2]

    if points_xyxy.shape[0] == 0:
        # Negative sample: one random crop with an empty label file. The range
        # is clamped so images smaller than CROP_SIZE do not raise ValueError.
        x = random.randint(0, max(0, img_w - CROP_SIZE))
        y = random.randint(0, max(0, img_h - CROP_SIZE))
        with open(os.path.join(labels_path_target, image_id + '_0' + '.txt'), 'w') as f:
            f.writelines(['\n'])
        target_image_name = os.path.join(images_path_target, image_id + '_0' + '.jpg')
        cv2.imwrite(target_image_name, image[y:y + CROP_SIZE, x:x + CROP_SIZE])
        return is_labels_found

    # Box centres do not depend on the crop, so compute them once.
    centers_x = points_xyxy[:, ::2].sum(axis=1) // 2
    centers_y = points_xyxy[:, 1::2].sum(axis=1) // 2
    box_indices = np.arange(points_xyxy.shape[0])

    for crop_idx, box in enumerate(points_xyxy):
        crop = get_crop_around(img_size, box)
        # A negative start would be read by numpy as "from the end"; clamp it.
        x0, y0 = max(0, crop[0]), max(0, crop[1])
        patch = image[y0:crop[3], x0:crop[2]]
        crop_h, crop_w = patch.shape[:2]
        target_image_name = os.path.join(images_path_target, image_id + '_' + str(crop_idx) + '.jpg')
        if crop_h == 0 or crop_w == 0:
            print(file, target_image_name, crop, image.shape, 'empty crop, skipped')
            continue
        x1, y1 = x0 + crop_w, y0 + crop_h

        # Other boxes whose centre lies inside the crop. The current box is
        # excluded by index: comparing x_min values would also drop unrelated
        # boxes that merely share the same left edge.
        others_in_crop = (
            (box_indices != crop_idx)
            & (centers_x >= x0) & (centers_x <= x1)
            & (centers_y >= y0) & (centers_y <= y1)
        )

        # Shift into crop coordinates and clip to the crop rectangle.
        target_pixel_boxes = [
            [
                max(0, b[0] - x0),
                max(0, b[1] - y0),
                min(crop_w, b[2] - x0),
                min(crop_h, b[3] - y0),
            ]
            for b in np.vstack([box, points_xyxy[others_in_crop, :]])
        ]

        # Centre/size form, normalised by the size of the crop actually
        # written (equal to CROP_SIZE unless the image is smaller than that).
        target_boxes = [
            [
                (b[0] + b[2]) // 2 / crop_w,
                (b[1] + b[3]) // 2 / crop_h,
                abs(b[0] - b[2]) / crop_w,
                abs(b[1] - b[3]) / crop_h,
            ]
            for b in target_pixel_boxes
        ]
        target_lines = [" ".join(["0", *map(str, b)]) + '\n' for b in target_boxes]

        # Write the image first so a failed write leaves no orphan label file.
        try:
            written = cv2.imwrite(target_image_name, patch)
        except Exception as e:
            print(file, target_image_name, crop, image.shape, e)
            continue
        if not written:
            print(file, target_image_name, crop, image.shape, 'cv2.imwrite failed')
            continue

        with open(os.path.join(labels_path_target, image_id + '_' + str(crop_idx) + '.txt'), 'w') as f:
            f.writelines(target_lines)

    return is_labels_found



In [56]:
# Extension-less image paths of the training split, gathered from every
# enabled dataset's ImageSets/Main/train.txt (one image id per line).
trainset = []
for enabled, dataset_root in ((INCLUDE_LADD, ladd_path), (INCLUDE_IPSAR, ipsar_path)):
    if not enabled:
        continue
    with open(os.path.join(dataset_root,'ImageSets/Main/train.txt'),'r') as f:
        # Skip blank lines: an empty id would resolve to the JPEGImages folder
        # itself and break image loading later on.
        trainset.extend(
            os.path.join(dataset_root,'JPEGImages',image_id)
            for image_id in map(str.strip, f)
            if image_id
        )
    # Running total after each dataset has been added.
    print(len(trainset))

1311
2894


In [57]:
# Export the training crops and count images that had no annotation file.
# The counter is initialised once, before the loop; resetting it on every
# iteration would make the printed total reflect only the last image.
labels_missing = 0
for file in trainset:
    if not split_image(file,'train'):
        labels_missing+=1
print(labels_missing)

0


In [58]:
# Extension-less image paths of the validation split, gathered from every
# enabled dataset's ImageSets/Main/val.txt (one image id per line).
valset = []
for enabled, dataset_root in ((INCLUDE_LADD, ladd_path), (INCLUDE_IPSAR, ipsar_path)):
    if not enabled:
        continue
    with open(os.path.join(dataset_root,'ImageSets/Main/val.txt'),'r') as f:
        # Skip blank lines: an empty id would resolve to the JPEGImages folder
        # itself and break image loading later on.
        valset.extend(
            os.path.join(dataset_root,'JPEGImages',image_id)
            for image_id in map(str.strip, f)
            if image_id
        )

In [59]:
# Export the validation crops; the "labels found" flag returned by
# split_image is not needed for this split and is discarded.
for file in valset:
    split_image(file, 'valid')