In [1]:
import cv2
import imutils
import numpy as np
import matplotlib.pyplot as plt
import json 
import os
import glob
import PIL.Image as Image
import copy
import time
import shutil

from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.models as models

### Load data

#### Change project directory and run once

In [2]:
### Run once (re-running is safe: existing folders are reused, files are overwritten)
# Creates the project folder layout
#     Project1.2/data/{raw,splitted}/{train,test}/<supercategory>/
# plus data/splitted/train/Background, then copies every dataset image to
#     data/raw/<train|test>/<supercategory>/<image index>.jpg
data_path='/dtu/datasets1/02514/data_wastedetection'
project_path = os.path.join(os.getcwd(), 'Project1.2')
data_root = os.path.join(project_path, 'data')

# Images whose batch number is below this value go to the train split.
first_test_batch = 13

# os.makedirs(..., exist_ok=True) also creates missing parents and does not
# raise when the folder already exists, so this cell no longer fails on re-run.
for stage in ('raw', 'splitted'):
    for split in ('test', 'train'):
        os.makedirs(os.path.join(data_root, stage, split), exist_ok=True)
os.makedirs(os.path.join(data_root, 'splitted', 'train', 'Background'), exist_ok=True)

# Context manager so the annotation file handle is closed after parsing.
with open(os.path.join(project_path, 'annotations.json')) as annotations_file:
    annotations = json.load(annotations_file)

# supercategories: str(position of the category in the list) -> supercategory name
# categories: 'Background' followed by each distinct supercategory, in first-seen order
supercategories = {}
categories = ['Background']
for i, category in enumerate(annotations['categories']):
    supercategory = category['supercategory']
    supercategories[str(i)] = supercategory
    if supercategory not in categories:
        categories.append(supercategory)
        for stage in ('raw', 'splitted'):
            for split in ('test', 'train'):
                os.makedirs(os.path.join(data_root, stage, split, supercategory), exist_ok=True)


for image_id, image_info in enumerate(annotations['images']):
    file_name = image_info['file_name']
    # The integer between the first '_' and the following '/' of file_name
    # decides the split.
    batch_number = int(file_name.split('_')[1].split('/')[0])
    split = 'train' if batch_number < first_test_batch else 'test'
    # NOTE(review): the label is taken from the annotation stored at the same
    # list position as the image, and supercategories is keyed by list position
    # while being looked up by category_id. Both are only right if the
    # annotation file has exactly one annotation per image, in image order, and
    # category ids equal their positions -- confirm against annotations.json.
    supercategory = supercategories[str(annotations['annotations'][image_id]['category_id'])]
    shutil.copyfile(
        os.path.join(data_path, file_name),
        os.path.join(data_root, 'raw', split, supercategory, str(image_id)+'.jpg'),
    )

In [3]:
def get_iou(bb1, bb2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is a mapping with the keys 'x1', 'x2', 'y1' and 'y2', where
    (x1, y1) is the top-left and (x2, y2) the bottom-right corner.

    Returns 0.0 when the boxes do not overlap, otherwise
    intersection / (area(bb1) + area(bb2) - intersection).

    Raises AssertionError when a box has non-positive width or height, or when
    the computed value falls outside [0, 1].
    """
    # Both boxes need strictly positive width and height.
    for box in (bb1, bb2):
        assert box['x1'] < box['x2']
        assert box['y1'] < box['y2']

    # Corners of the overlap rectangle (if there is one).
    left, top = max(bb1['x1'], bb2['x1']), max(bb1['y1'], bb2['y1'])
    right, bottom = min(bb1['x2'], bb2['x2']), min(bb1['y2'], bb2['y2'])
    if right < left or bottom < top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    first_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    second_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    iou = overlap / float(first_area + second_area - overlap)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [4]:
# Build the region-proposal crops used for training: for every image under
# data/raw/train run selective search, then save proposals that overlap the
# ground-truth box strongly into data/splitted/train/<class folder> and
# proposals that barely overlap it into data/splitted/train/Background.
path_train = os.path.join(os.getcwd(), 'Project1.2', 'data', 'raw', 'train')
path_splitted_train = os.path.join(os.getcwd(), 'Project1.2', 'data', 'splitted', 'train')
size = 224

max_proposals = 100        # only the first proposals returned by selective search are inspected
max_crops_per_label = 30   # cap on object crops and on background crops, per image
iou_object = 0.70          # a proposal above this IoU is saved as an object crop
iou_background = 0.3       # a proposal below this IoU is saved as a background crop

ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

background_dir = os.path.join(path_splitted_train, 'Background')
os.makedirs(background_dir, exist_ok=True)

# One pass over the class folders. The previous outer loop over `categories`
# never used the category and simply repeated this whole pass once per class.
for folder_name in os.listdir(path_train):
    object_dir = os.path.join(path_splitted_train, folder_name)
    os.makedirs(object_dir, exist_ok=True)

    for image_name in os.listdir(os.path.join(path_train, folder_name)):
        print(image_name)
        path = os.path.join(path_train, folder_name, image_name)
        # Errors are handled per image so one bad file does not abort the rest.
        try:
            image_id = int(image_name.split('.')[0])
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising
                raise ValueError("cv2.imread could not read the image")

            # COCO-style bbox is [x, y, width, height]; get_iou needs corners.
            # NOTE(review): the annotation is looked up by list position, which
            # assumes one annotation per image in image order -- confirm
            # against annotations.json.
            bbox_x, bbox_y, bbox_w, bbox_h = annotations['annotations'][image_id]['bbox'][:4]
            gtvalues = [{
                "x1": int(bbox_x),
                "x2": int(bbox_x + bbox_w),
                "y1": int(bbox_y),
                "y2": int(bbox_y + bbox_h),
            }]

            ss.setBaseImage(image)
            ss.switchToSelectiveSearchFast()
            ssresults = ss.process()

            counter = 0        # object crops written for this image
            falsecounter = 0   # background crops written for this image
            for x, y, w, h in ssresults[:max_proposals]:
                # Stop once both quotas are full.
                if counter >= max_crops_per_label and falsecounter >= max_crops_per_label:
                    break

                proposal = {"x1": x, "x2": x + w, "y1": y, "y2": y + h}
                # Best overlap with any ground-truth box: a proposal only counts
                # as background if it misses all of them.
                iou = max(get_iou(gtval, proposal) for gtval in gtvalues)

                # The running counter is part of the file name so that crops of
                # the same image no longer overwrite each other.
                if iou > iou_object and counter < max_crops_per_label:
                    out_path = os.path.join(object_dir, f"{image_id}_{counter}.jpg")
                    counter += 1
                elif iou < iou_background and falsecounter < max_crops_per_label:
                    out_path = os.path.join(background_dir, f"{image_id}_{falsecounter}.jpg")
                    falsecounter += 1
                else:
                    continue

                timage = image[y:y+h, x:x+w]
                resized = cv2.resize(timage, (size, size), interpolation = cv2.INTER_AREA)
                cv2.imwrite(out_path, resized)
        except Exception as e:
            print(e)
            print("error in "+path)
            continue


# train_images=[]
# train_labels=[]
# for e,i in enumerate(os.listdir(annot)):
#     try:
#         if i.startswith("airplane"):
#             filename = i.split(".")[0]+".jpg"
#             print(e,filename)
#             image = cv2.imread(os.path.join(path,filename))
#             df = pd.read_csv(os.path.join(annot,i))
#             gtvalues=[]
#             for row in df.iterrows():
#                 x1 = int(row[1][0].split(" ")[0])
#                 y1 = int(row[1][0].split(" ")[1])
#                 x2 = int(row[1][0].split(" ")[2])
#                 y2 = int(row[1][0].split(" ")[3])
#                 gtvalues.append({"x1":x1,"x2":x2,"y1":y1,"y2":y2})
#             ss.setBaseImage(image)
#             ss.switchToSelectiveSearchFast()
#             ssresults = ss.process()
#             imout = image.copy()
#             counter = 0
#             falsecounter = 0
#             flag = 0
#             fflag = 0
#             bflag = 0
#             for e,result in enumerate(ssresults):
#                 if e < 2000 and flag == 0:
#                     for gtval in gtvalues:
#                         x,y,w,h = result
#                         iou = get_iou(gtval,{"x1":x,"x2":x+w,"y1":y,"y2":y+h})
#                         if counter < 30:
#                             if iou > 0.70:
#                                 timage = imout[y:y+h,x:x+w]
#                                 resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
#                                 train_images.append(resized)
#                                 train_labels.append(1)
#                                 counter += 1
#                         else :
#                             fflag =1
#                         if falsecounter <30:
#                             if iou < 0.3:
#                                 timage = imout[y:y+h,x:x+w]
#                                 resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
#                                 train_images.append(resized)
#                                 train_labels.append(0)
#                                 falsecounter += 1
#                         else :
#                             bflag = 1
#                     if fflag == 1 and bflag == 1:
#                         print("inside")
#                         flag = 1
#     except Exception as e:
#         print(e)
#         print("error in "+filename)
#         continue

919.jpg


#### Taco class

In [None]:
class Taco(torch.utils.data.Dataset):
    """Image-folder dataset with one sub-directory per class.

    Images are read from ``<data_path>/train/<class>/*.jpg`` or
    ``<data_path>/test/<class>/*.jpg``. The label of a sample is the index of
    its class directory name in the alphabetically sorted list of class
    directories.
    """

    def __init__(self, train, transform, data_path=os.path.join(os.getcwd(), 'Project1.2', 'data', 'raw')):
        """Index the image files of one split.

        Args:
            train: truthy selects the 'train' sub-folder, falsy selects 'test'.
            transform: callable applied to the PIL image in ``__getitem__``.
            data_path: folder that contains the 'train' and 'test' sub-folders.
        """
        self.transform = transform
        split_dir = os.path.join(data_path, 'train' if train else 'test')
        image_classes = sorted(
            os.path.split(d)[1]
            for d in glob.glob(os.path.join(split_dir, '*'))
            if os.path.isdir(d)
        )
        self.name_to_label = {name: label for label, name in enumerate(image_classes)}
        # glob returns files in filesystem order; sorting keeps the mapping
        # index -> sample identical between runs and machines.
        self.image_paths = sorted(glob.glob(os.path.join(split_dir, '*', '*.jpg')))

    def __len__(self):
        """Return the total number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(transform(image), label)`` for the sample at ``idx``."""
        image_path = self.image_paths[idx]

        # Close the file as soon as the pixels are decoded and force three
        # channels so grayscale/RGBA/CMYK files still batch with RGB images.
        with Image.open(image_path) as image_file:
            image = image_file.convert('RGB')
        # The class name is the name of the folder that contains the image.
        c = os.path.basename(os.path.dirname(image_path))
        y = self.name_to_label[c]
        X = self.transform(image)
        return X, y

batch_size = 64

# Convert each PIL image to a tensor, then resize to 256x256 so that samples of
# different original sizes can be stacked into one batch.
trans = transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Resize((256, 256)),
                            ])

trainset = Taco(train=True, transform=trans)
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=3)
testset = Taco(train=False, transform=trans)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=3)

# Sanity check: fetch one training batch and preview some of its samples.
images, labels = next(iter(train_loader))
plt.figure(figsize=(20,10))

print(f"Image shape: {images[0].numpy()[0].shape}")

# Preview at most 21 samples; a batch can hold fewer (small dataset or a
# smaller batch_size), which used to raise an IndexError.
for i in range(min(21, len(images))):
    plt.subplot(5,7,i+1)
    # Only the first channel of each image is shown, rendered in gray.
    plt.imshow(images[i].numpy()[0], 'gray')
    plt.title(labels[i].item())
    plt.axis('off')

In [None]:
# Load annotations (context manager so the file handle is closed after parsing)
with open(os.path.join('Project1.2', 'annotations.json')) as annotations_file:
    annotations = json.load(annotations_file)

In [None]:
# Load images
# Reads every image listed in the annotations into memory, keyed by its
# position in annotations['images'].
data_path = '/dtu/datasets1/02514/data_wastedetection/'
images = {}
for image_id, image_info in enumerate(annotations['images']):
    source_file = os.path.join(data_path, image_info['file_name'])
    images[image_id] = cv2.imread(source_file)

In [None]:
# Get bounding box for every image
# Stores [x_min, y_min, x_max, y_max] (truncated to int) under
# annotations['bounding_boxes'][image index].
annotations['bounding_boxes'] = {}
for image_id in range(len(annotations['images'])):
    # NOTE(review): the annotation is looked up by list position, which assumes
    # one annotation per image in image order -- confirm against
    # annotations.json.
    polygons = annotations['annotations'][image_id]['segmentation']

    # Each polygon is a flat list [x0, y0, x1, y1, ...]: even positions are x,
    # odd positions are y. All polygons of the object are used, not only the
    # first one, so objects made of several parts get a box that covers them.
    xs = [value for polygon in polygons for value in polygon[0::2]]
    ys = [value for polygon in polygons for value in polygon[1::2]]
    if not xs or not ys:
        # The previous sentinel-based version silently produced the
        # meaningless box [10000, 10000, 0, 0] here.
        raise ValueError(f"annotation {image_id} has no segmentation coordinates")

    annotations['bounding_boxes'][image_id] = [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]

### Resize images and bounding boxes

In [None]:
def resize(image, BB_start, BB_width, BB_height, size=(224, 224)):
    """Resize an image and rescale its bounding box to match.

    Args:
        image: array whose ``shape`` starts with (height, width), as passed to
            ``cv2.resize``.
        BB_start: (x, y) of the top-left corner of the box in ``image``.
        BB_width: box width in pixels of ``image``.
        BB_height: box height in pixels of ``image``.
        size: target (width, height) handed to ``cv2.resize``.

    Returns:
        ``(img_resized, BB_new_start, BB_new_width, BB_new_height)`` where the
        box values are truncated to int.
    """
    ### resize image
    # cv2.resize returns a new array, so the input does not need to be copied
    img_resized = cv2.resize(image, size, interpolation = cv2.INTER_AREA)

    ### resize BB
    # get x and y ratio (image.shape is (rows, cols): shape[1] is the width)
    lx = size[0]/image.shape[1]
    ly = size[1]/image.shape[0]

    # get new (x,y), width, height
    # widths scale with the x ratio and heights with the y ratio (the two
    # factors used to be swapped, which is only correct when lx == ly)
    BB_new_start = (int(BB_start[0]*lx), int(BB_start[1]*ly))
    BB_new_width = int(BB_width*lx)
    BB_new_height = int(BB_height*ly)

    return img_resized, BB_new_start, BB_new_width, BB_new_height

In [None]:
# # Resize images and bounding boxes
# resized_images = []
# annotations['new_bounding_boxes'] = {}
# for image_id in images:
    
#     # Resize images and bounding boxes
#     width = annotations['bounding_boxes'][image_id][2] - annotations['bounding_boxes'][image_id][0]
#     height = annotations['bounding_boxes'][image_id][3] - annotations['bounding_boxes'][image_id][1]
#     new_image, new_bb, new_width, new_height = resize(images[image_id], (annotations['bounding_boxes'][image_id][0], annotations['bounding_boxes'][image_id][1]), width, height)
    
#     # write resized images
#     resized_images.append(new_image)

#     # write new bounding boxes
#     annotations['new_bounding_boxes'][image_id] = [int(new_bb[0]), int(new_bb[1]), int(new_bb[0] + new_width), int(new_bb[1] + new_height)]