In [9]:
import numpy as np
import pickle as pkl
import os
from PIL import Image
from matplotlib import pyplot as plt
import matplotlib.patches as patches
from math import floor, ceil
import time
from collections import deque
import random
from IPython.display import clear_output
import xml.etree.ElementTree as ET
import time

In [28]:
roi_size = 2

In [11]:
def max_pool_dynamic_input(x, H_out, W_out):
    out = None
    (N, H, W, C) = x.shape
    pool_height = np.zeros((H_out), dtype="int32")
    pool_width = np.zeros((W_out), dtype="int32")
    
    div_h = int(H/H_out)
    mod_h = H%H_out
    div_w = int(W/W_out)
    mod_w = W%W_out
    pool_height = pool_height + div_h
    pool_width = pool_width + div_w
    pool_height[:mod_h] += 1
    pool_width[:mod_w] += 1
    
    out = np.zeros((N, H_out, W_out, C))
    for n in range(N):
        for h in range(H_out):
            for w in range(W_out):
                h1 = sum(pool_height[:h])
                h2 = h1 + pool_height[h]
                w1 = sum(pool_width[:w])
                w2 = w1 + pool_width[w]
                window = x[n, h1:h2, w1:w2, :]
                m = np.max(window, axis=0)
                m = np.max(m, axis=0)
                out[n,h,w,:] = m
    
    return out

In [29]:
def get_max_and_concat(*args):
    size = 0
    for arg in args:
        size += arg.shape[-1]
    final = np.zeros(size)
    
    arg_size = 0
    
    i = 0
    for arg in args:
        max_pool = max_pool_dynamic_input(np.expand_dims(arg, 0), 1, 1)[0,0, 0,:]
        final[i: arg.shape[-1]+i] = max_pool
        i+= arg.shape[-1]  
    return final
        

In [20]:
def filter_detections_same_class(output_dict, detection_labels, max_iou=.10, max_filtered=20, conf_tesh=.5, rand_select=2):
    filtered_idx = []
    detections = output_dict["detection_boxes"]
    for i in range(len(detections)):
        if(output_dict['detection_scores'][i] >= conf_tesh):
            detection = detections[i]
            add_to_filter = True
            for label in detection_labels:
                if bb_intersection_over_union(detection, label) > max_iou:
                    add_to_filter = False
                    break

            for fi in filtered_idx:
                if bb_intersection_over_union(detection, detections[fi]) > .5:
                    add_to_filter = False
                    break

            if add_to_filter:
                filtered_idx.append(i)

            if len(filtered_idx) > max_filtered:
                break
    if len(filtered_idx) > rand_select:
        filtered_idx = random.sample(filtered_idx, rand_select)
    
    output_dict['detection_boxes'] = output_dict['detection_boxes'][filtered_idx]
    output_dict['detection_classes'] = output_dict['detection_classes'][filtered_idx]
    output_dict['detection_scores'] = output_dict['detection_scores'][filtered_idx]
    
    
    return output_dict

In [21]:
def get_labels(fpath: str) :
    try:
        fpath = os.path.abspath(fpath)
        tree = ET.parse(fpath)
        root = tree.getroot()

        img_path = os.path.abspath(os.path.join(os.path.dirname(fpath), "..", root.find("filename").text))
        img_label = []
        size = root.find("size")
        width = float(size.find("width").text)
        height = float(size.find("height").text)
        for child in root.findall("object"):
            name = child.find("name").text
            if "add" in name or "remove"in name:
                bndbox = child.find("bndbox")
                xmin = float(bndbox.find("xmin").text)/width
                ymin = float(bndbox.find("ymin").text)/height
                xmax = float(bndbox.find("xmax").text)/width
                ymax = float(bndbox.find("ymax").text)/height
                img_label.append((ymin, xmin, ymax, xmax, name))
        return img_label
    except Exception as ex:
        print (ex)
        return []

In [22]:
def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of intersection rectangle
    interArea = max(0, xB - xA) * max(0, yB - yA)

    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou

In [23]:
def filter_detections_create_Ds(detections, detection_labels, max_iou=.3, max_filtered=10, rand_select=2):
    filtered_idx = []
    for i in range(len(detections)):
        detection = detections[i]
        add_to_filter = True
        for label in detection_labels:
            if bb_intersection_over_union(detection, label) > max_iou:
                add_to_filter = False
                break

        for fi in filtered_idx:
            if bb_intersection_over_union(detection, detections[fi]) > 0.1:
                add_to_filter = False
                break

        if add_to_filter:
            filtered_idx.append(i)

        if len(filtered_idx) > max_filtered:
            break
    if len(filtered_idx) > rand_select:
        filtered_idx = random.sample(filtered_idx, rand_select)
    
    return detections[filtered_idx]

In [24]:
sessions_path = "/home/wc-gpu/storage4tb/session_data_thesis/sessions160000_165000/"
processed_path = "/home/wc-gpu/storage4tb/session_data_thesis/sessions160000_165000/processed_sessions"

In [30]:
X2all = dict()
Y2all = dict()
X3all = dict()
Y3all = dict()
c_ses = 0
all_ses = len(os.listdir(processed_path))
for session in os.listdir(processed_path):
    print ("progress", session, "  : ", c_ses / all_ses, "                   \r")
    proc_path = os.path.join(processed_path, session)
    output_dicts = []
    with open(proc_path, "rb") as f:
        output_dicts = pkl.load(f)[0]
    
    X2 = []
    Y2 = []
    X3 = []
    Y3 = []
    previous_features = None
    previous_boxes = None
    prev2_features = None
    prev2_boxes = None
    for output_dict in output_dicts:
        try:

            image_name = os.path.splitext(output_dict["image_path"].split("/")[-1])[0]
            ann_path = os.path.join(sessions_path,session,"annotations", image_name + ".xml")
            
            annotations = get_labels(ann_path) if os.path.exists(ann_path) else []
            output_dict = filter_detections_same_class(output_dict, annotations)

            detection_boxes = output_dict["detection_boxes"]
            detection_scores = output_dict["detection_scores"]
            features = output_dict["features"]
            detection_boxes = filter_detections_create_Ds(detection_boxes, annotations)
            boxes = (detection_boxes, annotations)
#             print (len(detection_boxes), len(annotations))

            if previous_features is not None:
                for label in boxes[0]:
                    box_prev = previous_features[
                        floor(label[0]*previous_features.shape[0]):
                        ceil(label[2]*previous_features.shape[0]), 
                        floor(label[1]*previous_features.shape[1]):
                        ceil(label[3]*previous_features.shape[1]),:]
                    box_cur = features[
                        floor(label[0]*features.shape[0]):
                        ceil(label[2]*features.shape[0]), 
                        floor(label[1]*features.shape[1]):
                        ceil(label[3]*features.shape[1]),:]

                    max_pooled = get_max_and_concat(box_prev, previous_features, box_cur, features)
                    X2.append(max_pooled)
                    Y2.append(0)

                    if prev2_features is not None:
                        box_prev2 = prev2_features[
                            floor(label[0]*previous_features.shape[0]):
                            ceil(label[2]*previous_features.shape[0]), 
                            floor(label[1]*previous_features.shape[1]):
                            ceil(label[3]*previous_features.shape[1]),:]
                        max_pooled = get_max_and_concat(
                            box_prev2, prev2_features,
                            box_prev, previous_features,
                            box_cur, features)
                        X3.append(max_pooled)
                        Y3.append(0)



                for label in boxes[1]:
                    if("add" in label[4]):
                        box_prev = previous_features[
                            floor(label[0]*previous_features.shape[0]):
                            ceil(label[2]*previous_features.shape[0]), 
                            floor(label[1]*previous_features.shape[1]):
                            ceil(label[3]*previous_features.shape[1]),:]
                        box_cur = features[
                            floor(label[0]*features.shape[0]):
                            ceil(label[2]*features.shape[0]), 
                            floor(label[1]*features.shape[1]):
                            ceil(label[3]*features.shape[1]),:]

                        max_pooled = get_max_and_concat(box_prev, previous_features, box_cur, features)
                        X2.append(max_pooled)
                        Y2.append(1)

                        if prev2_features is not None:
                            box_prev2 = prev2_features[
                                floor(label[0]*previous_features.shape[0]):
                                ceil(label[2]*previous_features.shape[0]), 
                                floor(label[1]*previous_features.shape[1]):
                                ceil(label[3]*previous_features.shape[1]),:]

                            max_pooled = get_max_and_concat(
                                box_prev2, prev2_features,
                                box_prev, previous_features,
                                box_cur, features)
                            X3.append(max_pooled)
                            Y3.append(1)


                for label in previous_boxes[1]:
                    if("remove" in label[4]):
                        box_prev = previous_features[
                            floor(label[0]*previous_features.shape[0]):
                            ceil(label[2]*previous_features.shape[0]), 
                            floor(label[1]*previous_features.shape[1]):
                            ceil(label[3]*previous_features.shape[1]),:]
                        box_cur = features[
                            floor(label[0]*features.shape[0]):
                            ceil(label[2]*features.shape[0]), 
                            floor(label[1]*features.shape[1]):
                            ceil(label[3]*features.shape[1]),:]

                        max_pooled = get_max_and_concat(box_cur, features, box_prev, previous_features)
                        X2.append(max_pooled)
                        Y2.append(1)

                if prev2_boxes is not None:
                    for label in prev2_boxes[1]:
                        if(label[4] == "remove"):
                            box_prev = previous_features[
                                floor(label[0]*previous_features.shape[0]):
                                ceil(label[2]*previous_features.shape[0]), 
                                floor(label[1]*previous_features.shape[1]):
                                ceil(label[3]*previous_features.shape[1]),:]
                            box_cur = features[
                                floor(label[0]*features.shape[0]):
                                ceil(label[2]*features.shape[0]), 
                                floor(label[1]*features.shape[1]):
                                ceil(label[3]*features.shape[1]),:]

                            box_prev2 = prev2_features[
                                floor(label[0]*previous_features.shape[0]):
                                ceil(label[2]*previous_features.shape[0]), 
                                floor(label[1]*previous_features.shape[1]):
                                ceil(label[3]*previous_features.shape[1]),:]

                            max_pooled = get_max_and_concat(
                                box_cur, features,
                                box_prev, previous_features,
                                box_prev2, prev2_features)
                            X3.append(max_pooled)
                            Y3.append(1)

            prev2_features = previous_features
            prev2_boxes = previous_boxes
            previous_features = features
            previous_boxes = boxes

        except Exception as ex:
            raise ex
            print (session, ex)
    break
    c_ses += 1
    X2 = np.asarray(X2)
    Y2 = np.asarray(Y2)
    X3 = np.asarray(X3)
    Y3 = np.asarray(Y3)

#     print(X2.shape, Y2.shape, X3.shape, Y3.shape)
#     print ("progress", session, "  : ", c_ses / all_ses)
    clear_output()


    X2all[session] = X2
    X3all[session] = X3
    Y2all[session] = Y2
    Y3all[session] = Y3

progress 123178   :  0.0                    
(1, 2, 2, 1024)


ValueError: could not broadcast input array from shape (2,2,1024) into shape (1024)

In [None]:
with open(sessions_path + "/dataset_prod_new_model_binary_1prev_session_split_bla_2x2.pkl", "wb") as f:
    pkl.dump([X2all, Y2all], f)

In [None]:
with open(sessions_path + "/dataset_prod_new_model_binary_2prev_session_split_bla._2x2pkl", "wb") as f:
    pkl.dump([X3all, Y3all], f)