In [1]:
import json
import numpy as np
from tqdm import tqdm
from time import sleep

# load the subset of the dataset; the context manager closes the file handle
# as soon as the JSON has been parsed (a bare open() would leave it open)
with open('../data/subsets/visdial_1.0_train_10percent_subset.json') as subset_file:
    data = json.load(subset_file)['data']
dialogs = data['dialogs']
# one entry per dialog, in the same order as `dialogs`
# NOTE(review): these values are later handed to cv2.imread as paths -- confirm
# that 'image_id' in this subset really holds a readable file path
image_paths = [dialog['image_id'] for dialog in dialogs]

In [2]:
'''Instance segmentation for all images in the subset dataset
Mask R-CNN is used in this case with ResNet-101-FPN backbone'''
# import some common detectron2 utilities, some image processing libraries, and setup detectron2 logger
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import cv2
import torch
import detectron2
from detectron2.utils.visualizer import Visualizer
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.logger import setup_logger
setup_logger()
%matplotlib inline

def int_box_coordinates(pred_boxes):
    '''Convert predicted boxes into plain lists of four integers.

    Args:
        pred_boxes: iterable of boxes; every item must support
            ``.cpu().numpy()`` and yield at least four numeric values.
            Presumably a detectron2 ``Boxes`` object whose rows are
            (x, y) of the top-left corner followed by (x, y) of the
            bottom-right corner -- confirm against the model output format.

    Returns:
        A list with one ``[v0, v1, v2, v3]`` list per box, in the same order
        as ``pred_boxes``. The four values are copied in their original order
        and converted with ``int()``, which truncates toward zero.
    '''
    box_coordinates = []
    for pred_box in pred_boxes:
        # convert the box to a host-side array once, not once per coordinate
        coords = pred_box.cpu().numpy()
        box_coordinates.append([int(coords[i]) for i in range(4)])
    return box_coordinates


# predictors that have already been built, keyed by (config name, threshold),
# so the model is configured and its weights loaded once instead of per image
_PREDICTORS = {}


def _get_predictor(config_name="COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml",
                   score_thresh=0.5):
    '''Return a cached DefaultPredictor for a detectron2 model-zoo config, building it on first use.'''
    key = (config_name, score_thresh)
    if key not in _PREDICTORS:
        # create a configuration for the model
        cfg = get_cfg()
        # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
        cfg.merge_from_file(model_zoo.get_config_file(config_name))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh  # set threshold for this model
        # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
        # cfg.MODEL.DEVICE = "cpu"  # uncomment when no GPU is available
        _PREDICTORS[key] = DefaultPredictor(cfg)
    return _PREDICTORS[key]


def instance_segmentation(image_path):
    '''Run the instance-segmentation model on one image and return its boxes.

    Args:
        image_path: path of the image, passed unchanged to ``cv2.imread``.

    Returns:
        The result of ``int_box_coordinates`` applied to the predicted boxes:
        one list of four integers per detected instance, in the order the
        model reports them (empty when nothing is detected).

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot read an image from
            ``image_path`` (it returns ``None`` instead of raising).
    '''
    # load the image from the path provided
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(
            f"cv2.imread could not read an image from {image_path!r}")
    outputs = _get_predictor()(img)
    # look at the outputs. See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format for specification
    pred_boxes = outputs["instances"].pred_boxes
    return int_box_coordinates(pred_boxes)

# segment every image of the subset; results keep the order of image_paths
instance_boxes = list(map(instance_segmentation, tqdm(image_paths)))

model_final_a3ec72.pkl: 254MB [00:12, 20.7MB/s]                           
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 12329/12329 [3:50:54<00:00,  1.12s/it] 


In [3]:
# write the per-image bounding boxes of the subset to disk as JSON
# each box is stored in the order x, y (top-left corner), x_width, y_height (bottom-right corner)
bounding_boxes_path = '../embeddings/images/instance/10/res101_bounding_boxes.json'
with open(bounding_boxes_path, 'w') as boxes_file:
    json.dump(instance_boxes, boxes_file)


In [4]:
import json
import numpy as np
from tqdm import tqdm
from time import sleep

# load the saved bounding boxes of the subset so the cells below can run
# without repeating the segmentation; the context manager closes the file
# handle once the JSON has been parsed
with open('../embeddings/images/instance/10/res101_bounding_boxes.json') as boxes_file:
    instance_boxes = json.load(boxes_file)

In [5]:
# https://codereview.stackexchange.com/questions/31352/overlapping-rectangles
class Point:
    '''A 2-D coordinate pair, exposed as the attributes ``x`` and ``y``.'''

    def __init__(self, x, y):
        self.x, self.y = x, y


class Rect:
    '''Axis-aligned rectangle described by its left/right/bottom/top extents.'''

    def __init__(self, p1, p2):
        '''Derive the four extents from two opposite corners.

        p1 and p2 are corner points (objects with ``x`` and ``y``) and may be
        given in either order.
        '''
        xs = (p1.x, p2.x)
        ys = (p1.y, p2.y)
        self.left, self.right = min(xs), max(xs)
        self.bottom, self.top = min(ys), max(ys)

    @staticmethod
    def overlap(r1, r2):
        '''Tell whether r1 and r2 overlap both horizontally and vertically.

        Rectangles that merely share an edge or a corner count as overlapping.
        '''
        # no horizontal overlap means no overlap at all
        if not (r1.left <= r2.right and r1.right >= r2.left):
            return False
        return r1.bottom <= r2.top and r1.top >= r2.bottom


In [6]:
# number of images for which no instance segments were returned
# (82 images in the recorded run)
sum(1 for boxes in instance_boxes if len(boxes) == 0)


82

In [7]:
# positions in instance_boxes of the images that have no instance segments
box_counts = np.array([len(boxes) for boxes in instance_boxes])
list(np.where(box_counts == 0)[0])


[67,
 107,
 161,
 276,
 350,
 374,
 450,
 584,
 610,
 766,
 885,
 1550,
 1573,
 1626,
 1978,
 2629,
 2632,
 2675,
 2779,
 2799,
 3049,
 3166,
 3308,
 3454,
 3612,
 3682,
 3717,
 3867,
 3964,
 4343,
 4381,
 4539,
 4631,
 4920,
 5177,
 5303,
 5580,
 6047,
 6121,
 6399,
 6423,
 6490,
 6587,
 6660,
 6709,
 6935,
 7006,
 7012,
 7127,
 7163,
 7464,
 7644,
 7991,
 8096,
 8111,
 8181,
 8204,
 8209,
 8401,
 8541,
 8725,
 8743,
 8843,
 8959,
 9029,
 9037,
 9069,
 9105,
 9187,
 9542,
 9579,
 10168,
 10430,
 10753,
 11000,
 11245,
 11305,
 11701,
 11833,
 12039,
 12111,
 12127]

In [8]:
# For every image, collect the [earlier, later] index pairs of bounding boxes
# that overlap. Indices are positions in that image's list of boxes, and each
# unordered pair is reported once with the smaller index first.
edge_list = []
for image_instances in tqdm(instance_boxes):  # one list of boxes per image
    # each stored box is [x, y, x_width, y_height]: (x, y) is the top-left
    # corner and (x_width, y_height) the bottom-right corner, so the two
    # corner points are (box[0], box[1]) and (box[2], box[3])
    rects = [Rect(Point(box[0], box[1]), Point(box[2], box[3]))
             for box in image_instances]
    interactions = []
    # compare every box with all boxes that precede it; images with zero or
    # one box simply produce no pairs. Positions come from range() rather
    # than list.index(), so two identical boxes keep their own indices
    for later in range(1, len(rects)):
        for earlier in range(later):
            if Rect.overlap(rects[earlier], rects[later]):
                interactions.append([earlier, later])
    edge_list.append(interactions)


100%|██████████| 12329/12329 [00:03<00:00, 3871.42it/s]


In [9]:
# write the per-image edge lists (index pairs of overlapping boxes) to disk as JSON
edge_list_path = '../embeddings/images/instance/10/res101_edge_list.json'
with open(edge_list_path, 'w') as edge_file:
    json.dump(edge_list, edge_file)


In [10]:
# Build one adjacency matrix per image from its edge list.
# edge_list and instance_boxes are parallel lists (one entry per image), so
# they are walked together; looking the image up with edge_list.index() would
# return the first image whose edge list is equal (e.g. any earlier empty
# list) and size the matrix from the wrong image
assert len(edge_list) == len(instance_boxes), "edge_list and instance_boxes must describe the same images"
adj_matrix = []
for element, image_instances in zip(edge_list, instance_boxes):
    # the matrix has one row and one column per box of this image
    size = len(image_instances)
    # initialize adjacency matrix with independent rows of zeros
    adj_matrix_element = [[0] * size for _ in range(size)]
    # add edges to the adjacency matrix; only [row][col] is set, so the
    # matrix is not mirrored across the diagonal
    for row, col in element:
        adj_matrix_element[row][col] = 1
    # add the adjacency matrix to the list of adjacency matrices
    adj_matrix.append(adj_matrix_element)


In [11]:
# write the per-image adjacency matrices to disk as JSON
adj_matrix_path = '../embeddings/images/instance/10/res101_adj_list.json'
with open(adj_matrix_path, 'w') as adj_file:
    json.dump(adj_matrix, adj_file)
