In [None]:
# --------------------------------------------------------
# RP-FEM: Relational Prior Knowledge Graphs for Detection and Instance Segmentation
# Copyright (c) 2023
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

import json
import numpy as np

from tqdm import tqdm
from coco import COCO_CLASSES

In [None]:
ROOT_VG = "path/to/VisualGenome/"


def _load_json(path):
    """Read and parse a single JSON file.

    The file is opened explicitly as UTF-8 instead of with the platform's
    locale encoding, so the notebook decodes the same data on every
    operating system.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document (a dict or a list, depending on the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# The file name is appended to ROOT_VG by plain string concatenation, so
# ROOT_VG has to end with a path separator.
objects_file = _load_json(ROOT_VG + 'objects.json')

# The three mapper/count files are read from the current working directory.
# NOTE(review): presumably produced by an earlier preprocessing step — confirm.
SYNSET_TO_NAMES_MAPPER = _load_json('SYNSET_TO_NAME_MAPPER.json')
COCO_TO_VG_SYNSET_MAPPER = _load_json('COCO_TO_VG_SYNSET_MAPPER.json')
COCO_CLASSES_IN_VG_COUNT = _load_json('COCO_CLASSES_IN_VG_COUNT.json')

In [None]:
# Invert COCO_TO_VG_SYNSET_MAPPER: for every name listed under a key, collect
# the keys (classes) whose entry lists that name.
# NOTE(review): assumes every mapper value is a list of names — confirm.
COCO_TO_VG_SYNSET_MAPPER_INVERSE = {}
for coco_class, vg_names in COCO_TO_VG_SYNSET_MAPPER.items():
    for vg_name in vg_names:
        COCO_TO_VG_SYNSET_MAPPER_INVERSE.setdefault(vg_name, []).append(coco_class)

# Zero-initialised counters keyed by every class in COCO_CLASSES_IN_VG_COUNT:
#   occurrence_c[a]          -> number of times class a was counted
#   co_occurrence_c_c[a][b]  -> number of times a and b were counted together
# NOTE(review): the counting loop indexes these with the mapper's keys, so the
# mapper keys are assumed to be a subset of COCO_CLASSES_IN_VG_COUNT — confirm.
occurrence_c = {top_class: 0 for top_class in COCO_CLASSES_IN_VG_COUNT}
co_occurrence_c_c = {
    top_class: {top_class_inner: 0 for top_class_inner in COCO_CLASSES_IN_VG_COUNT}
    for top_class in COCO_CLASSES_IN_VG_COUNT
}


def _mapped_coco_classes(vg_object):
    """Return the classes one annotated object contributes to.

    Every synset is reduced to the text before its first "." with underscores
    replaced by spaces. Names that do not occur in
    COCO_TO_VG_SYNSET_MAPPER_INVERSE are dropped and repeated names are kept
    only once, in first-seen order. Each remaining name is then expanded to
    all classes stored for it in the inverse mapper.

    Args:
        vg_object: A dict with a "synsets" list of strings.

    Returns:
        A list of class names; empty when the object has no synsets or none
        of them map to a class.
    """
    matched_names = []
    for synset in vg_object["synsets"]:
        name = synset.split(".")[0].replace('_', ' ')
        if name in COCO_TO_VG_SYNSET_MAPPER_INVERSE and name not in matched_names:
            matched_names.append(name)
    return [
        coco_class
        for name in matched_names
        for coco_class in COCO_TO_VG_SYNSET_MAPPER_INVERSE[name]
    ]


for image in tqdm(objects_file, total=len(objects_file)):
    # Resolve each object to its classes once per image. Doing this inside the
    # pair loop below would repeat the same work for every (i, j) combination.
    labelled_objects = []
    for vg_object in image['objects']:
        classes = _mapped_coco_classes(vg_object)
        if classes:
            labelled_objects.append((vg_object['object_id'], classes))

    for object_i_id, classes_i in labelled_objects:
        for class_i in classes_i:
            occurrence_c[class_i] += 1

        # Iterate over neighboring objects
        for object_j_id, classes_j in labelled_objects:
            # Skip statistics with the identical object
            if object_i_id == object_j_id:
                continue
            for class_i in classes_i:
                counts_for_i = co_occurrence_c_c[class_i]
                for class_j in classes_j:
                    counts_for_i[class_j] += 1


In [None]:
# Persist the per-class occurrence counts (occurrence_c) so that the other
# relation types that need them can read them back from disk
with open("VG_CO-OCC_COUNT.json", mode="w") as occurrence_out:
    json.dump(occurrence_c, fp=occurrence_out)


In [None]:
# Normalize the co-occurrence count of every class pair by the sum of the two
# classes' individual occurrence counts.
# The result is written to a NEW nested dict: the raw counts in
# co_occurrence_c_c stay untouched, so re-running this cell always produces
# the same values instead of normalizing already-normalized numbers.
weighted_co_occurrence_c_c = {}
for class_i, pair_counts in co_occurrence_c_c.items():
    occurrence_class_i = occurrence_c[class_i]
    weighted_row = {}
    for class_j, pair_count in pair_counts.items():
        occurrence_class_j = occurrence_c[class_j]
        if pair_count > 0:
            weighted_row[class_j] = pair_count / (occurrence_class_i + occurrence_class_j)
        else:
            # Pairs that never co-occur keep their zero count; this also avoids
            # dividing by zero for classes that never occur at all.
            weighted_row[class_j] = pair_count
    weighted_co_occurrence_c_c[class_i] = weighted_row

# Dense matrix in dict iteration order (rows = class_i, columns = class_j).
# dtype=float is forced because an all-zero matrix would otherwise be an
# integer array, on which the in-place division below raises.
values = np.array(
    [list(weighted_row.values()) for weighted_row in weighted_co_occurrence_c_c.values()],
    dtype=float,
)

# Rescale so the largest entry becomes 1. An empty or all-zero matrix is left
# unchanged rather than raising or being filled with NaN.
max_value = np.amax(values) if values.size else 0.0
if max_value > 0:
    values /= max_value

# Label rows/columns with the COCO_CLASSES values, skipping the first entry.
# NOTE(review): presumably the skipped entry is the background class, and the
# remaining names are assumed to line up with the key order of
# COCO_CLASSES_IN_VG_COUNT — confirm against coco.py.
coco_names = list(COCO_CLASSES.values())[1:]
values_dict = {
    row: {column: float(values[c1][c2]) for c2, column in enumerate(coco_names)}
    for c1, row in enumerate(coco_names)
}


In [None]:
# Write the final co-occurrence values (values_dict) to disk
with open("co-occurrence.json", mode="w") as co_occurrence_out:
    json.dump(values_dict, fp=co_occurrence_out)