# Conversion of the dataset into predicates

In [7]:
import json
import os
import random
from IPython.display import display
from pyswip import *
import subprocess

## Loading training and validation scenes and questions

In [8]:
with open("/media/murali/My Passport/GQA/sceneGraphs/train_sceneGraphs.json", 'r') as file:
    train_scene = json.load(file)

with open("/media/murali/My Passport/GQA/sceneGraphs/val_sceneGraphs.json", 'r') as file:
    val_scene= json.load(file)
    
with open("/media/murali/My Passport/GQA/questions1.2/train_balanced_questions.json", 'r') as file:
    train_ques = json.load(file)

with open("/media/murali/My Passport/GQA/questions1.2/val_balanced_questions.json", "r") as file:
    val_ques = json.load(file)

## Enumerating the Semantic operations

In [11]:
all_sem_ops = []

for qkey in train_ques:
    question = train_ques[qkey]
    sem_ops = question["semantic"]
    for sem_op in sem_ops:
        all_sem_ops.append(sem_op["operation"])

for qkey in val_ques:
    question = val_ques[qkey]
    sem_ops = question["semantic"]
    for sem_op in sem_ops:
        all_sem_ops.append(sem_op["operation"])
        
all_sem_ops = sorted(list(set(all_sem_ops)))

print("Total semantic operations: {}".format(len(all_sem_ops)))

Total semantic operations: 136


## Enumerating the Objects, Relations and Attributes

In [12]:
all_relations = []
all_attributes = []
all_objects = []

for skey in train_scene:
    objects = train_scene[skey]["objects"]
    for okey in objects:
        for relations in objects[okey]["relations"]:
            all_relations.append(relations["name"])
        all_attributes += objects[okey]["attributes"]
        all_objects.append(objects[okey]["name"])
        
for skey in val_scene:
    objects = val_scene[skey]["objects"]
    for okey in objects:
        for relations in objects[okey]["relations"]:
            all_relations.append(relations["name"])
        all_attributes += objects[okey]["attributes"]
        all_objects.append(objects[okey]["name"])

all_relations = sorted(list(set(all_relations)))
all_attributes = sorted(list(set(all_attributes)))
all_objects = sorted(list(set(all_objects)))

print( "Number of objects: {}\n\
Number of attributes: {}\n\
Number of relations: {}".format(len(all_objects), len(all_attributes), len(all_relations)))


Number of objects: 1703
Number of attributes: 617
Number of relations: 310


## Writing all the constants to a file

In [13]:
with open("./all_objects.txt", "w") as file:
    for objects in all_objects:
        file.write(objects+"\n")
        
with open("./all_attributes.txt", "w") as file:
    for attr in all_attributes:
        file.write(attr+"\n")

with open("./all_relations.txt", "w") as file:
    for rel in all_relations:
        file.write(rel+"\n")
        
with open("./all_sem_ops.txt", "w") as file:
    for ops in all_sem_ops:
        file.write(ops+"\n")

## Enumerating the location and weather constants 

In [14]:
all_weather = []
all_location = []
for skey in train_scene:
    try:
        all_weather.append(train_scene[skey]["weather"])
    except:
        pass
    
    try:
        all_location.append(train_scene[skey]["location"])
    except:
        pass

all_weather = set(all_weather)
all_location = set(all_location)
print(all_weather)
print(all_location)

{'cloudy', 'dark', 'clear', 'overcast', 'cloudless', 'sunny', 'foggy', 'partly cloudy', 'stormy', 'rainy'}
{'indoors', 'outdoors'}


## Extracting Binary questions

In [15]:
binary_ques = {}
binary_sem_operations = []
for qkey in train_ques:
    answer = train_ques[qkey]["answer"]
    if answer == 'yes' or answer == 'no':
        binary_ques[qkey] = train_ques[qkey]
        sem_ops = train_ques[qkey]["semantic"]
        for sem_op in sem_ops:
            binary_sem_operations.append(sem_op["operation"])

binary_sem_operations = sorted(list(set(binary_sem_operations)))
for op in binary_sem_operations:
    print(op)

print(len(binary_ques))

and
different
different color
different shape
exist
filter
filter activity
filter age
filter brightness
filter cleanliness
filter color
filter company
filter depth
filter event
filter face expression
filter fatness
filter flavor
filter gender
filter hardness
filter height
filter hposition
filter length
filter liquid
filter material
filter opaqness
filter orientation
filter pattern
filter pose
filter race
filter realism
filter room
filter shape
filter size
filter sport
filter sportActivity
filter state
filter thickness
filter tone
filter vposition
filter weather
filter weight
filter width
or
relate
same
same color
same material
same shape
select
verify
verify activity
verify age
verify brightness
verify cleanliness
verify color
verify company
verify depth
verify face expression
verify fatness
verify flavor
verify gender
verify hardness
verify height
verify hposition
verify length
verify location
verify material
verify opaqness
verify pattern
verify place
verify pose
verify race
verify r

## Setting up constants

Here we set up constants for objects, attributes, weather, locations, horizontal and vertical positions. The object, attribute, weather and location constants are sourced from the dataset. We consider ("left", "right"), ("top", "bottom") for horizontal position and the vertical position constants respectively.

In [16]:
objectsConst = {val: k for k, val in enumerate(all_objects)}
attributeConst = {val : k for k, val in enumerate(all_attributes)}
weatherConst = {val : k for k, val in enumerate(all_weather)}
locationConst = {val : k for k, val in enumerate(all_location)}
hposConst = {"left" : 0, "right" : 1}
vposConst = {"top" : 0, "bottom" : 1, "middle" : 2}

Constants = {"O" : objectsConst, "A" : attributeConst, "W" : weatherConst, "L" : locationConst, "H" : hposConst, "V" : vposConst}

## Predicates and Knowledge base Class

In [17]:
class predicate:
    def __init__(self, name, arity, var_types):
        self.name = name
        self.arity = arity
        self.var_types = var_types
        
    def printPred(self):
        print("{}(".format(self.name), end="")
        for i in range(self.arity-1):
            print("{}, ".format(self.var_types[i]), end="")
        print("{})".format(self.var_types[-1]))
        
class knowledgeBase:
    def __init__(self, sceneID):
        self.sceneID = sceneID
        self.predicateList = dict()
        self.bg = dict()
    
    def addPredicate(self, pred):
        self.predicateList[pred.name] = pred

        
    def addBackground(self, bg):
        # bg = ["name", "obj1", "obj2"]
        try:
            if bg[0] == "weather" or bg[0] == "location" or bg[0] == "exist":
                self.bg[bg[0]].append((bg[1]))
            else:
                self.bg[bg[0]].append((bg[1], bg[2]))
        except:
            if bg[0] == "weather" or bg[0] == "location" or bg[0] == "exist":
                self.bg[bg[0]] = [(bg[1])]
            else:
                self.bg[bg[0]] = [(bg[1], bg[2])]
    

## Generating global predicates and background knowledge

In [18]:
globalPreds = {"same" : predicate("same", 2, ["A", "A"])}

globalKB = knowledgeBase("global")
globalKB.addPredicate(globalPreds["same"])

for attr in all_attributes:
    attrID = attributeConst[attr]
    bg = ["same", attrID, attrID]
    globalKB.addBackground(bg)

## Generating predicates for object relations and attributes

In [19]:
relationPreds = {rels: predicate(rels, 2, ["O", "O"]) for rels in all_relations}
attributePreds = {"attribute" : predicate("attribute", 2, ["O", "A"])}
objectPreds = {"object" : predicate("object", 2, ["O", "O"])}
weatherPreds = {"weather" : predicate("weather", 1, ["W"])}
locationPreds = {"location" : predicate("location", 1, ["L"])}
hposPreds = {"hpos" : predicate("hpos", 2, ["O", "H"])}
vposPreds = {"vpos" : predicate("vpos", 2, ["O", "V"])}
existPreds = {"exist" : predicate("exist", 1, ["O"])}

## Generating the knowledge base for every scene

In [20]:
def determineHPos(xpos, width):
    if xpos < width / 2:
        return ("left")
    else:
        return ("right")

def determineVPos(ypos, height):
    if ypos < height / 3:
        return ("top")
    elif ypos >= height / 3 and ypos < (2/3)*height:
        return ("middle")
    else:
        return ("bottom")

In [21]:
trainSceneData = {}

for i, sid in enumerate(train_scene):
    height, width = train_scene[sid]["height"], train_scene[sid]["width"]
    sceneKB = knowledgeBase(sid)
    try:
        weather = train_scene[sid]["weather"]
        wID = weatherConst[weather]
        w_bg = ["weather", wID]
        sceneKB.addPredicate(weatherPreds["weather"])
        sceneKB.addBackground(w_bg)
    except:
        pass
    
    try:
        location = train_scene[sid]["location"]
        lID = locationConst[location]
        l_bg = ["location", lID]
        sceneKB.addPredicate(locationPreds["location"])
        sceneKB.addBackground(l_bg)
    except:
        pass
    
    objects = train_scene[sid]["objects"]
    local_map = { okey : [i, objectsConst[objects[okey]["name"]]] for i, okey in enumerate(objects) }
    local_map["null"] = [len(objects), "null"]
    for okey in objects:
        obj1ID = local_map[okey][0]
        objName = local_map[okey][1]
        sceneKB.addPredicate(objectPreds["object"])
        sceneKB.addBackground(["object", obj1ID, objName])
        
        x, y = objects[okey]["x"], objects[okey]["y"]
        hpos = hposConst[determineHPos(x, width)]
        vpos = vposConst[determineVPos(y, height)]
        
        o_bg = ["exist", obj1ID]
        sceneKB.addPredicate(existPreds["exist"])
        sceneKB.addBackground(o_bg)
        
        h_bg = ["hpos", obj1ID, hpos]
        v_bg = ["vpos", obj1ID, vpos]
        sceneKB.addPredicate(vposPreds["vpos"])
        sceneKB.addBackground(v_bg)
        sceneKB.addPredicate(hposPreds["hpos"])
        sceneKB.addBackground(h_bg)
        
        for attr in objects[okey]["attributes"]:
            attrID = attributeConst[attr]
            bg = ["attribute", obj1ID, attrID]
            sceneKB.addPredicate(attributePreds["attribute"])
            sceneKB.addBackground(bg)
        
        for relation in objects[okey]["relations"]:
            obj2ID = local_map[relation["object"]][0]
            bg = [relation["name"].replace(" ", "_"), obj1ID, obj2ID]
            sceneKB.addPredicate(relationPreds[relation["name"]])
            sceneKB.addBackground(bg)
    
    trainSceneData[sid] = {"KB" : sceneKB, "local map" : local_map}

## Sample binary questions

In [22]:
random.seed(1056)
N_ques = 10
sample_ques_key = random.sample(list(binary_ques.keys()), N_ques)
for key in sample_ques_key:
    print(key)
    question = binary_ques[key]
    ques = question["question"]
    semString = question["semanticStr"]
    ans = question["answer"]
    print("{}\n{}\nAnswer: {}".format(ques, semString, ans))

1091333
Are there either flags or books?
select: flag (983557)->exist: ? [0]->select: book (-) ->exist: ? [2]->or:  [1, 3]
Answer: yes
03616644
Are there either any tea kettles or containers?
select: container (-)->exist: ? [0]->select: tea kettle (-) ->exist: ? [2]->or:  [1, 3]
Answer: no
02275312
Do you see animals to the right of the grazing animal on the grass?
select: grass (1830805)->relate: animal,on,s (1708132) [0]->filter: grazing [1]->relate: animals,to the right of,s (2643211) [2]->exist: ? [3]
Answer: no
04700826
Is the bacon to the left of the cheese in the middle of the photo?
select: bacon (3079570)->verify rel: cheese,to the left of,o (2680771) [0]
Answer: yes
07552336
Are there any toilets to the right of the water hose?
select: hose (467876)->relate: toilet,to the right of,s (-) [0]->exist: ? [1]
Answer: no
06570691
Do you see vans to the right of the black truck?
select: truck (1811853)->filter color: black [0]->relate: van,to the right of,s (-) [1]->exist: ? [2]
Ans

In [23]:
not_operation = []
all_args = []
count = 0
op_keys = []
for key in binary_ques:
    question = binary_ques[key]
    semOperations = question["semantic"]
    for op in semOperations:
        operation = op["operation"]
        if operation == "same":
            print(key)
            print(question["question"])
            print(question["semanticStr"])
            count += 1
            op_keys.append(key)
            
    if count >= 100:
        break

# all_args = sorted(list(set(all_args)))
# print(all_args)
        

15867119
Are these two animals of the same species?
select: animal (1249437,1249430)->same: type [0]
051001757
Are the two animals of the same type?
select: animal (2547536,3608858)->same: type [0]
16451217
Are both these animals of the same type?
select: animal (991976,991990)->same: type [0]
0128094
Are the animals horses?
select: animal (1273572,1273585)->same: type [0]
0485991
Are the animals cows?
select: animal (1049558,1049556,1049562,1049568)->same: type [0]
1469734
Do these people have the same gender?
select: person (1897990,1933021,1876509)->same: gender [0]
14459036
Do all the animals have the same type?
select: animal (2334582,1870044,1944619,2031407)->same: type [0]
14513521
Are the animals in this scene all of the same species?
select: animal (908481,908479,908473,908475,908477)->same: type [0]
02884662
Are both the animals of the same type?
select: animal (1193030,1193048)->same: type [0]
04889592
Are the people the same gender?
select: person (1603439,1603434,1603435,1

## Functional form to FOL rule

The procedure defined below converts a question in its functional form into a FOL clause defining the target predicate. For example, for the question "Is the grass green and tall?" having the functional form "`select: grass (4569011)->verify color: green [0]->verify height: tall  [0]->and:  [1, 2]`" the FOL clause is as follows:
<div align="center">target$(X, Y, Z) \gets$ attribute$(X, Y)$, attribute$(X, Z)$</div>
With the query values being $X = $ grass, $Y = $ green, and $Z =$ tall. For binary question, the following operations are considered:

* select
* exist
* filter
* relate
* verify
    * location
    * weather
    * hposition
    * vposition
    * rel
    * "attribute"
* and
* or
* same
* different

Each operation stated above has an argument and a dependent variable indicated by the number within the square brackets - in the above example for the operation `verify color` the argument was "green" and the dependent variable was "grass". Let $A$ and $D$ denote the argument and the dependent variable for a given operation, the order in which there variables are used in the corresponding predicate is dependent on the operation. For all operations but `relate` the order is predicate$(D, A)$ and is the exact opposite for `relate`. For example, consider the excerpt `select: grass (4569011)->verify color: green [0]`, this would translate to `attribute(grass, green)` where `select: man (1163880)->relate: pitcher,next to,s (1163889)` would result in `next_to(pitcher, man)`.

In [24]:
def ff2Clause(question, trainSceneData):
    imageId = question["imageId"]
    sceneData = trainSceneData[imageId]
    semOperations = question["semantic"]
    local_map = sceneData["local map"]
    
    target = {"args" : None, "body" : [], "query" : None}
    arg_stack = {}
    var_dict = {}
    
    var_map = {i : chr(ord('A') + i) for i in range(26)}
    for i in range(26):
        var_map[i + 26] = chr(ord('A') + i)*2
        var_map[i + 52] = chr(ord('A') + i)*3
    v_count = 0
    for i, op in enumerate(semOperations):
        operation = op["operation"]
        dependencies = op["dependencies"]
        arguments = op["argument"]
    
        operation_list = operation.split(" ")
        if operation_list[0] == "select":
            if len(target["body"]) == 0:
                target["body"].append([])
                target["body"][-1].append(i)
            elif len(target["body"][-1]) > 1:
                target["body"].append([])
                target["body"][-1].append(i)

            args_list = arguments.split("(")
            if len(args_list) > 1:
                okeys = args_list[1].rstrip(" ").rstrip(')')
                if okeys == "-":
                    okey = "null"
                    objID = local_map[okey][0]
                    arg_stack[i] = [[args_list[0], var_map[v_count], "O"]]
                    var_dict[var_map[v_count]] = objID
                    v_count += 1
                else:
                    arg_stack[i] = []
                    okeys_list = okeys.split(",")
                    for okey in okeys_list:
                        objID = local_map[okey][0]
                        arg_stack[i].append([args_list[0], var_map[v_count], "O"])
                        var_dict[var_map[v_count]] = objID
                        v_count += 1
            else:
                arg_stack[i] = ["scene"]
        
        elif operation_list[0] == "exist":
            dep_arg = arg_stack[dependencies[0]][0]
            predicate = {"prefix" : None, "pred" : existPreds["exist"], "name": "exist", "var" : (dep_arg[1])}
            target["body"][-1].append(predicate)
            target["body"]
            target["body"][-1][0] = i
        
        elif operation_list[0] == "filter":
            dep_arg = arg_stack[dependencies[0]][0]
            arg_stack[i] = [dep_arg] 
#             arg_list = arguments.split(" ")
            predicate = {"prefix" : None}
            if arguments[:4] == "not(":
                attr = arguments.lstrip("not").lstrip("(").rstrip(")")
                attrID = attributeConst[attr]
                predicate["prefix"] = "not"
                predicate["name"] = "attribute"
                predicate["pred"] = attributePreds["attribute"]
            else:
                if len(operation_list) == 2 and operation_list[1] == "hposition":
                    attr = arguments
                    attrID = hposConst[attr]
                    predicate["name"] = "hpos"
                    predicate["pred"] = hposPreds["hpos"]
                    
                elif len(operation_list) == 2 and operation_list[1] == "vposition":
                    attr = arguments
                    attrID = vposConst[attr]
                    predicate["name"] = "vpos"
                    predicate["pred"] = vposPreds["vpos"]
                else:
                    attr = arguments
                    attrID = attributeConst[attr]
                    predicate["name"] = "attribute"
                    predicate["pred"] = attributePreds["attribute"]

            predicate["var"] = (dep_arg[1], var_map[v_count])
            target["body"][-1].append(predicate)
            target["body"][-1][0] = i
            var_dict[var_map[v_count]] = attrID
            v_count += 1
    
        elif operation_list[0] == "relate":
            dep_arg = arg_stack[dependencies[0]][0]
            arg_list = arguments.split(",")
            relation = arg_list[1]
            okey_2 = arg_list[2].split(" ")[1].lstrip("(").rstrip(")")
            if okey_2 == "-":
                okey_2 = "null"
            objID = local_map[okey_2][0]
            arg_stack[i] = [[arg_list[0], var_map[v_count], "O"]]
            
            predicate = {"prefix" : None, "name" : relation.replace(" ", "_"), "pred" : relationPreds[relation], "var" : (var_map[v_count] , dep_arg[1])}
            target["body"][-1].append(predicate)
            target["body"][-1][0] = i
            var_dict[var_map[v_count]] = objID
            v_count += 1
        
        elif operation_list[0] == "verify":
            if len(operation_list) == 2 and operation_list[1] == "location":
                argID = locationConst[arguments]
                predicate = {"prefix" : None, "name" : "location", "pred" : locationPreds["location"], "var" : (var_map[v_count])}
                target["body"][-1].append(predicate)
                target["body"][-1][0] = i
                var_dict[var_map[v_count]] = argID
                v_count += 1
        
            elif len(operation_list) == 2 and operation_list[1] == "weather":
                dep_arg = arg_stack[dependencies[0]][0]
                if dep_arg == "scene":
                    argID = weatherConst[arguments]
                    predicate = {"prefix" : None, "name" : "weather", "pred" : weatherPreds["weather"], "var" : (var_map[v_count])}
                    target["body"][-1].append(predicate)
                    target["body"][-1][0] = i
                    var_dict[var_map[v_count]] = argID
                    v_count += 1
                else:
                    argID = attributeConst[arguments.lstrip().rstrip()]
                    predicate = {"prefix" : None, "name" : "attribute", "pred" : attributePreds["attribute"], "var" : (dep_arg[1], var_map[v_count])}
                    target["body"][-1].append(predicate)
                    target["body"][-1][0] = i
                    var_dict[var_map[v_count]] = argID
                    v_count += 1
                    
            elif len(operation_list) == 2 and operation_list[1] == "hposition":
                dep_arg = arg_stack[dependencies[0]][0]
                argID = hposConst[arguments]
                predicate = {"prefix" : None, "name" : "hpos", "pred" : hposPreds["hpos"], "var" : (dep_arg[1], var_map[v_count])}
                target["body"][-1].append(predicate)
                target["body"][-1][0] = i
                var_dict[var_map[v_count]] = argID
                v_count += 1
                
            elif len(operation_list) == 2 and operation_list[1] == "vposition":
                dep_arg = arg_stack[dependencies[0]][0]
                argID = vposConst[arguments]
                predicate = {"prefix" : None, "name" : "vpos", "pred" : vposPreds["vpos"], "var" : (dep_arg[1], var_map[v_count])}
                target["body"][-1].append(predicate)
                target["body"][-1][0] = i
                var_dict[var_map[v_count]] = argID
                v_count += 1
                    
            elif len(operation_list) == 2 and operation_list[1] == "rel":
                dep_arg = arg_stack[dependencies[0]][0]
                arg_list = arguments.split(",")
                relation = arg_list[1]
                okey_2 = arg_list[2].split(" ")[1].lstrip("(").rstrip(")")
                if okey_2 == "-":
                    okey_2 = "null"
                objID = local_map[okey_2][0]
                
                predicate = {"prefix" : None, "name" : relation.replace(" ","_"), "pred" : relationPreds[relation], "var" : (dep_arg[1], var_map[v_count])}
                target["body"][-1].append(predicate)
                target["body"][-1][0] = i
                var_dict[var_map[v_count]] = objID
                v_count += 1
            
            elif len(operation_list) == 2 and operation_list[1] == "place":
                pass
            
            else:
                dep_arg = arg_stack[dependencies[0]][0]
                attr = arguments.lstrip().rstrip()
                attrID = attributeConst[attr]
                predicate = {"prefix" : None, "name" : "attribute", "pred" : attributePreds["attribute"], "var" : (dep_arg[1], var_map[v_count])}
                target["body"][-1].append(predicate)
                target["body"][-1][0] = i
                var_dict[var_map[v_count]] = attrID
                v_count += 1
        
        elif operation_list[0] == "and":
            k = 0
            it = 0
            body = [dependencies[1]]
            while k < len(dependencies) and it < len(target["body"]):
                if target["body"][it][0] == dependencies[k]:
                    t_body = target["body"].pop(it)
                    del t_body[0]
                    body += t_body
                    k += 1
                elif target["body"][it][0] < dependencies[k]:
                    it += 1
                else:
                    k += 1
            
            target["body"].append(body)
        
        elif operation_list[0] == "or":
            assert len(target["body"]) == len(dependencies)
            
        elif operation_list[0] == "same":
            if len(operation_list) > 1:
                for dep in dependencies:
                    dep_arg = arg_stack[dep]
                    for da in dep_arg:
                        predicate = {"prefix" : None, "name" : "attribute", "pred" : attributePreds["attribute"], "var" : (da[1], var_map[v_count])}
                        target["body"][-1].append(predicate)
                        target["body"][-1][0] = i
                        var_dict[var_map[v_count]] = "query"
            else:
                for dep in dependencies:
                    dep_arg = arg_stack[dep]
                    for da in dep_arg:
                        predicate = {"prefix" : None, "name" : "object", "pred" : objectPreds["object"], "var" : (da[1], var_map[v_count])}
                        target["body"][-1].append(predicate)
                        target["body"][-1][0] = i
                        var_dict[var_map[v_count]] = "query"
                
            
            v_count += 1
            
        elif operation_list[0] == "different":
            diff_var = []
            if len(operation_list) > 1:
                for dep in dependencies:
                    dep_arg = arg_stack[dep]
                    for da in dep_arg:
                        predicate = {"prefix" : None, "name" : "attribute", "pred" : attributePreds["attribute"], "var" : (da[1], var_map[v_count])}
                        target["body"][-1].append(predicate)
                        target["body"][-1][0] = i
                        var_dict[var_map[v_count]] = "query"
                        diff_var.append(var_map[v_count])
                        v_count += 1
            else:
                for dep in dependencies:
                    dep_arg = arg_stack[dep]
                    for da in dep_arg:
                        predicate = {"prefix" : None, "name" : "object", "pred" : objectPreds["object"], "var" : (da[1], var_map[v_count])}
                        target["body"][-1].append(predicate)
                        target["body"][-1][0] = i
                        var_dict[var_map[v_count]] = "query"
                        diff_var.append(var_map[v_count])
                        v_count += 1
            
            for i in range(1, len(diff_var)):
                predicate = {"prefix" : "not", "name" : "same", "pred" : globalPreds["same"], "var" : (diff_var[i-1], diff_var[i])}
                target["body"][-1].append(predicate)
                target["body"][-1][0] = i

    target["args"] = []
    target["query"] = []
    for arg in var_dict:
        target["args"].append(arg)
        target["query"].append(var_dict[arg])

    return target
        

## Testing function ff2Clause

In [25]:
key = sample_ques_key[-2]
question = binary_ques["1191591"]

target = ff2Clause(question, trainSceneData)
print(target)

{'args': ['A', 'B'], 'body': [[2, {'prefix': 'not', 'name': 'attribute', 'pred': <__main__.predicate object at 0x7f6b7e0f9320>, 'var': ('A', 'B')}, {'prefix': None, 'pred': <__main__.predicate object at 0x7f6c4705ff60>, 'name': 'exist', 'var': 'A'}]], 'query': [0, 338]}


## Converting binary questions to target predicates

In [26]:
ques_target = {}
for i, qkey in enumerate(binary_ques):
    try:
        question = binary_ques[qkey]
        ques_target[qkey] = {"target" : ff2Clause(question, trainSceneData), "answer" : question["answer"]}
    except KeyError as e:
        print(e, i)

print("done")

'outside' 2197
'over' 20551
'over' 23747
'beyond' 28887
'looking' 32902
'looking' 33064
'in the middle of' 36532
'looking' 37141
'living room' 38355
'dining room' 42384
'looking' 42743
'over' 58888
'over' 60879
'over' 73295
'looking' 74297
'over' 88800
'over' 90540
'in the center of' 91389
'in the middle of' 103934
'outside' 107547
'along' 108612
'over' 114786
'looking' 116042
'over' 117522
'looking' 119177
'looking' 120382
'looking' 120459
'outside' 125887
'in the middle of' 125922
'looking' 131255
'looking' 131841
'looking' 136188
'looking' 141930
'looking' 142638
'down' 153709
'looking' 158634
'outside' 173627
'looking' 180666
'looking' 185086
78 187717
'looking' 190905
'outside' 197432
'over' 202085
'looking' 208085
'looking' 208812
'looking' 211021
'looking' 214577
'looking' 217605
'looking' 223016
'looking' 225797
'beyond' 227067
'looking' 230007
'outside' 233124
78 236776
'over' 245722
'looking' 246587
'over' 254057
'over' 255925
'looking' 257024
'looking' 257330
'looking' 26174

## Dataset to prolog

In [27]:
def queryAnswerAlt(question, BG):
    bg = BG["KB"].bg    
    
    with open("./sceneData/sceneData.pl", "w") as file:
        for pred in bg:
            if pred == "exist" or pred == "location" or pred == "weather":
                for arg in bg[pred]:
                    factStr = pred + "(" + str(arg) + ")"
                    file.write(factStr + ".\n")
#                     print(factStr)
            else:
                for arg in bg[pred]:
                    factStr = pred+ "(" + str(arg[0]) + "," + str(arg[1]) + ")"
                    file.write(factStr + ".\n")
#                     print(factStr)
        file.write("same(X, Y) :- X == Y.\n")
        head = "target("
        for arg in question["target"]["args"]:
            head += arg + ","
        target = head[:-1] + ') :- ' 

        for rule in question["target"]["body"]:
            rString = ""
            for predicate in rule[1:]:
                p_name = predicate["name"]
                pString = p_name + "("
                if p_name == "exist" or p_name == "location" or p_name == "weather":
                    pString += predicate["var"] + ")"
                else:
                    pString += predicate["var"][0] + "," + predicate["var"][1] + ")"

                if predicate["prefix"] == "not":
                    pString = "not(" + pString + ")"

                pString += ","
                rString += pString

            target += rString
            target = target[:-1] + " ; "

        target = target[:-2]        
        file.write(target + ".\n")
        
        qString = "target("
        for qarg in question["target"]["query"]:
            if qarg == "query":
                qString += "_,"
            else:
                qString += str(qarg) + ","
        qString = qString[:-1] + ")"
        file.write("go :- " + qString + ".")
        
#         print(target)
#         print(qString)
        
    out = subprocess.run(["swipl", "-s", "/media/murali/stuff/projects/explainable_ai/datasets/GQA/sceneData/sceneData.pl", "-g", "go", "-t", "halt"], capture_output=True)
    output = out.stderr
    if len(output) == 0:
        return "yes", target, qString
    else:
        return "no", target, qString

In [71]:
with open("./wrong_ans.txt", "w") as file:

    count = 0
#     for i, qkey in enumerate(op_keys):
#     for i, qkey in enumerate(["18808141"]):
    for i, qkey in enumerate(ques_target):
        question = ques_target[qkey]
        imageId = train_ques[qkey]["imageId"]
        sceneData = trainSceneData[imageId]
        p, target, qString = queryAnswerAlt(question, sceneData)
        print(i, p, question["answer"])
        if p == question["answer"]:
            count += 1
        else:
            file.write("{}\n".format(i))
            file.write("Predicted Answer: {}\tRight Answer: {}\n".format(p, question["answer"]))
            file.write("Question: {}\n".format(train_ques[qkey]["question"]))
            file.write("FF      : {}\n".format(train_ques[qkey]["semanticStr"]))
            file.write(target+"\n")
            file.write(qString+"\n")
            file.write("\n")
            file.write("\n")

        if i == 999:
            break

0 yes yes
1 no no
2 yes yes
3 no no
4 yes yes
5 yes yes
6 yes yes
7 yes yes
8 no no
9 no yes
10 no no
11 yes yes
12 yes yes
13 yes yes
14 yes yes
15 yes yes
16 no no
17 no no
18 yes yes
19 no no
20 yes no
21 yes yes
22 no no
23 yes yes
24 yes yes
25 yes yes
26 no no
27 yes yes
28 yes no
29 yes yes
30 yes yes
31 no no
32 no no
33 yes yes
34 yes no
35 yes yes
36 yes yes
37 yes yes
38 yes yes
39 no no
40 no no
41 yes yes
42 yes yes
43 yes yes
44 yes yes
45 yes yes
46 no no
47 no no
48 yes no
49 no no
50 no no
51 no no
52 yes yes
53 yes yes
54 yes yes
55 no no
56 yes yes
57 no no
58 no no
59 yes yes
60 no no
61 yes yes
62 yes yes
63 no no
64 no no
65 yes yes
66 no no
67 no no
68 no no
69 yes yes
70 no no
71 no no
72 no no
73 no no
74 no no
75 yes yes
76 no no
77 yes yes
78 yes yes
79 no no
80 yes yes
81 yes yes
82 yes yes
83 yes yes
84 yes yes
85 no no
86 yes yes
87 yes yes
88 yes yes
89 yes yes
90 yes yes
91 yes yes
92 yes yes
93 yes yes
94 yes yes
95 yes yes
96 yes yes
97 yes yes
98 yes 

753 no no
754 no no
755 no no
756 yes yes
757 yes yes
758 no no
759 yes yes
760 yes no
761 yes yes
762 yes yes
763 no no
764 no no
765 yes yes
766 no no
767 no no
768 no no
769 yes no
770 yes yes
771 yes yes
772 no no
773 yes yes
774 yes yes
775 yes no
776 no no
777 no no
778 no no
779 yes yes
780 no no
781 no no
782 no no
783 no no
784 no no
785 yes yes
786 no no
787 yes yes
788 no no
789 no no
790 no no
791 yes yes
792 no no
793 no no
794 yes yes
795 yes yes
796 yes no
797 yes yes
798 no no
799 no yes
800 yes yes
801 no no
802 no no
803 yes yes
804 no no
805 yes yes
806 no no
807 yes yes
808 no no
809 yes yes
810 yes yes
811 yes yes
812 no no
813 no no
814 yes yes
815 yes yes
816 no no
817 no no
818 no no
819 no no
820 no no
821 no no
822 yes yes
823 no no
824 yes yes
825 no no
826 yes yes
827 no yes
828 no no
829 no no
830 no yes
831 yes yes
832 yes yes
833 yes yes
834 yes yes
835 no no
836 yes yes
837 no no
838 no no
839 no no
840 yes yes
841 no no
842 yes no
843 yes yes
844 no no


In [72]:
count


947

In [68]:
print(question["target"], "\n")
print(train_ques[qkey]["question"], "\n")
print(train_ques[qkey]["semanticStr"], "\n")
print(train_ques[qkey]["answer"])

{'args': ['A', 'B', 'C', 'D'], 'body': [[1, {'prefix': None, 'name': 'object', 'pred': <__main__.predicate object at 0x7fe77bc77550>, 'var': ('A', 'D')}, {'prefix': None, 'name': 'object', 'pred': <__main__.predicate object at 0x7fe77bc77550>, 'var': ('B', 'D')}, {'prefix': None, 'name': 'object', 'pred': <__main__.predicate object at 0x7fe77bc77550>, 'var': ('C', 'D')}]], 'query': [4, 10, 16, 'query']} 

Are the animals sheep? 

select: animal (390072,390074,390069)->same: type [0] 

yes


In [69]:
sceneData


{'KB': <__main__.knowledgeBase at 0x7fe8ad1330b8>,
 'local map': {'390093': [0, 553],
  '390094': [1, 960],
  '390083': [2, 666],
  '390073': [3, 528],
  '390072': [4, 802],
  '390071': [5, 528],
  '390070': [6, 528],
  '390077': [7, 813],
  '390076': [8, 523],
  '390075': [9, 523],
  '390074': [10, 802],
  '390095': [11, 702],
  '390087': [12, 1688],
  '390079': [13, 813],
  '390078': [14, 813],
  '390068': [15, 523],
  '390069': [16, 1290],
  '390080': [17, 813],
  '390092': [18, 554],
  'null': [19, 'null']}}

In [33]:
print("4", all_objects[134])

4 bike


In [34]:
print("1", all_objects[714])

1 helmet


## Problem Cases

#### Convention in the order of arguments for "relate" and "verify rel"

In some cases the functional form doesn't stick to a fixed convention. Consider the question "Do you see any bookcase to the left of the napkin the cat is to the right of?", here we have three objects a cat, napkin and a bookcase and we know that the cat is to the right of the napkin. In shothand if we denote this physical relation as napkin--cat and we want to know if bookcase--napkin--cat is true? This induces the following target predicate,
<div align="center">target(bookcase, napkin, cat) $\gets$ to_the_right_of(cat, napkin), to_the_left_of(bookcase, napkin)</div>
From the scene graph we encode the relation napking--cat as to_the_right_of(cat, napkin). Looking at the functional form of the question select: cat (1298333)->relate: napkin,to the right of,o (1298346) [0]->relate: bookcase,to the left of,s (1298370) [1]->exist: ? [2], if we stick to the convention we assumed previously then we get
<div align="center">target(bookcase, napkin, cat) $\gets$ to_the_right_of(napkin, cat), to_the_left_of(bookcase, napkin)</div>
Which is not what we need! This is not solved even if I switch the convention as that would result in,
<div align="center">target(bookcase, napkin, cat) $\gets$ to_the_right_of(cat, napkin), to_the_left_of(napkin, bookcase)</div>
Which is still not what we need indicating that the functional form doesn't follow a fixed convention! So naturally the model predicts the wrong answer. Here I've shown an example for the operation "relate" but a similar behavior is observed for "verify rel" as well. 

#### Functional form contains relations not present in the scene graph

In few cases the functional form contains relations that aren't contained in the scene graph for the scene. For example, in the question "Do the man jeans look dark?" we are trying to query if the color of the jeans the man's wearing is dark?. Looking at the functional form "select: man (4486961)->relate: jeans,of,s (4486963) [0]->verify color: dark [1]" we see that the relation "of" is used to relate the man and the jeans. But in the scene graph this relation is not present. Instead we have the relation "wearing" connecting "man" and "jeans". Without any background on the relation "of", forward chaining results in the wrong answer


In [70]:
imageId

'2402376'

# Non-binary questions

In [28]:
nb_ques = {}

for qkey in train_ques:
    if train_ques[qkey]["answer"] not in ["yes", "no"]:
        nb_ques[qkey] = train_ques[qkey]

In [29]:
# Printing random non-binary questions
random.seed(1024)
N_ques = 15
random_choice = random.sample(list(nb_ques.keys()), N_ques)

for key in random_choice:
    print("Key: {}".format(key))
    print("Question: {}".format(nb_ques[key]["question"]))
    print("FF: {}".format(nb_ques[key]["semanticStr"]))
    print("Answer: {}".format(nb_ques[key]["answer"]))
    print("\n")

Key: 15548375
Question: What animal is it?
FF: select: animal (325211)->query: name [0]
Answer: cat


Key: 02239633
Question: What is on the cow?
FF: select: cow (573466)->relate: _,on,s (573471) [0]->query: name [1]
Answer: rope


Key: 07178125
Question: Which side of the image is the bottle on?
FF: select: bottle (2700553)->query: hposition [0]
Answer: left


Key: 02825521
Question: What's the sidewalk made of?
FF: select: sidewalk (401287)->query: material [0]
Answer: concrete


Key: 09457573
Question: What's the man sitting on?
FF: select: man (312222)->relate: _,sitting on,o (312225) [0]->query: name [1]
Answer: ski lift


Key: 13443729
Question: Which side of the picture is the container on?
FF: select: container (3914778)->query: hposition [0]
Answer: left


Key: 04495788
Question: What color are the sneakers?
FF: select: sneakers (674710)->query: color [0]
Answer: black


Key: 19963773
Question: What color are the laptops to the left of the guitar?
FF: select: guitar (2366283)-

In [133]:
count = 0
op_keys = []
query_args = []
sp_ans = []
common_ans = []
for key in nb_ques:
    question = nb_ques[key]
    semOperations = question["semantic"]
    for op in semOperations:
        operation = op["operation"]
        if operation == "query":
#             if op["argument"] == "None":
#             common_ans.append(question["answer"])
            if op["argument"] == "tone":
                sp_ans.append(question["answer"])

#                 print("QKey: {}".format(key))
#                 print("Image ID: {}".format(question["imageId"]))
#                 print(question["question"])
#                 print(question["semanticStr"])
#                 print(question["answer"])
#                 print("\n")
#                 count += 1
#                 op_keys.append(key)
            
#     if count >= 200:
#         break


In [134]:
sp_ans = sorted(list(set(sp_ans)))
for ans in sp_ans:
    print(ans)

light
taking a picture


In [59]:
query_args = sorted(list(set(query_args)))
with open("./query_args.txt", "w") as file:
    for args in query_args[8:]:
        file.write(args+"\n")