In [1]:
import relational_image_generation_evaluation as rige

In [2]:
# Build the evaluator on the 'ViT-L/14' backbone, then the three test-only
# dataloaders. The calls stay in this order because each one prints its own
# progress messages while it runs.
evaluator = rige.Evaluator('ViT-L/14')
dataloader_one = rige.get_one_edge_dataloader(testonly=True)
dataloader_two = rige.get_two_edge_dataloader(testonly=True)
dataloader_full = rige.get_full_graph_dataloader(testonly=True)

# Report how many items the one-edge and two-edge loaders expose.
for caption, loader in (
    ("len(dataloader_one):", dataloader_one),
    ("len(dataloader_two):", dataloader_two),
):
    print(caption, len(loader))

Using device cuda:1 for evaluation.
Loading filtered test graphs...
Finished loading filtered test graphs
Generating one edge graphs...


100%|██████████| 100/100 [00:00<00:00, 1211.82it/s]


Finished generating one edge graphs
Loading filtered test graphs...
Finished loading filtered test graphs
Generating two edge graphs...


100%|██████████| 100/100 [00:00<00:00, 947.50it/s]

Finished generating two edge graphs
len(dataloader_one): 837
len(dataloader_two): 1076





In [3]:
from PIL import Image
# Collect ten (image, graph) pairs from the one-edge test dataloader.
# The image root does not change between iterations, so it is set once here.
IMAGE_DIR = '/local/home/jthomm/GraphCLIP/datasets/visual_genome/raw/VG/'
images = []
graphs = []
# Walk the dataloader in ONE pass. Calling next(iter(dataloader_one)) inside the
# loop builds a fresh iterator on every iteration, so it only ever reads the
# first item of a new pass: samples may repeat, and the very same item comes
# back every time if the loader does not shuffle.
# range() is listed first so zip() stops after ten items without pulling an
# eleventh one from the dataloader.
for i, batch in zip(range(10), dataloader_one):
    # Index 0 of each yielded item is taken, as before
    # (presumably the first graph of a batch — confirm against rige).
    graph = batch[0]
    image_id = graph.image_id
    # Visual Genome images are stored as <image_id>.jpg under IMAGE_DIR.
    image = Image.open(IMAGE_DIR + str(image_id) + '.jpg')
    images.append(image)
    graphs.append(graph)

# Fail loudly if the dataloader ran out early instead of silently scoring fewer samples.
assert len(graphs) == 10, f"dataloader_one yielded only {len(graphs)} items"

In [4]:
# Score every (image, graph) pair, then show the full result followed by its
# top-level keys, one per line.
scores = evaluator(images, graphs)
print(scores, scores.keys(), sep="\n")

{'rel_scores': [0.6908356547355652, 0.5116413831710815, 0.04193810373544693, 0.6064157485961914, 0.9496923685073853, 0.14458894729614258, 0.46405264735221863, 0.05490296706557274, 0.3069431185722351, 0.9973773956298828], 'attr_scores': ['noattributes', 0.6358119249343872, 0.5330018401145935, 0.9283628463745117, 0.8468342423439026, 'noattributes', 0.9209091067314148, 0.7131649851799011, 0.9490647912025452, 'noattributes']}
dict_keys(['rel_scores', 'attr_scores'])


In [5]:
# Collect ten (image, graph) pairs from the two-edge test dataloader.
# The image root does not change between iterations, so it is set once here.
IMAGE_DIR = '/local/home/jthomm/GraphCLIP/datasets/visual_genome/raw/VG/'
images = []
graphs = []
# Walk the dataloader in ONE pass. Calling next(iter(dataloader_two)) inside the
# loop builds a fresh iterator on every iteration, so it only ever reads the
# first item of a new pass: samples may repeat, and the very same item comes
# back every time if the loader does not shuffle.
# range() is listed first so zip() stops after ten items without pulling an
# eleventh one from the dataloader.
for i, batch in zip(range(10), dataloader_two):
    # Index 0 of each yielded item is taken, as before
    # (presumably the first graph of a batch — confirm against rige).
    graph = batch[0]
    assert len(graph.edges) == 2, f"Graph does not have two edges: {graph.edges}"
    image_id = graph.image_id
    # Visual Genome images are stored as <image_id>.jpg under IMAGE_DIR.
    image = Image.open(IMAGE_DIR + str(image_id) + '.jpg')
    # image = image.convert('RGB')
    images.append(image)
    graphs.append(graph)

# Fail loudly if the dataloader ran out early instead of silently scoring fewer samples.
assert len(graphs) == 10, f"dataloader_two yielded only {len(graphs)} items"

In [6]:
# Score every (image, graph) pair, then show the full result followed by its
# top-level keys, one per line.
scores = evaluator(images, graphs)
print(scores, scores.keys(), sep="\n")

{'rel_scores': [0.3139377236366272, 0.8993645906448364, 0.8383625745773315, 0.1365509331226349, 0.7768183350563049, 0.6908382177352905, 0.1832142174243927, 0.6684421300888062, 0.03941406309604645, 0.6384907960891724], 'attr_scores': [0.5300025343894958, 'noattributes', 0.49980831146240234, 0.8366420269012451, 0.6743819117546082, 'noattributes', 0.6601665616035461, 0.9565271139144897, 0.5323062539100647, 0.902817964553833]}
dict_keys(['rel_scores', 'attr_scores'])


In [3]:
from PIL import Image

# Attribute-swap experiment on one-edge graphs: each image is scored against
# its original graph and against a copy whose two nodes have exchanged their
# 'attributes' entries.
# The image root does not change between iterations, so it is set once here.
IMAGE_DIR = '/local/home/jthomm/GraphCLIP/datasets/visual_genome/raw/VG/'
images_orig = []
graphs_orig = []
images_adv = []
graphs_adv = []
# Walk the dataloader in ONE pass. Calling next(iter(dataloader_one)) inside the
# loop builds a fresh iterator on every iteration, so it only ever reads the
# first item of a new pass: samples may repeat, and the very same item comes
# back every time if the loader does not shuffle.
# range() is listed first so zip() stops after 300 items without pulling one
# more from the dataloader.
for i, batch in zip(range(300), dataloader_one):
    # Index 0 of each yielded item is taken, as before
    # (presumably the first graph of a batch — confirm against rige).
    graph = batch[0]
    assert len(graph.edges) == 1, f"Graph does not have one edge: {graph.edges}"
    image_id = graph.image_id
    image_path = IMAGE_DIR + str(image_id) + '.jpg'
    # The file is opened twice so the two lists hold separate Image objects.
    image_orig = Image.open(image_path)
    image_adv = Image.open(image_path)
    # image = image.convert('RGB')
    images_orig.append(image_orig)
    graphs_orig.append(graph)
    # Work on a copy so the original graph keeps its attributes.
    graph_adv = rige.copy_graph(graph)
    # flip the attributes of the two nodes in the graph
    [n1,n2] = list(graph_adv.nodes)[0:2]
    graph_adv.nodes[n1]['attributes'], graph_adv.nodes[n2]['attributes'] = graph_adv.nodes[n2]['attributes'], graph_adv.nodes[n1]['attributes']
    images_adv.append(image_adv)
    graphs_adv.append(graph_adv)

# Fail loudly if the dataloader ran out early instead of silently scoring fewer samples.
assert len(graphs_orig) == 300, f"dataloader_one yielded only {len(graphs_orig)} items"

# Score both variants; entries at the same index refer to the same image.
scores_orig = evaluator(images_orig,graphs_orig)
scores_adv = evaluator(images_adv,graphs_adv)
print(scores_orig['attr_scores'])
print(scores_adv['attr_scores'])

[0.6251007914543152, 'noattributes', 0.9785438179969788, 'noattributes', 'noattributes', 0.9946193695068359, 'noattributes', 'noattributes', 'noattributes', 'noattributes', 0.9359805583953857, 0.6843078136444092, 0.8705612421035767, 0.510028600692749, 0.7090193033218384, 0.2566523551940918, 0.45144718885421753, 0.9655649065971375, 0.6729621887207031, 0.9088230133056641, 'noattributes', 0.5406613349914551, 0.6141828894615173, 0.7482196092605591, 0.4659290015697479, 0.5968430638313293, 0.6970917582511902, 0.7619655132293701, 'noattributes', 0.6219819784164429, 0.14049573242664337, 0.3810161352157593, 0.7985119819641113, 0.8124150633811951, 0.47171831130981445, 'noattributes', 0.6573120951652527, 0.9601491689682007, 'noattributes', 0.7675310373306274, 'noattributes', 0.5094158053398132, 0.9857764840126038, 0.7037944197654724, 0.9821365475654602, 'noattributes', 0.6317881345748901, 0.9613714218139648, 0.809873640537262, 0.9946193695068359, 'noattributes', 0.6988480091094971, 0.690822839736

In [4]:
# print how often the model was more confident in the good graph
# n_good / n_bad count the pairs where the original graph scored higher / lower
# than the attribute-swapped graph; a tie is split evenly between the two.
n_good = 0
n_bad = 0
for orig,adv in zip(scores_orig['attr_scores'],scores_adv['attr_scores']):
    # 'noattributes' is the placeholder the evaluator emits instead of a number.
    # A pair can only be ranked when both sides are numeric; comparing a float
    # with the placeholder string would raise TypeError.
    if orig == 'noattributes' or adv == 'noattributes':
        continue
    if orig == adv:
        # Half credit to each side. Crediting only n_good would shrink the
        # denominator as well and make a tie count as fully correct.
        n_good += 0.5
        n_bad += 0.5
        print("warning, this is unlikely to happen")
    elif orig>adv:
        n_good += 1
    else:
        n_bad += 1

# Guard the ratio: with no rankable pair the accuracy is undefined, not an error.
n_ranked = n_good + n_bad
accuracy = n_good / n_ranked if n_ranked else float('nan')
print(f"n_correct: {n_good}, n_incorrect: {n_bad}, accuracy: {accuracy}")

n_correct: 177, n_incorrect: 22, accuracy: 0.8894472361809045
