In [1]:
import relational_image_generation_evaluation as rige

In [2]:
# Instantiate the evaluator with the 'ViT-L/14' argument, then build the three
# test-only dataloaders in the same order as before (one-edge, two-edge, full
# graph) so the library's progress messages appear in the same sequence.
evaluator = rige.Evaluator('ViT-L/14')
dataloader_one = rige.get_one_edge_dataloader(testonly=True)
dataloader_two = rige.get_two_edge_dataloader(testonly=True)
dataloader_full = rige.get_full_graph_dataloader(testonly=True)

# Report the length of the one-edge and two-edge loaders.
print(f"len(dataloader_one): {len(dataloader_one)}")
print(f"len(dataloader_two): {len(dataloader_two)}")

Using device cuda:1 for evaluation.
Using text embeddings as input to the model.
Loading filtered test graphs...
Finished loading filtered test graphs
Generating one edge graphs...


100%|██████████| 100/100 [00:00<00:00, 1056.99it/s]

Finished generating one edge graphs
Loading filtered test graphs...
Finished loading filtered test graphs
Generating two edge graphs...



100%|██████████| 100/100 [00:00<00:00, 891.91it/s]


Finished generating two edge graphs
Loading filtered test graphs...
Finished loading filtered test graphs
len(dataloader_one): 837
len(dataloader_two): 1076


In [3]:
from PIL import Image
# Directory from which `<image_id>.jpg` files are opened. It does not change
# between iterations, so it is assigned once instead of inside the loop.
IMAGE_DIR = '/local/home/jthomm/GraphCLIP/datasets/visual_genome/raw/VG/'

images = []
graphs = []
# Make a single pass over the dataloader. The earlier form,
# `next(iter(dataloader_one))`, built a brand-new iterator on every pass, so
# each draw restarted the loader and the same graph could be selected more
# than once. `range(10)` is the first argument to zip() so that iteration
# stops after 10 items without pulling an extra element from the loader.
for _, batch in zip(range(10), dataloader_one):
    # Element 0 of whatever the loader yields is the graph (as in the original code).
    graph = batch[0]
    image_id = graph.image_id
    image = Image.open(IMAGE_DIR + str(image_id) + '.jpg')
    images.append(image)
    graphs.append(graph)

In [4]:
# Run the evaluator on the collected images and graphs, then show the returned
# mapping followed by its keys, each on its own line.
scores = evaluator(images, graphs)
print(scores, scores.keys(), sep='\n')

{'rel_scores': [0.9963749051094055, 0.9753934741020203, 0.9214745163917542, 0.9958992600440979, 0.8125936388969421, 0.7544750571250916, 0.4216979146003723, 0.9317278861999512, 0.9186421036720276, 0.012344365939497948], 'attr_scores': [0.15855653584003448, 0.7651040554046631, 0.9766125679016113, 0.34666815400123596, 0.18600665032863617, 'noattributes', 0.11902481317520142, 0.25954243540763855, 0.3700721263885498, 0.07474261522293091]}
dict_keys(['rel_scores', 'attr_scores'])


In [5]:
# Directory from which `<image_id>.jpg` files are opened. It does not change
# between iterations, so it is assigned once instead of inside the loop.
IMAGE_DIR = '/local/home/jthomm/GraphCLIP/datasets/visual_genome/raw/VG/'

images = []
graphs = []
# Make a single pass over the two-edge dataloader. The earlier form,
# `next(iter(dataloader_two))`, built a brand-new iterator on every pass, so
# each draw restarted the loader and the same graph could be selected more
# than once. `range(10)` is the first argument to zip() so that iteration
# stops after 10 items without pulling an extra element from the loader.
for _, batch in zip(range(10), dataloader_two):
    # Element 0 of whatever the loader yields is the graph (as in the original code).
    graph = batch[0]
    assert len(graph.edges) == 2, f"Graph does not have two edges: {graph.edges}"
    image_id = graph.image_id
    image = Image.open(IMAGE_DIR + str(image_id) + '.jpg')
    # image = image.convert('RGB')
    images.append(image)
    graphs.append(graph)

In [6]:
# Evaluate the two-edge samples gathered above and display the result mapping
# and then its keys, each on its own line.
scores = evaluator(images, graphs)
print(scores, scores.keys(), sep='\n')

{'rel_scores': [0.9885056614875793, 0.9839268326759338, 8.248157428170089e-07, 0.9772341847419739, 0.9576905965805054, 0.523443341255188, 0.8659615516662598, 0.8880363702774048, 0.8659616112709045, 0.8461143970489502], 'attr_scores': [0.5559964179992676, 'noattributes', 0.24270299077033997, 'noattributes', 0.5323565006256104, 0.6055250763893127, 0.40389275550842285, 0.5705788731575012, 0.40389296412467957, 0.26415884494781494]}
dict_keys(['rel_scores', 'attr_scores'])


In [7]:
from PIL import Image

# Directory from which `<image_id>.jpg` files are opened. It does not change
# between iterations, so it is assigned once instead of inside the loop.
IMAGE_DIR = '/local/home/jthomm/GraphCLIP/datasets/visual_genome/raw/VG/'

# Parallel lists: the i-th original image/graph pairs with the i-th
# "adversarial" image/graph (same picture, attributes swapped between nodes).
images_orig = []
graphs_orig = []
images_adv = []
graphs_adv = []
# Make a single pass over the one-edge dataloader. The earlier form,
# `next(iter(dataloader_one))`, built a brand-new iterator on every pass, so
# each of the 300 draws restarted the loader and the same graph could be
# counted several times in the comparison below. `range(300)` is the first
# argument to zip() so no extra element is pulled once 300 are collected.
for _, batch in zip(range(300), dataloader_one):
    # Element 0 of whatever the loader yields is the graph (as in the original code).
    graph = batch[0]
    assert len(graph.edges) == 1, f"Graph does not have one edge: {graph.edges}"
    image_id = graph.image_id
    image_path = IMAGE_DIR + str(image_id) + '.jpg'
    # The same file is opened twice so each list holds its own Image object.
    image_orig = Image.open(image_path)
    image_adv = Image.open(image_path)
    images_orig.append(image_orig)
    graphs_orig.append(graph)
    # Work on a copy so the original graph keeps its attribute assignment.
    graph_adv = rige.copy_graph(graph)
    # flip the attributes of the two nodes in the graph
    [n1, n2] = list(graph_adv.nodes)[0:2]
    graph_adv.nodes[n1]['attributes'], graph_adv.nodes[n2]['attributes'] = graph_adv.nodes[n2]['attributes'], graph_adv.nodes[n1]['attributes']
    images_adv.append(image_adv)
    graphs_adv.append(graph_adv)

# Score both variants and print only the attribute scores of each.
scores_orig = evaluator(images_orig, graphs_orig)
scores_adv = evaluator(images_adv, graphs_adv)
print(scores_orig['attr_scores'])
print(scores_adv['attr_scores'])

['noattributes', 0.15855655074119568, 'noattributes', 0.3813652992248535, 'noattributes', 'noattributes', 0.4027596414089203, 0.5549305081367493, 'noattributes', 'noattributes', 0.5059268474578857, 0.34000304341316223, 0.42295584082603455, 0.4038931429386139, 'noattributes', 0.49560895562171936, 0.0864562839269638, 'noattributes', 0.6534953117370605, 0.23712939023971558, 0.028290022164583206, 0.4048355519771576, 'noattributes', 0.40389305353164673, 0.5688793063163757, 'noattributes', 0.568879246711731, 0.714468777179718, 0.29095205664634705, 0.16564823687076569, 0.28223586082458496, 0.5526719689369202, 0.0007241009152494371, 'noattributes', 'noattributes', 'noattributes', 0.06281307339668274, 'noattributes', 0.0477331206202507, 0.13036774098873138, 'noattributes', 0.35067781805992126, 'noattributes', 0.04443687945604324, 0.525521993637085, 0.17989161610603333, 'noattributes', 0.7651039361953735, 0.3132937550544739, 'noattributes', 0.6558380722999573, 0.9041731357574463, 0.2717391848564

In [8]:
# Print how often the evaluator gave the original graph a higher attribute
# score than the attribute-swapped graph for the same image.
n_good = 0
n_bad = 0
for orig, adv in zip(scores_orig['attr_scores'], scores_adv['attr_scores']):
    # 'noattributes' is a string placeholder rather than a number. Skip the
    # pair when either side carries it; checking only `orig` would let a
    # placeholder in `adv` reach `orig > adv` and fail on a float/str comparison.
    if orig == 'noattributes' or adv == 'noattributes':
        continue
    if orig == adv:
        # An exact tie is counted as half a win.
        n_good += 0.5
        print("warning, this is unlikely to happen")
    elif orig > adv:
        n_good += 1
    else:
        n_bad += 1

# Guard the ratio: when no pair is comparable the denominator is zero, which
# previously raised ZeroDivisionError. Report NaN in that case instead.
n_compared = n_good + n_bad
accuracy = n_good / n_compared if n_compared else float('nan')
print(f"n_correct: {n_good}, n_incorrect: {n_bad}, accuracy: {accuracy}")

n_correct: 185, n_incorrect: 15, accuracy: 0.925
