In [1]:
import json
import os
from typing import Union

In [50]:
# Suggestions: (i) name the file in the top-level directory "summary.json"; (ii) make sure that it only contains "successful" runs; (iii) update the extension of the predictions file to .jsonl

def read_experiment_summaries(path):
    """Load the experiment summary file and return its parsed JSON content.

    Args:
        path: Path to a JSON file.

    Returns:
        The deserialized JSON document, whatever its top-level type is.
    """
    # JSON text is UTF-8 by specification; be explicit instead of relying on
    # the platform's default text encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def read_predictions(path):
    """Load a JSON Lines predictions file.

    Each non-blank line of the file is parsed as one JSON document.

    Args:
        path: Path to a file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.
    """
    with open(path, "r", encoding="utf-8") as f:
        # Iterate the file lazily and ignore blank lines (e.g. a trailing empty
        # line at EOF), which would otherwise make json.loads raise.
        return [json.loads(line) for line in f if line.strip()]

def get_max_len(triplets, field):
    """Return the length of the longest ``name`` stored under ``field``.

    Args:
        triplets: Iterable of mappings; each must provide ``t[field]['name']``.
        field: Key selecting which part of the triplet to measure
            (callers in this file pass "subject", "relation" or "object").

    Returns:
        The maximum ``len(t[field]['name'])`` over all triplets, or 0 when
        ``triplets`` is empty.
    """
    # default=0 keeps an empty triplet set from raising ValueError.
    return max((len(t[field]['name']) for t in triplets), default=0)

def get_triplets_sequence(triplets):
    """Render triplets as an aligned three-column text table.

    Every column (subject, relation, object) is left-aligned and padded with
    spaces to the width of its longest entry; columns are separated by
    " ~~~ " and rows by newlines.

    Args:
        triplets: Sequence of mappings providing ``['subject']['name']``,
            ``['relation']['name']`` and ``['object']['name']``.

    Returns:
        The table as a single string, one triplet per line.
    """
    subj_width, rel_width, obj_width = (
        get_max_len(triplets, part) for part in ("subject", "relation", "object")
    )

    rows = []
    for triplet in triplets:
        subj = triplet['subject']['name']
        rel = triplet['relation']['name']
        obj = triplet['object']['name']
        rows.append(f"{subj: <{subj_width}} ~~~ {rel: <{rel_width}} ~~~ {obj: <{obj_width}}")
    return "\n".join(rows)

def get_triplets_API_format(triplets):
    """Serialize triplets into a single-line, parenthesized key/value form.

    Each triplet becomes ``("subject": S, "relation": R, "object": O)`` and the
    pieces are joined with single spaces.

    Args:
        triplets: Iterable of mappings providing ``['subject']['name']``,
            ``['relation']['name']`` and ``['object']['name']``.

    Returns:
        The space-joined string; empty when there are no triplets.
    """
    pieces = []
    for triplet in triplets:
        subj = triplet['subject']['name']
        rel = triplet['relation']['name']
        obj = triplet['object']['name']
        pieces.append(f"(\"subject\": {subj}, \"relation\": {rel}, \"object\": {obj})")
    return " ".join(pieces)

def get_textual_sequences(model_completions, num_completions_to_show: int):
    """Select the completions to display.

    Args:
        model_completions: Sequence of completions.
        num_completions_to_show: How many leading completions to include.

    Returns:
        The first completion itself when exactly one is requested; otherwise
        the first ``num_completions_to_show`` completions joined by newlines.
    """
    if num_completions_to_show != 1:
        shown = model_completions[:num_completions_to_show]
        return "\n".join(shown)
    return model_completions[0]

def visualize_predictions_for_instance(prediction_obj, show_triplets: bool, num_completions_to_show: int, show_api_formatted_triplets: bool):
    """Print the selected views of a single prediction record.

    Up to three sections are printed, each introduced by a banner line and
    followed by an empty line: the aligned triplet table, the API-formatted
    triplets (printed under the same "Triplets" banner), and the text
    completions.

    Args:
        prediction_obj: Mapping read with the keys "triplet_set" and
            "model_completions" (each only when its section is enabled).
        show_triplets: Print the aligned triplet table.
        num_completions_to_show: Number of completions to print; the text
            section is skipped unless this is greater than zero.
        show_api_formatted_triplets: Print the API-formatted triplets.
    """
    rule = '~' * 30

    if show_triplets:
        print(f"{rule} Triplets {rule}")
        print(get_triplets_sequence(prediction_obj["triplet_set"]))
        print()

    if show_api_formatted_triplets:
        print(f"{rule} Triplets {rule}")
        print(get_triplets_API_format(prediction_obj["triplet_set"]))
        print()

    # Nothing more to show when no completions were requested.
    if not num_completions_to_show > 0:
        return

    print(f"{rule} Text {rule}")
    print(get_textual_sequences(prediction_obj["model_completions"], num_completions_to_show))
    print()

def get_id(pred_object):
    """Build the identifier of a prediction record.

    Args:
        pred_object: Mapping with an 'extra-information' entry that holds
            'dataset-id' and 'dataset'.

    Returns:
        The tuple ``(dataset-id, dataset)``.
    """
    info = pred_object['extra-information']
    return (info['dataset-id'], info['dataset'])

def get_ordering(predictions):
    """Map positions in ``predictions`` to record ids and back.

    Args:
        predictions: Sequence of prediction records accepted by ``get_id``.

    Returns:
        A dict with two entries: "idx2id" (position -> id) and "id2idx"
        (id -> position). If an id occurs more than once, "id2idx" keeps the
        last position at which it appears.
    """
    idx2id = {}
    id2idx = {}
    for position, prediction in enumerate(predictions):
        key = get_id(prediction)
        idx2id[position] = key
        id2idx[key] = position
    return {"id2idx": id2idx, "idx2id": idx2id}

def visualize_predictions_for_experiment(path_to_hps_logs, show_triplets: bool, num_completions_to_show: int, debug_k: Union[int, None] = None, show_api_formatted_triplets: bool = False, show_problem: Union[int, None] = None, summary_filename: str = "hps_synthie_small_results.json"):
    """Print the predictions of every experiment listed in a summary file.

    The sample order is fixed by the first experiment's predictions file; each
    later experiment is printed in that same order by looking records up via
    their ``get_id`` identifier.

    Args:
        path_to_hps_logs: Directory containing the summary file; each summary
            entry's "result_filepath" is joined onto this directory.
        show_triplets: Print the aligned triplet table for each sample.
        num_completions_to_show: Number of completions to print per sample.
        debug_k: If given, only samples with index below ``debug_k`` are shown.
        show_api_formatted_triplets: Also print the API-formatted triplets.
        show_problem: If given, only the sample with this index is shown.
        summary_filename: Name of the summary file inside ``path_to_hps_logs``.

    Returns:
        The ordering mapping produced by ``get_ordering`` for the first
        experiment, or None when the summary lists no experiments.

    Raises:
        AssertionError: If an experiment has no record for one of the ids in
            the reference ordering.
    """
    path_to_summary_file = os.path.join(path_to_hps_logs, summary_filename)
    experiment_summaries = read_experiment_summaries(path_to_summary_file)
    ordering_mapping = None
    idx2id = {}

    for exp_idx, exp_metadata in enumerate(experiment_summaries):
        predictions = read_predictions(os.path.join(path_to_hps_logs, exp_metadata["result_filepath"]))
        if ordering_mapping is None:
            # The first experiment defines the order used for all the others.
            ordering_mapping = get_ordering(predictions)
            idx2id = ordering_mapping["idx2id"]

        # Group the records by id once, instead of rescanning the whole list
        # for every sample. Records sharing an id keep their file order.
        samples_by_id = {}
        for sample in predictions:
            samples_by_id.setdefault(get_id(sample), []).append(sample)

        # Walk the reference ordering rather than range(len(predictions)): a
        # run with more records than the first one would otherwise index past
        # the mapping, and a shorter run would silently skip missing samples.
        for i, sample_id in idx2id.items():
            if show_problem is not None and i != show_problem:
                continue
            if debug_k is not None and i >= debug_k:
                break

            matching_samples = samples_by_id.get(sample_id, [])
            if not matching_samples:
                # Raised explicitly so the check is not stripped under `python -O`.
                raise AssertionError(f"Could not find the sample with ID {sample_id} in predictions")

            for sample in matching_samples:
                print(f"{'=' * 30} [Parameter config {exp_idx}] Sample {i} -- ID: {sample_id} {'=' * 30}")
                visualize_predictions_for_instance(sample, show_triplets=show_triplets, num_completions_to_show=num_completions_to_show, show_api_formatted_triplets=show_api_formatted_triplets)
                print()

    return ordering_mapping

In [51]:
# Show the triplets and the first completion of every sample, for every
# configuration listed in the sweep's summary file.
path_to_hps_logs = "/dlabdata1/josifosk/SynthIE_main/data/hps_results/hps_synthie_small"
show_triplets = True
num_completions_to_show = 1
debug_k = None  # no cap on the number of samples shown

visualize_predictions_for_experiment(
    path_to_hps_logs,
    show_triplets=show_triplets,
    num_completions_to_show=num_completions_to_show,
    debug_k=debug_k,
    show_api_formatted_triplets=False,
)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Triplets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Chief_Minister_of_the_Australian_Capital_Territory ~~~ officeholder              ~~~ Andrew_Barr                                                 
Andrew_Barr                                        ~~~ position held             ~~~ Chief_Minister_of_the_Australian_Capital_Territory          
Andrew_Barr                                        ~~~ member of political party ~~~ Australian_Labor_Party_(Australian_Capital_Territory_Branch)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The Government of the Australian Capital Territory, also referred to as the Australian Capital Territory Government or ACT Government, is the executive authority of the Australian Capital Territory, one of the territories of Australia. The leader of the party or coalition with the confidence of the Australian Capital Territory Legislative Assembly forms Government.  


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Triplets ~~~~~~~~~~~

{'id2idx': {(41948, 'rebel'): 0,
  (35685, 'rebel'): 1,
  (24952, 'rebel'): 2,
  (36458, 'rebel'): 3,
  (55061, 'rebel'): 4,
  (52712, 'rebel'): 5,
  (105540, 'rebel'): 6,
  (101285, 'rebel'): 7,
  (108295, 'rebel'): 8,
  (126977, 'rebel'): 9,
  (123616, 'rebel'): 10,
  (112276, 'rebel'): 11,
  (132301, 'rebel'): 12},
 'idx2id': {0: (41948, 'rebel'),
  1: (35685, 'rebel'),
  2: (24952, 'rebel'),
  3: (36458, 'rebel'),
  4: (55061, 'rebel'),
  5: (52712, 'rebel'),
  6: (105540, 'rebel'),
  7: (101285, 'rebel'),
  8: (108295, 'rebel'),
  9: (126977, 'rebel'),
  10: (123616, 'rebel'),
  11: (112276, 'rebel'),
  12: (132301, 'rebel')}}

In [61]:
# Display the first raw prediction record.
# NOTE(review): at notebook level `predictions` is only assigned inside the
# loading loops of the cells further down (In [38] / In [40]), so this cell
# relies on one of them having been executed beforehand.
predictions[0]

{'triplet_set': [{'subject': {'name': 'Chief_Minister_of_the_Australian_Capital_Territory',
    'qid': 'Q493649'},
   'object': {'name': 'Andrew_Barr', 'qid': 'Q4756289'},
   'relation': {'name': 'officeholder', 'pid': 'P1308'}},
  {'subject': {'name': 'Andrew_Barr', 'qid': 'Q4756289'},
   'object': {'name': 'Chief_Minister_of_the_Australian_Capital_Territory',
    'qid': 'Q493649'},
   'relation': {'name': 'position held', 'pid': 'P39'}},
  {'subject': {'name': 'Andrew_Barr', 'qid': 'Q4756289'},
   'object': {'name': 'Australian_Labor_Party_(Australian_Capital_Territory_Branch)',
    'qid': 'Q20683858'},
   'relation': {'name': 'member of political party', 'pid': 'P102'}}],
 'extra-information': {'dataset-id': 41948, 'dataset': 'rebel'},
 'prompt': 'Relations: ((Chief_Minister_of_the_Australian_Capital_Territory, appointed by, Australian_Capital_Territory_Legislative_Assembly))\nThe following sentence expresses the previous relations: The Government of the Australian Capital Territory

In [60]:
# Show only the sample at index 6, with both the aligned triplet table and the
# API-formatted triplets, for every configuration in the sweep.
path_to_hps_logs = "/dlabdata1/josifosk/SynthIE_main/data/hps_results/hps_synthie_small"
show_triplets = True
num_completions_to_show = 1
debug_k = None  # no cap on the number of samples shown

visualize_predictions_for_experiment(
    path_to_hps_logs,
    show_triplets=show_triplets,
    num_completions_to_show=num_completions_to_show,
    debug_k=debug_k,
    show_api_formatted_triplets=True,
    show_problem=6,
)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Triplets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The_Legend_of_the_Golden_Gun ~~~ publication date ~~~ 1979           
The_Sacketts                 ~~~ publication date ~~~ 1979           
The_Shadow_Riders_(film)     ~~~ screenwriter     ~~~ Louis_L'Amour  
Louis_L'Amour                ~~~ genre            ~~~ Western_(genre)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Triplets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
("subject": The_Legend_of_the_Golden_Gun, "relation": publication date, "object": 1979) ("subject": The_Sacketts, "relation": publication date, "object": 1979) ("subject": The_Shadow_Riders_(film), "relation": screenwriter, "object": Louis_L'Amour) ("subject": Louis_L'Amour, "relation": genre, "object": Western_(genre))

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In 1979, L'Amour wrote The Sacketts and The Legend of the Golden Gun, which were published posthumously.  


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Triplets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The_Leg

{'id2idx': {(41948, 'rebel'): 0,
  (35685, 'rebel'): 1,
  (24952, 'rebel'): 2,
  (36458, 'rebel'): 3,
  (55061, 'rebel'): 4,
  (52712, 'rebel'): 5,
  (105540, 'rebel'): 6,
  (101285, 'rebel'): 7,
  (108295, 'rebel'): 8,
  (126977, 'rebel'): 9,
  (123616, 'rebel'): 10,
  (112276, 'rebel'): 11,
  (132301, 'rebel'): 12},
 'idx2id': {0: (41948, 'rebel'),
  1: (35685, 'rebel'),
  2: (24952, 'rebel'),
  3: (36458, 'rebel'),
  4: (55061, 'rebel'),
  5: (52712, 'rebel'),
  6: (105540, 'rebel'),
  7: (101285, 'rebel'),
  8: (108295, 'rebel'),
  9: (126977, 'rebel'),
  10: (123616, 'rebel'),
  11: (112276, 'rebel'),
  12: (132301, 'rebel')}}

In [25]:
# Echo the log directory currently held in the kernel (assigned in the driver cells above).
path_to_hps_logs

'/dlabdata1/josifosk/SynthIE_main/data/hps_results/hps_synthie_small'

In [26]:
# Load the sweep summary at notebook level so the cells below can iterate over it.
# Uses the same file name that visualize_predictions_for_experiment reads, and
# depends on `path_to_hps_logs` having been set by an earlier cell.
path_to_summary_file = os.path.join(path_to_hps_logs, "hps_synthie_small_results.json")
experiment_summaries = read_experiment_summaries(path_to_summary_file)



In [38]:

# Reference set: the (dataset-id, dataset) pairs found in the first experiment only.
for exp_idx, exp_metadata in enumerate(experiment_summaries):
    predictions = read_predictions(os.path.join(path_to_hps_logs, exp_metadata["result_filepath"]))
    s = {get_id(p) for p in predictions}
    break  # only the first experiment is needed

In [40]:
# Check that every experiment covers exactly the same set of sample ids as the
# reference set `s`; on mismatch the assertion reports the experiment index.
for exp_idx, exp_metadata in enumerate(experiment_summaries):
    predictions = read_predictions(os.path.join(path_to_hps_logs, exp_metadata["result_filepath"]))
    s2 = {get_id(p) for p in predictions}
    assert s == s2, exp_idx

In [41]:
# Display the loop variable left behind by the most recently executed loop over
# `experiment_summaries`, i.e. the index of the last experiment it visited.
exp_idx

17