# In [2]:
import json
import pandas as pd

# Models included in the results tables. They become the row labels of each
# table, in the order listed here.
model_names = [
    'vicuna-1.5-7b',
    'vicuna-1.3-33b',
    'llama-2-7b',
    'llama-2-70b',
    'wizardlm-70b',
    'text-davinci-003',
    'gpt-3.5-turbo-instruct',
    'gpt-3.5-turbo-1106',
    'gpt-4',
    'gpt-4-1106-preview',
    'mistral',
    'zephyr-7b-beta',
    'galactica-30b',
    'openchat',
]

# Dataset groups. Each group is turned into its own table / CSV file, with one
# set of metric columns per dataset in the group.
dataset_names_1 = ['cdr_rand_200', 'docred_rand_200']
dataset_names_2 = ['nyt10m_rand_500', 'wiki20m_rand_500']
dataset_names_3 = ['tacred_rand_800', 'wiki80_rand_800']


# Function to load JSON data
def load_json_data(file_path):
    """Read the JSON document stored at *file_path* and return the decoded object.

    The file is opened explicitly as UTF-8 instead of relying on the
    platform's locale encoding, so non-ASCII content decodes the same way on
    every machine.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content (dict, list,
        str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Read every metric table from ./results. The code further down looks values
# up as table.get(dataset_name, {}).get(model_name).

# Per-triple statistics.
num_triples, num_tokens_per_triple = (
    load_json_data('./results/average_num_triples.json'),
    load_json_data('./results/average_num_tokens_per_triple.json'),
)

# Score tables, one file per score name.
TS, US, FS, GS, CS = (
    load_json_data('./results/TS.json'),
    load_json_data('./results/US.json'),
    load_json_data('./results/FS.json'),
    load_json_data('./results/GS.json'),
    load_json_data('./results/CS.json'),
)

# Build one results table per dataset group and write it to a CSV file.
# Rows are models; columns are "<dataset>_<metric>" for every dataset in the
# group and every metric listed below.
#
# Each entry is (column suffix, loaded table, scale_by_100). Tables flagged
# True are multiplied by 100 and rounded to 1 decimal; the others are rounded
# to 2 decimals as-is.
metric_tables = [
    ('NUM_TRIPLES', num_triples, False),
    ('NUM_TOKENS_PER_TRIPLE', num_tokens_per_triple, False),
    ('TS', TS, True),
    ('US', US, True),
    ('FS', FS, True),
    ('GS', GS, True),
    ('CS', CS, True),
]

for dataset_names in [dataset_names_1, dataset_names_2, dataset_names_3]:
    # Pre-fill every cell with None so each model row carries the full column
    # set in a fixed order (dataset-major, metric-minor).
    data = {
        model: {
            f"{dataset}_{metric}": None
            for dataset in dataset_names
            for metric, _table, _scale in metric_tables
        }
        for model in model_names
    }

    for dataset_name in dataset_names:
        for metric, metric_data, scale_by_100 in metric_tables:
            # A dataset missing from a table yields an empty mapping, so every
            # model is reported as missing for that column.
            per_model = metric_data.get(dataset_name, {})
            for model_name in model_names:
                score = per_model.get(model_name)
                if score is None:
                    # Missing values are rendered as "-" for every metric;
                    # rounding None would raise TypeError.
                    cell = "-"
                elif scale_by_100:
                    cell = f"{round(score * 100, 1)}"
                else:
                    cell = f"{round(score, 2)}"
                data[model_name][f"{dataset_name}_{metric}"] = cell

    # Convert the dictionary to a DataFrame
    df = pd.DataFrame(data).T  # Transpose to have models as rows

    # Defensive: any cell still left as None/NaN is shown as "-".
    df.fillna("-", inplace=True)

    # Write the table to a CSV file.
    # NOTE(review): `dataset_names` is a list, so the file name contains its
    # repr (e.g. results_['cdr_rand_200', 'docred_rand_200'].csv). Kept as-is
    # in case downstream code reads these exact names — confirm and consider
    # a cleaner name such as '_'.join(dataset_names).
    df.to_csv(f'./results/results_{dataset_names}.csv')
        