## Convert Lists into DB Arrays

In [1]:
import json

# Function to convert the language string to a list of strings
def convert_language_to_list(language_str):
    """Convert a stringified list such as '["a", "b"]' into a list of strings.

    The text is first parsed as JSON, then as a Python literal (which covers
    single-quoted items). If neither yields a list, the surrounding square
    brackets are removed and the text is split on commas, with quotes and
    whitespace stripped from every item.

    Args:
        language_str: The string holding the serialized list.

    Returns:
        A list of stripped strings. Empty input ('', '[]') gives [].
    """
    import ast  # local import: only this function needs it

    text = language_str.strip()

    # Real parsers handle quoting correctly, including items that contain a
    # comma and lists written without a space after the separator.
    for parse in (json.loads, ast.literal_eval):
        try:
            parsed = parse(text)
        except (ValueError, SyntaxError, TypeError, RecursionError):
            continue
        if isinstance(parsed, (list, tuple)):
            return [str(item).strip() for item in parsed]

    # Fallback for text that is not a valid literal, e.g. "[OpenAI, Meta]".
    pieces = text.strip("[]").split(",")
    cleaned = [piece.strip().strip("\"'").strip() for piece in pieces]
    # Drop empty fragments so an empty list does not become [''].
    return [piece for piece in cleaned if piece]

# Load the JSON file.
# The backslash is escaped explicitly: the string value is unchanged, but it no
# longer relies on "\C" being an unrecognized escape sequence, which newer
# Python versions warn about.
input_file_path = 'final database\\ChatIMPACT.LargeLanguageModel.json'
output_file_path = 'final database\\ChatIMPACT.LargeLanguageModel.json'

# Read as UTF-8 explicitly; without it open() falls back to the locale
# encoding, and since the file is rewritten below a wrong decode would be
# persisted.
with open(input_file_path, 'r', encoding='utf-8') as file:
    models_data = json.load(file)

# Convert the "developers" field from a stringified list to a proper list.
# Values that are not strings are left untouched.
for model in models_data:
    if isinstance(model.get("developers"), str):
        model["developers"] = convert_language_to_list(model["developers"])

# Save the updated JSON data. The output path equals the input path, so the
# source file is overwritten in place.
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(models_data, json_file, indent=4)

print(f"Updated JSON data has been saved to {output_file_path}")


Updated JSON data has been saved to final database\ChatIMPACT.LargeLanguageModel.json


In [19]:
import json

# Function to format the JSON data
def format_json(data):
    """Expand each record's DownstreamTask ids into a list of {"$oid": id} objects.

    Accepted shapes for item["DownstreamTask"]:
      * {"$oid": [id, ...]} - one output object per id;
      * {"$oid": id}        - a single string id, treated as a one-item list
                              instead of being iterated character by character;
      * [{"$oid": id}, ...] - already expanded; passed through so that running
                              the function on its own output is a no-op.

    Args:
        data: Iterable of dicts with "LargeLanguageModel" and "DownstreamTask" keys.

    Returns:
        A new list of dicts; "LargeLanguageModel" is copied by reference.

    Raises:
        KeyError: If a required key is missing.
        TypeError: If "DownstreamTask" is neither a dict nor a list.
    """
    formatted_data = []
    for item in data:
        downstream = item["DownstreamTask"]
        if isinstance(downstream, dict):
            oids = downstream["$oid"]
            if isinstance(oids, str):
                oids = [oids]
        elif isinstance(downstream, list):
            oids = [entry["$oid"] if isinstance(entry, dict) else entry
                    for entry in downstream]
        else:
            raise TypeError(
                f"Unsupported DownstreamTask value: {downstream!r}"
            )
        formatted_data.append({
            "LargeLanguageModel": item["LargeLanguageModel"],
            "DownstreamTask": [{"$oid": oid} for oid in oids],
        })
    return formatted_data

# Load the input JSON data
input_file = 'json relazioni\\suitedFor.json'  # Replace with your actual input file path
output_file = 'json relazioni\\suitedFor.json'  # Output file path

# Read as UTF-8 explicitly rather than relying on the locale default encoding.
with open(input_file, 'r', encoding='utf-8') as file:
    initial_data = json.load(file)

# Format the JSON data
formatted_data = format_json(initial_data)

# Save the formatted data. With the paths above the output overwrites the
# input file rather than creating a new one.
with open(output_file, 'w', encoding='utf-8') as file:
    json.dump(formatted_data, file, indent=4)

print(f"Formatted data saved to {output_file}")


Formatted data saved to json relazioni\suitedFor.json


## Make Train Relationships

In [18]:
import json

# Load the JSON files (UTF-8 is requested explicitly instead of relying on the
# locale default encoding used by open()).
with open('final database hf extracted/HuggingFace.Datasets.json', 'r', encoding='utf-8') as f:
    datasets = json.load(f)

with open('final database hf extracted/HuggingFace.Models.json', 'r', encoding='utf-8') as f:
    models = json.load(f)

with open('hf extracted json/ChatIMPACT.TrainRelationship.json', 'r', encoding='utf-8') as f:
    relationships = json.load(f)

# Map dataset name -> dataset id (a repeated name keeps the last id seen)
dataset_dict = {dataset['name']: dataset['_id']['$oid'] for dataset in datasets}

# Map model name -> list of model ids, allowing multiple entries per name
model_dict = {}
for model in models:
    model_dict.setdefault(model['name'], []).append(model['_id']['$oid'])

# Build one {model, dataset} pair for every resolvable combination
output_relationships = []
for relationship in relationships:
    model_ids = model_dict.get(relationship["Models"], [])
    if not model_ids:
        # Unknown model name: nothing to link for this record.
        continue

    dataset_names = relationship["Datasets"]
    if not dataset_names:
        dataset_names = []
    elif isinstance(dataset_names, str):
        # A single name stored as a plain string; iterating it directly would
        # look up individual characters.
        dataset_names = [dataset_names]

    # Resolved once per record (the result does not depend on the model id);
    # names with no matching dataset are dropped.
    dataset_ids = [ds_id for ds_id in map(dataset_dict.get, dataset_names) if ds_id]

    for model_id in model_ids:
        for dataset_id in dataset_ids:
            output_relationships.append({
                "LargeLanguageModel": {"$oid": model_id},
                "Dataset": {"$oid": dataset_id}
            })

# Save the relationships to a JSON file
output_file = 'train.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_relationships, f, indent=4)

print(f"Relationships saved to {output_file}")

Relationships saved to train.json


## Make SuitedFor Relationships

In [19]:
import json

# Load the JSON files (UTF-8 is requested explicitly instead of relying on the
# locale default encoding used by open()).
with open('final database hf extracted/HuggingFace.Downstram Tasks.json', 'r', encoding='utf-8') as f:
    tasks = json.load(f)

with open('final database hf extracted/HuggingFace.Models.json', 'r', encoding='utf-8') as f:
    models = json.load(f)

with open('hf extracted json/ChatIMPACT.SuitedForRelationship.json', 'r', encoding='utf-8') as f:
    relationships = json.load(f)

# Map task name -> task id (a repeated name keeps the last id seen)
task_dict = {task['name']: task['_id']['$oid'] for task in tasks}

# Map model name -> list of model ids, allowing multiple entries per name
model_dict = {}
for model in models:
    model_dict.setdefault(model['name'], []).append(model['_id']['$oid'])

# Build one {model, task} pair for every resolvable combination
output_relationships = []
for relationship in relationships:
    model_ids = model_dict.get(relationship["LargeLanguageModel"], [])
    if not model_ids:
        # Unknown model name: nothing to link for this record.
        continue

    task_names = relationship["DownstreamTask"]
    if not task_names:
        task_names = []
    elif isinstance(task_names, str):
        # A single name stored as a plain string; iterating it directly would
        # look up individual characters.
        task_names = [task_names]

    # Resolved once per record (the result does not depend on the model id);
    # names with no matching task are dropped.
    task_ids = [t_id for t_id in map(task_dict.get, task_names) if t_id]

    for model_id in model_ids:
        for task_id in task_ids:
            output_relationships.append({
                "LargeLanguageModel": {"$oid": model_id},
                "DownstreamTask": {"$oid": task_id}
            })

# Save the relationships to a JSON file
output_file = 'suitedFor.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_relationships, f, indent=4)

print(f"Relationships saved to {output_file}")

Relationships saved to suitedFor.json


## Make Evaluated Relationships

In [1]:
import json

# Load the JSON files (UTF-8 is requested explicitly instead of relying on the
# locale default encoding used by open()).
with open('final database hf extracted/HuggingFace.Metrics.json', 'r', encoding='utf-8') as f:
    metrics = json.load(f)

with open('final database hf extracted/HuggingFace.Models.json', 'r', encoding='utf-8') as f:
    models = json.load(f)

with open('hf extracted json/ChatIMPACT.EvaluateRelationship.json', 'r', encoding='utf-8') as f:
    relationships = json.load(f)

# Map metric name -> metric id (a repeated name keeps the last id seen)
metric_dict = {metric['name']: metric['_id']['$oid'] for metric in metrics}

# Map model name -> list of model ids, allowing multiple entries per name
model_dict = {}
for model in models:
    model_dict.setdefault(model['name'], []).append(model['_id']['$oid'])

# Build one {model, metric} pair for every resolvable combination
output_relationships = []
for relationship in relationships:
    model_ids = model_dict.get(relationship["LargeLanguageModel"], [])
    if not model_ids:
        # Unknown model name: nothing to link for this record.
        continue

    # "Metric" may hold a list of names or a single name.
    metric_names = relationship["Metric"]
    if not metric_names:
        metric_names = []
    elif not isinstance(metric_names, list):
        metric_names = [metric_names]

    # Resolved once per record (the result does not depend on the model id);
    # names with no matching metric are dropped.
    metric_ids = [m_id for m_id in map(metric_dict.get, metric_names) if m_id]

    for model_id in model_ids:
        for metric_id in metric_ids:
            output_relationships.append({
                "LargeLanguageModel": {"$oid": model_id},
                "Metric": {"$oid": metric_id}
            })

# Save the relationships to a JSON file
output_file = 'evaluated.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_relationships, f, indent=4)

print(f"Modified relationships saved to {output_file}")

Modified relationships saved to evaluated.json


## Make Enable Relationships

In [2]:
import json

# Load the JSON files (UTF-8 is requested explicitly instead of relying on the
# locale default encoding used by open()).
with open('final database hf extracted/HuggingFace.Datasets.json', 'r', encoding='utf-8') as f:
    datasets = json.load(f)

with open('final database hf extracted/HuggingFace.Downstram Tasks.json', 'r', encoding='utf-8') as f:
    tasks = json.load(f)

with open('hf extracted json/ChatIMPACT.EnableRelationship.json', 'r', encoding='utf-8') as f:
    relationships = json.load(f)

# Map dataset name -> dataset id (a repeated name keeps the last id seen)
dataset_dict = {dataset['name']: dataset['_id']['$oid'] for dataset in datasets}

# Map task name -> task id (a repeated name keeps the last id seen)
task_dict = {task['name']: task['_id']['$oid'] for task in tasks}

# Build one {dataset, task} pair for every resolvable combination
output_relationships = []
for relationship in relationships:
    dataset_id = dataset_dict.get(relationship["Dataset"])
    if not dataset_id:
        # Unknown dataset name: nothing to link for this record.
        continue

    # "DownstreamTask" may hold a list of names or a single name.
    task_names = relationship["DownstreamTask"]
    if not task_names:
        task_names = []
    elif not isinstance(task_names, list):
        task_names = [task_names]

    # Names with no matching task are dropped.
    task_ids = [t_id for t_id in map(task_dict.get, task_names) if t_id]

    for task_id in task_ids:
        output_relationships.append({
            "Dataset": {"$oid": dataset_id},
            "DownstreamTask": {"$oid": task_id}
        })

# Save the relationships to a JSON file
output_file = 'enable.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_relationships, f, indent=4)

print(f"Modified relationships saved to {output_file}")

Modified relationships saved to enable.json


## Make Assess Relationships

In [3]:
import json

# Load the JSON files (UTF-8 is requested explicitly instead of relying on the
# locale default encoding used by open()).
with open('final database hf extracted/HuggingFace.Metrics.json', 'r', encoding='utf-8') as f:
    metrics = json.load(f)

with open('final database hf extracted/HuggingFace.Downstram Tasks.json', 'r', encoding='utf-8') as f:
    tasks = json.load(f)

with open('hf extracted json/ChatIMPACT.AssessRelationship.json', 'r', encoding='utf-8') as f:
    relationships = json.load(f)

# Map metric name -> metric id (a repeated name keeps the last id seen)
metric_dict = {metric['name']: metric['_id']['$oid'] for metric in metrics}

# Map task name -> task id (a repeated name keeps the last id seen)
task_dict = {task['name']: task['_id']['$oid'] for task in tasks}

# Build one {task, metric} pair for every resolvable combination
output_relationships = []
for relationship in relationships:
    task_id = task_dict.get(relationship["DownstreamTask"])
    if not task_id:
        # Unknown task name: nothing to link for this record.
        continue

    # "Metric" may hold a list of names or a single name.
    metric_names = relationship["Metric"]
    if not metric_names:
        metric_names = []
    elif not isinstance(metric_names, list):
        metric_names = [metric_names]

    # Names with no matching metric are dropped.
    metric_ids = [m_id for m_id in map(metric_dict.get, metric_names) if m_id]

    for metric_id in metric_ids:
        output_relationships.append({
            "DownstreamTask": {"$oid": task_id},
            "Metric": {"$oid": metric_id}
        })

# Save the relationships to a JSON file
# NOTE(review): 'asses.json' looks like a typo for 'assess.json'; the name is
# kept as-is because other tooling may read this exact file - confirm.
output_file = 'asses.json'
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(output_relationships, f, indent=4)

print(f"Modified relationships saved to {output_file}")

Modified relationships saved to asses.json
