GENERAZIONE DEI NUOVI CLAIMS ALLINEATI (ALIGNMENT)

In [3]:
import json
import os

def load_json(path):
    """Read the JSON document stored at ``path`` and return the parsed object.

    The file is decoded explicitly as UTF-8 so the result does not depend
    on the platform's default text encoding.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

def save_json(data, path):
    """Serialise ``data`` as JSON (4-space indent) into the file at ``path``.

    The file is written explicitly as UTF-8 so that it can be read back
    with a UTF-8 reader regardless of the platform's default encoding.

    Args:
        data: JSON-serialisable object to write.
        path: Destination file; it is created or overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains objects ``json`` cannot serialise.
    """
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

# Reference dictionary used to replace names, values and measures.
dictionary_path = '../data/alignment/dictionary.json'
dictionary = load_json(dictionary_path)

# Where the claim files are read from and where the processed copies go.
source_directory = '../data/Gemini_claims/json'
destination_directory = '../data/alignment/claims'

# Only the .json entries of the source folder are processed.
json_files = [
    entry
    for entry in os.listdir(source_directory)
    if entry.endswith('.json')
]

# Create the destination folder if it is not there yet.
os.makedirs(destination_directory, exist_ok=True)

def check_and_replace(current_value, dictionary):
    """Return the dictionary key whose entry contains ``current_value``.

    Entries are scanned in the dictionary's iteration order and the first
    key whose associated container includes ``current_value`` wins. When no
    entry contains it, ``current_value`` itself is returned unchanged.
    """
    matching_keys = (
        label
        for label, variants in dictionary.items()
        if current_value in variants
    )
    # The default of next() covers the "no match" case.
    return next(matching_keys, current_value)

# Rewrite every claims file, replacing specification names/values and the
# claim measure with the matching key of the reference dictionary.
for json_file in json_files:
    source_path = os.path.join(source_directory, json_file)
    destination_path = os.path.join(destination_directory, json_file)

    data = load_json(source_path)

    for claim_data in data.values():
        # Fields are treated as optional: a claim without specifications, or
        # a specification without a name/value, is left as it is instead of
        # aborting the whole run with a KeyError after some files were
        # already written.
        for spec in claim_data.get('specifications', {}).values():
            # Check and replace "name"
            if 'name' in spec:
                spec['name'] = check_and_replace(spec['name'], dictionary['name_specifications'])

            # Check and replace "value"
            if 'value' in spec:
                spec['value'] = check_and_replace(spec['value'], dictionary['value_specifications'])

        # Check and replace "Measure"
        if 'Measure' in claim_data:
            claim_data['Measure'] = check_and_replace(claim_data['Measure'], dictionary['name_measure'])

    save_json(data, destination_path)

print("Elaborazione completata.")


Elaborazione completata.


Codice per generare il file DATA_HUNTERS_ALIGNMENT.JSON (il codice lo salva come ../data/alignment/alignment.json)

In [4]:
import os
import json

def read_json_files(directory):
    """Load every ``.json`` file found directly inside ``directory``.

    Files are visited in sorted filename order. ``os.listdir`` returns
    entries in arbitrary order, so sorting keeps the result, and anything
    built from it, reproducible across runs and machines.

    Args:
        directory: Folder to scan (not recursive).

    Returns:
        A list of ``(filename, parsed_content)`` tuples, one per JSON file.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    data = []
    for filename in sorted(os.listdir(directory)):
        if filename.endswith('.json'):
            # Explicit UTF-8 avoids problems with special characters.
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
                data.append((filename, json.load(file)))
    return data

def extract_data(json_files):
    """Group claim identifiers by specification name, value and measure.

    Args:
        json_files: Iterable of ``(filename, content)`` pairs. ``filename``
            must look like ``paperID_tableID[_anything].json``; ``content``
            maps each claim id to a dict that may hold a ``'Measure'`` and a
            ``'specifications'`` mapping of spec id -> ``{'name', 'value'}``.

    Returns:
        A tuple ``(aligned_names, aligned_values, aligned_measures)`` of
        plain dicts. The first two map a name / value to the list of
        ``paper_table_claim_spec`` keys that use it; the third maps a
        measure to the list of ``paper_table_claim`` keys (no spec id).

    Raises:
        ValueError: If a filename has fewer than two ``_``-separated
            segments, so paper and table ids cannot be derived.
    """
    aligned_names = {}
    aligned_values = {}
    aligned_measures = {}  # Measures are stored WITHOUT a specification id

    for filename, content in json_files:
        # Drop the extension first so that a two-segment name such as
        # "paper_table.json" yields "table" rather than "table.json".
        stem = filename[:-len('.json')] if filename.endswith('.json') else filename
        segments = stem.split('_')
        if len(segments) < 2:
            raise ValueError(
                f"Cannot derive paper and table ids from {filename!r}: "
                "expected a name like 'paperID_tableID_suffix.json'"
            )
        # Only the first two segments matter; anything after is ignored.
        paper_id, table_id = segments[:2]

        for claim_id, claim_data in content.items():
            # 1) The measure belongs to the claim, so its key has no spec id.
            #    Missing or empty measures are skipped.
            measure = claim_data.get('Measure')
            if measure:
                measure_key = f"{paper_id}_{table_id}_{claim_id}"
                aligned_measures.setdefault(measure, []).append(measure_key)

            # 2) Names and values belong to a specification, so their key
            #    carries the spec id as well.
            specs = claim_data.get('specifications', {})
            for spec_id, spec_details in specs.items():
                # A missing name/value is grouped under the empty string.
                name = spec_details.get('name', '')
                value = spec_details.get('value', '')

                specs_key = f"{paper_id}_{table_id}_{claim_id}_{spec_id}"
                aligned_names.setdefault(name, []).append(specs_key)
                aligned_values.setdefault(value, []).append(specs_key)

    return aligned_names, aligned_values, aligned_measures

def save_json(data, path):
    """Write ``data`` to ``path`` as UTF-8 JSON indented by four spaces."""
    with open(path, mode='w', encoding='utf-8') as handle:
        json.dump(data, fp=handle, indent=4)

# Entry point of this cell: read the processed claim files, group their
# keys by name / value / measure and store the three groupings in one file.
directory = '../data/alignment/claims'
output_path = '../data/alignment/alignment.json'

json_files = read_json_files(directory)
aligned_names, aligned_values, aligned_measures = extract_data(json_files)

# One top-level section per grouping, in this fixed order.
result_data = dict(
    aligned_names=aligned_names,
    aligned_values=aligned_values,
    aligned_measures=aligned_measures,
)

save_json(result_data, output_path)