In [9]:
import json
import os
import sys

def clean_json(data):
    if isinstance(data, dict) and 'entities' in data:
        entities = data['entities']
        if entities and not all(key in entities[-1] for key in ['name', 'category', 'reasoning']):
            entities.pop()
        return data
    return data

# Specify the directory containing your JSON files
directory = 'llama3_generated_entity_test'

# Iterate through all JSON files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.json'):
        filepath = os.path.join(directory, filename)
        
        print(f"Processing file: {filename}")
        
        try:
            # Read the current JSON file
            with open(filepath, 'r') as file:
                data = json.load(file)
            
            # Clean the JSON data
            data = clean_json(data)
            
            if isinstance(data, list) and all('entity' in item for item in data):
                # The file is already in the desired format
                print(f"File already in correct format: {filename}")
                continue
            
            if 'entities' not in data:
                raise KeyError("'entities' key not found in the JSON file")
            
            # Transform the data structure
            new_data = []
            for entity in data['entities']:
                new_entity = {
                    'entity': entity['name'],
                    'category': entity['category'],
                    'reasoning': entity['reasoning']
                }
                new_data.append(new_entity)
            
            # Write the transformed data back to the file
            with open(filepath, 'w') as file:
                json.dump(new_data, file, indent=4)
            
            print(f"Successfully transformed: {filename}")
        except json.JSONDecodeError as e:
            print(f"JSON Decode Error in file: {filename}")
            print(f"Error message: {str(e)}")
            with open(filepath, 'r') as file:
                print(f"File content:")
                print(file.read())
            sys.exit(1)  # Stop processing immediately
        except Exception as e:
            print(f"Error processing file: {filename}")
            print(f"Error message: {str(e)}")
            print(f"File content:")
            with open(filepath, 'r') as file:
                print(file.read())
            sys.exit(1)  # Stop processing immediately

print("All JSON files have been processed successfully.")

Processing file: P80-1004_generated.json
File already in correct format: P80-1004_generated.json
Processing file: W04-2703_generated.json
File already in correct format: W04-2703_generated.json
Processing file: N06-4001_generated.json
File already in correct format: N06-4001_generated.json
Processing file: NIPS_2003_10_abs_generated.json
File already in correct format: NIPS_2003_10_abs_generated.json
Processing file: C04-1011_generated.json
File already in correct format: C04-1011_generated.json
Processing file: ECCV_2016_204_abs_generated.json
File already in correct format: ECCV_2016_204_abs_generated.json
Processing file: E06-1045_generated.json
Successfully transformed: E06-1045_generated.json
Processing file: CVPR_2010_11_abs_generated.json
Successfully transformed: CVPR_2010_11_abs_generated.json
Processing file: P03-1005_generated.json
Successfully transformed: P03-1005_generated.json
Processing file: H89-2066_generated.json
Successfully transformed: H89-2066_generated.json
Proc

AttributeError: 'tuple' object has no attribute 'tb_frame'