In [None]:
# Goal: merge the figurative reasoning stored in anxiety_test_fig_extractor_results.jsonl into the entries of anxiety_test.json, and write the result to anxiety_test_combined.json.
"""
The anxiety_test.json looks like this: [
    {
        "sample_id": "TE-1",
        "ocr_text": "Me stressing about things have no control over",
        "meme_anxiety_category": "Lack of Worry Control"
    }, ... ]

And the anxiety_test_fig_extractor_results.jsonl looks like this: 

{"image_filename": "TE-104.jpg", "result": {"id": "chatcmpl-38e30e3245e14bd7923f29ad04017faa", "object": "chat.completion", "created": 1744617016, "model": "Qwen/Qwen2.5-VL-7B-Instruct", "choices": [{"index": 0, "message": {"role": "assistant", "content": "1. **Cause-effect**: \n   - **Cause**: Social anxiety at a party.\n   - **Effect**: SpongeBob is considering leaving the party.\n\n2. **Figurative Understanding**: \n   - The meme uses SpongeBob's expression and body language to symbolize the feeling of social anxiety. The phrase \"IGHT IMMA HEAD OUT\" humorously represents the urge to leave the party due to anxiety, which is a common experience for many people.\n\n3. **Mental State**: \n   - **Mental State**: Social anxiety, feeling overwhelmed, and the desire to escape the situation.", "tool_calls": []}, "finish_reason": "stop", "logprobs": null}], "usage": {"prompt_tokens": 113, "total_tokens": 235, "completion_tokens": 122}}}
{"image_filename": "TE-1.jpg", "result": {"id": "chatcmpl-141a8446901043ef9ef866372a4027c2", "object": "chat.completion", "created": 1744617017, "model": "Qwen/Qwen2.5-VL-7B-Instruct", "choices": [{"index": 0, "message": {"role": "assistant", "content": "1. **Cause-effect**: \n   - **Cause**: Stressing about things one has no control over.\n   - **Effect**: Increased anxiety and emotional distress.\n\n2. **Figurative Understanding**:\n   - **Metaphor**: The meme uses the visual metaphor of a person covering their face with their hands to symbolize the overwhelming feeling of helplessness and anxiety when faced with uncontrollable situations.\n   - **Symbolism**: The increasing size of the hands covering the face in the second panel symbolizes the intensification of the anxiety and emotional turmoil.\n\n3. **Mental State**:\n   - **Emotional State**: Anxiety, helplessness, and emotional distress.", "tool_calls": []}, "finish_reason": "stop", "logprobs": null}], "usage": {"prompt_tokens": 113, "total_tokens": 251, "completion_tokens": 138}}} ...

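Add the key "figurative_reasoning" to each entry of anxiety_test.json, matching an entry's sample_id to the image_filename without its extension (e.g. "TE-1.jpg" -> "TE-1") and copying result.choices[0].message.content, then store the new JSON as <filename>_combined.json.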
"""

In [5]:
import json
import os

def _sample_id_from_filename(image_filename):
    """Return the file stem used as the sample id (e.g. 'imgs/TE-1.jpg' -> 'TE-1')."""
    # basename + splitext drop only the directory part and the final extension,
    # so a dotted stem such as 'TE-1.v2.jpg' yields 'TE-1.v2' instead of being
    # truncated at the first dot.
    return os.path.splitext(os.path.basename(image_filename))[0]


def combine_json_and_jsonl(json_file, jsonl_file, output_file):
    """
    Merge figurative-reasoning text from a JSONL file into a JSON list of samples.

    Each non-blank JSONL line must be an object with an 'image_filename' and a
    chat-completion style 'result'; the text at
    result['choices'][0]['message']['content'] is stored under the key
    'figurative_reasoning' of the JSON entry whose 'sample_id' equals the image
    file name without directory and extension. Entries with no matching JSONL
    record get 'figurative_reasoning' set to None. If several JSONL records map
    to the same sample id, the last one wins.

    Args:
        json_file (str): Path to the JSON file holding a list of dicts, each
            with a 'sample_id' key.
        jsonl_file (str): Path to the JSONL file with one extractor result per line.
        output_file (str): Path to save the combined JSON file.

    Returns:
        None. The combined list is written to output_file (indent=4).

    Raises:
        json.JSONDecodeError: If the JSON file or a non-blank JSONL line is malformed.
        KeyError / IndexError: If a record lacks one of the expected fields.
    """
    # Pin UTF-8 instead of relying on the platform's default encoding, which
    # is not UTF-8 on every system.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Map sample_id -> figurative reasoning text extracted from the JSONL file.
    fig_reasoning_dict = {}
    with open(jsonl_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline at end of file) are
                # not records; json.loads would raise on them.
                continue
            data = json.loads(line)
            sample_id = _sample_id_from_filename(data['image_filename'])
            fig_reasoning_dict[sample_id] = data['result']['choices'][0]['message']['content']

    # Attach the reasoning to each sample; .get() yields None when no record matched.
    for item in json_data:
        item['figurative_reasoning'] = fig_reasoning_dict.get(item['sample_id'])

    # Save the combined data to a new JSON file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, indent=4)

# Inputs and output for this run: the validation split under Depressive_Data.
json_file = '/Users/dhruv/Work/IITD/winter_semester/NLP/Project/mental-health-meme-classification/dataset/Depressive_Data/val.json'
jsonl_file = '/Users/dhruv/Work/IITD/winter_semester/NLP/Project/mental-health-meme-classification/dataset/Depressive_Data/depressive_val_fig_extractor_results.jsonl'
output_file = '/Users/dhruv/Work/IITD/winter_semester/NLP/Project/mental-health-meme-classification/dataset/Depressive_Data/depressive_val_combined.json'

# Merge the extractor results into the sample list and write the combined file.
combine_json_and_jsonl(
    json_file=json_file,
    jsonl_file=jsonl_file,
    output_file=output_file,
)