# In [2]:
import csv
import os
import json

def read_csv_metadata(file_path):
    """Read the metadata CSV and map each image id to its diagnostic.

    The CSV is parsed with ``csv.DictReader``, so its first row must be a
    header that contains the columns ``img_id`` and ``diagnostic``.

    Args:
        file_path: Path to the CSV file.

    Returns:
        A dict mapping each ``img_id`` (with a trailing ``.png`` extension
        removed, if present) to that row's ``diagnostic`` value. When the
        same id appears on several rows, the last row wins.

    Raises:
        KeyError: If the header lacks the ``img_id`` or ``diagnostic`` column.
        OSError: If the file cannot be opened.
    """
    extension = '.png'
    diagnostic_data = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(file_path, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            img_id = row['img_id']
            # Strip the extension only at the end of the id; str.replace
            # would also delete a '.png' that occurs in the middle of it.
            if img_id.endswith(extension):
                img_id = img_id[:-len(extension)]
            diagnostic_data[img_id] = row['diagnostic']
    return diagnostic_data

def get_diagnostic(file_path, diagnostic_data):
    """Look up the diagnostic for a mask file.

    The file identifier is the final path component of ``file_path`` with a
    trailing ``_mask.png`` removed (if present).

    Args:
        file_path: Path to the mask file, e.g. ``data/masks/PAT_1_mask.png``.
        diagnostic_data: Dict mapping file identifiers to diagnostics.

    Returns:
        The diagnostic stored under the file identifier, or the string
        ``"Diagnostic not found"`` when the identifier is not a key of
        ``diagnostic_data``.
    """
    mask_suffix = '_mask.png'
    # os.path.basename honours the platform's path separators, so paths
    # built with os.path.join also work where the separator is not '/'.
    file_id = os.path.basename(file_path)
    # Remove the marker only as a suffix, never from the middle of the name.
    if file_id.endswith(mask_suffix):
        file_id = file_id[:-len(mask_suffix)]
    return diagnostic_data.get(file_id, "Diagnostic not found")

def save_to_json(data, file_path):
    """Write ``data`` to ``file_path`` as JSON indented by four spaces.

    Args:
        data: A JSON-serializable object.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If ``data`` contains a value that json cannot serialize.
            In that case the destination file is left untouched.
        OSError: If the destination cannot be opened for writing.
    """
    # Serialize first: open(..., 'w') truncates the file immediately, so a
    # serialization error raised after opening would destroy or half-write
    # any existing output.
    serialized = json.dumps(data, indent=4)
    with open(file_path, 'w') as jsonfile:
        jsonfile.write(serialized)

csv_file_path = 'additional_files/metadata.csv'
json_file_path = 'raw_data/diagnostic.json'

# Load the img_id -> diagnostic lookup table from the metadata CSV
diagnostic_data = read_csv_metadata(csv_file_path)

# Maps the path of each regular file in `directory` to its diagnostic
file_diagnostics = {}

directory = "data/masks"
# os.listdir returns entries in arbitrary order; sorting keeps the key order
# of the written JSON reproducible from run to run.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)
    if os.path.isfile(f):  # skip sub-directories and other non-file entries
        diagnostic_value = get_diagnostic(f, diagnostic_data)
        file_diagnostics[f] = diagnostic_value

# Persist the path -> diagnostic mapping
save_to_json(file_diagnostics, json_file_path)