# 4B. Append augmented tiles to the dataset JSON

In [None]:
# This code opens the dataset_bin.json file, adds new entries for the newly created augmented tiles, and saves an updated version of the JSON.
# Using this dedicated code instead of "4B. JSON merge" ensures that each tile and its augmented versions end up in only one of the training, validation, or test splits, which prevents data leakage.

In [6]:
import os
import json

def load_json(json_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Binary mode lets the json module detect UTF-8/16/32 itself (and skip a
    # UTF-8 BOM) instead of decoding with the platform's locale encoding,
    # which is not UTF-8 on a default Windows setup.
    with open(json_path, 'rb') as f:
        return json.load(f)

def save_json(data, output_path):
    """Write ``data`` to ``output_path`` as JSON indented by four spaces.

    Args:
        data: JSON-serialisable object to store.
        output_path: Destination file; it is overwritten if it exists.

    Raises:
        TypeError: If ``data`` contains a value json cannot serialise. The
            destination file is left untouched in that case.
        OSError: If the file cannot be opened for writing.
    """
    # Serialise before opening the file: opening with 'w' truncates it, so a
    # serialisation error raised afterwards would destroy an existing file
    # (callers may pass the input JSON path as the output path).
    serialized = json.dumps(data, indent=4)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

def get_tileid_set(split):
    """Index the entries of one split by their ``"tileid"`` value.

    Despite the name, the result is a dict (tileid -> entry), not a set.
    When several entries share a tileid, the last one is kept.

    Args:
        split: Iterable of entries, each subscriptable with ``"tileid"``.

    Returns:
        Dict mapping each tileid to its entry.

    Raises:
        KeyError: If an entry has no ``"tileid"`` key.
    """
    entries_by_tile = {}
    for record in split:
        entries_by_tile[record["tileid"]] = record
    return entries_by_tile

def augment_dataset(json_path, augmented_dir_images, augmented_dir_labels, output_json_path=None,
                    *, image_prefix="data/npy_images", label_prefix="data/npy_masks"):
    """Append augmented tiles to the split that already holds their source tile.

    Every ``*.npy`` file in ``augmented_dir_images`` whose name contains
    ``_aug`` is matched to a tile id (the part of the file name before the
    first ``_aug``). If that tile id is listed under ``"training"``,
    ``"validation"`` or ``"testing"`` in the JSON, a new entry is appended to
    the same split, so a tile and its augmentations never end up in different
    splits. Files whose tile id is in no split are ignored.

    Args:
        json_path: Path of the dataset JSON to read.
        augmented_dir_images: Folder holding the augmented image files.
        augmented_dir_labels: Folder holding the augmented mask files. When
            given (non-empty), an augmented image is only added if a mask with
            the same file name exists in this folder.
        output_json_path: Where to save the result; defaults to ``json_path``
            (the input file is overwritten).
        image_prefix: Path prefix written into each new entry's ``"image"``.
        label_prefix: Path prefix written into each new entry's ``"label"``.
    """
    data = load_json(json_path)

    # Map every tile id to the split it belongs to. Splits are visited in a
    # fixed order and the first one wins, so a tile id that is (wrongly)
    # present in several splits keeps the training > validation > testing
    # precedence.
    split_names = ("training", "validation", "testing")
    split_of_tile = {}
    for split_name in split_names:
        for tileid in get_tileid_set(data.get(split_name, [])):
            split_of_tile.setdefault(tileid, split_name)

    # Image paths already present in the JSON: running this function twice
    # (e.g. in place on the same file) must not append the same sample again.
    known_images = {
        entry.get("image")
        for split_name in split_names
        for entry in data.get(split_name, [])
    }

    added_count = 0
    duplicate_count = 0
    missing_label_count = 0

    # os.listdir order is arbitrary; sorting keeps the output reproducible.
    for file_name in sorted(os.listdir(augmented_dir_images)):
        if "_aug" not in file_name or not file_name.endswith('.npy'):
            continue

        tile_base = file_name.split('_aug')[0]
        split_name = split_of_tile.get(tile_base)
        if split_name is None:
            continue

        image_path = f"{image_prefix}/{file_name}"
        if image_path in known_images:
            duplicate_count += 1
            continue

        # The entry references a mask with the same file name; do not record
        # a sample whose mask is not actually there.
        if augmented_dir_labels and not os.path.isfile(
                os.path.join(augmented_dir_labels, file_name)):
            missing_label_count += 1
            continue

        data[split_name].append({
            "image": image_path,
            "label": f"{label_prefix}/{file_name}",
            "tileid": tile_base
        })
        known_images.add(image_path)
        added_count += 1

    print(f"Added {added_count} augmented samples.")
    if duplicate_count:
        print(f"Skipped {duplicate_count} augmented samples already present in the JSON.")
    if missing_label_count:
        print(f"Skipped {missing_label_count} augmented samples without a matching label file.")

    output_path = output_json_path or json_path
    save_json(data, output_path)
    print(f"Updated JSON saved to: {output_path}")

# === USAGE ===
# Path to original JSON
json_input = "E:/ML/Levees/Datasets/N48/dataset_bin_48/dataset_bin.json"
# Folder with augmented image files
augmented_folder_images = "E:/ML/Levees/Datasets/N48/dataset_bin_48/npy_images_"
# Folder with augmented mask files
augmented_folder_labels = "E:/ML/Levees/Datasets/N48/dataset_bin_48/npy_masks_"
# Output path for updated JSON
json_output = "E:/ML/Levees/Datasets/N48/dataset_bin_48/dataset_bin_up.json"

# Append the augmented tiles and write the result to a new file, leaving the
# original JSON untouched.
augment_dataset(
    json_path=json_input,
    augmented_dir_images=augmented_folder_images,
    augmented_dir_labels=augmented_folder_labels,
    output_json_path=json_output,
)


Added 1824 augmented samples.
Updated JSON saved to: E:/ML/Levees/Datasets/N48/dataset_bin_48/dataset_bin_up.json
