### 1. Export the Roboflow Dataset
**First, export your dataset from Roboflow in the COCO JSON format. This will ensure compatibility between the two datasets. Roboflow allows you to export datasets in various formats, and COCO JSON format is typically used for object detection tasks.**

### 2. Download the COCO Dataset
**Download the COCO dataset from the official website. Make sure to get the annotations in the COCO JSON format as well. Depending on your specific use case (e.g., object detection), you might need only certain parts of the COCO dataset.**

### 3. Consolidate Image Files
**Place all the image files from both datasets into a single directory. This might involve renaming files to ensure there are no naming conflicts between the two datasets.**

### 4. Merge Annotation Files
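**Merge the JSON annotation files from both datasets. This can be the most complex step, depending on the specifics of the datasets: image and annotation IDs must remain unique after merging, and category IDs must refer to the same classes in both files. The Python code further below is a rough guide to merging the JSON files.**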

### 5. Validation
**After merging the datasets, validate the new dataset to ensure there are no issues with the image paths or annotation formats. You can use visualization tools like CVAT or any custom scripts to check if the bounding boxes are correctly placed over the objects.**

### 6. Use in Your Model
**Once the merged dataset is validated, you can use it to train your model. Make sure to adjust any paths or configurations that reference the dataset locations.**

**If you run into any specific issues while merging the datasets or have more detailed questions about the process, feel free to ask!**

In [10]:
import json

import json

def load_json(file_path):
    """Read ``file_path`` as JSON and return the parsed object.

    A missing file or malformed JSON is reported on stdout and ``None`` is
    returned instead of raising; any other error propagates to the caller.
    """
    try:
        with open(file_path, 'r') as handle:
            return json.load(handle)
    except FileNotFoundError:
        problem = f"Error: The file {file_path} was not found."
    except json.JSONDecodeError:
        problem = f"Error: The file {file_path} is not a valid JSON file."

    # Only reached when one of the two handled errors occurred.
    print(problem)
    return None

def save_json(data, file_path):
    """Write ``data`` to ``file_path`` as JSON indented by four spaces.

    Best-effort: the outcome (success or the error that occurred) is printed
    to stdout and nothing is raised or returned.
    """
    try:
        with open(file_path, 'w') as sink:
            json.dump(data, sink, indent=4)
        outcome = f"Data successfully saved to {file_path}."
    except Exception as err:
        outcome = f"Failed to save data to {file_path}: {err}"
    print(outcome)

def change_category_id(roboflow_path, output_path, old_id=1, new_id=10):
    """Remap one category id in a COCO-format annotation file.

    Loads the JSON at ``roboflow_path``, rewrites ``category_id`` to
    ``new_id`` on every annotation whose ``category_id`` equals ``old_id``,
    and saves the result to ``output_path``. Only the ``annotations`` list is
    modified; any ``categories`` section in the file is written back
    unchanged.

    Args:
        roboflow_path: Path of the annotation JSON to read.
        output_path: Path the modified JSON is written to.
        old_id: Category id to replace. Defaults to 1 (the original
            hard-coded value).
        new_id: Replacement category id. Defaults to 10 (the original
            hard-coded value).

    Returns:
        None. Problems are reported on stdout rather than raised.
    """
    roboflow_data = load_json(roboflow_path)

    if not roboflow_data:
        print("Failed to load Roboflow dataset.")
        return

    try:
        # Annotations are edited in place on the freshly loaded data.
        for ann in roboflow_data['annotations']:
            if ann['category_id'] == old_id:
                ann['category_id'] = new_id

        # Save the modified Roboflow JSON file
        save_json(roboflow_data, output_path)

    except KeyError as e:
        print(f"Key error: {e}. Please check that the JSON file contains the required keys.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

def merge_datasets(coco_path, roboflow_path, output_path):
    """Merge the images and annotations of two COCO-format JSON files.

    The Roboflow image and annotation ids are shifted so that they continue
    directly after the largest id found in the COCO file, then both lists are
    concatenated and written to ``output_path``.

    The shift is computed from the smallest Roboflow id, so exports whose ids
    start at 0 no longer collide with the last COCO id (which previously made
    the de-duplication step silently drop a COCO record). For ids that start
    at 1 the result is identical to the previous behaviour.

    Args:
        coco_path: Path of the base COCO annotation JSON.
        roboflow_path: Path of the Roboflow COCO-format annotation JSON.
        output_path: Path the merged JSON is written to.

    Returns:
        None. Problems are reported on stdout rather than raised.
    """
    coco_data = load_json(coco_path)
    roboflow_data = load_json(roboflow_path)

    if not coco_data or not roboflow_data:
        print("Failed to load datasets. Aborting merge.")
        return

    try:
        coco_images = coco_data['images']
        coco_annotations = coco_data['annotations']
        roboflow_images = roboflow_data['images']
        roboflow_annotations = roboflow_data['annotations']

        # Offsets place the smallest Roboflow id at (largest COCO id + 1).
        # The defaults keep empty lists from raising and reduce to the plain
        # "add max id" shift when there is nothing to measure.
        image_offset = (
            max((img['id'] for img in coco_images), default=0) + 1
            - min((img['id'] for img in roboflow_images), default=1)
        )
        annotation_offset = (
            max((ann['id'] for ann in coco_annotations), default=0) + 1
            - min((ann['id'] for ann in roboflow_annotations), default=1)
        )

        # Shift Roboflow ids (in place, on the freshly loaded data) and keep
        # each annotation pointing at its shifted image.
        for img in roboflow_images:
            img['id'] += image_offset
        for ann in roboflow_annotations:
            ann['id'] += annotation_offset
            ann['image_id'] += image_offset

        # Merge images and annotations
        merged_images = coco_images + roboflow_images
        merged_annotations = coco_annotations + roboflow_annotations

        # Collapse any remaining duplicate ids (e.g. duplicates inside one
        # source file); the last record with a given id wins.
        unique_images = {img['id']: img for img in merged_images}.values()
        unique_annotations = {ann['id']: ann for ann in merged_annotations}.values()

        # Create a new merged dataset
        merged_data = {
            "images": list(unique_images),
            "annotations": list(unique_annotations),
            # Include other necessary fields like info, licenses, categories from the original dataset if needed
        }

        # Save the merged JSON file
        save_json(merged_data, output_path)

    except KeyError as e:
        print(f"Key error: {e}. Please check that both JSON files contain the required keys.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Step 1: remap the category id in the Roboflow train/valid annotation files.
change_category_id(
    roboflow_path='./data/train/_annotations.coco.json',
    output_path='./data/train.json',
)
change_category_id(
    roboflow_path='./data/valid/_annotations.coco.json',
    output_path='./data/valid.json',
)

# Step 2: merge each remapped file with the matching COCO 2017 annotations.
# Training split
merge_datasets(
    coco_path='./data/annotations/instances_train2017.json',
    roboflow_path='./data/train.json',
    output_path='./data/train_merged.json',
)
# Validation split
merge_datasets(
    coco_path='./data/annotations/instances_val2017.json',
    roboflow_path='./data/valid.json',
    output_path='./data/val_merged.json',
)

Data successfully saved to ./data/train.json.
Data successfully saved to ./data/valid.json.
Data successfully saved to ./data/train_merged.json.
Data successfully saved to ./data/val_merged.json.


### Converting COCO JSON to YOLO format

In [1]:
import json
import os

def convert_coco_json_to_yolo(coco_json_path, output_dir):
    """Convert COCO-format box annotations into per-image YOLO label files.

    For every annotation a line ``<class> <x_center> <y_center> <width>
    <height>`` is produced, where the class is ``category_id - 1`` and the
    box values are the COCO ``[x, y, width, height]`` box normalised by the
    image width/height. Lines are collected per label file and each file is
    written once, replacing any previous content, so running the conversion
    again does not duplicate labels.

    A ``train.txt`` file listing one path per image is also written into
    ``output_dir``.

    Args:
        coco_json_path: Path of the COCO-format annotation JSON.
        output_dir: Directory that receives the ``.txt`` label files; it is
            created if missing.

    Raises:
        KeyError: If an annotation references an image id that is not in
            ``images``, or a required key is missing.
        OSError: If the JSON cannot be read or an output file cannot be
            written.
    """
    # Load COCO JSON annotations
    with open(coco_json_path, 'r') as f:
        data = json.load(f)

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Image id -> the metadata needed for normalisation and file naming
    image_data = {
        image['id']: {
            'filename': image['file_name'],
            'width': image['width'],
            'height': image['height'],
        }
        for image in data['images']
    }

    # Label file path -> its lines, in annotation order. Grouping first means
    # each file is opened once instead of once per annotation.
    label_lines = {}
    for annotation in data['annotations']:
        image = image_data[annotation['image_id']]
        category_id = annotation['category_id'] - 1  # Adjust category_id to be zero-indexed if necessary
        bbox = annotation['bbox']
        # Calculate YOLO format coordinates (normalized)
        x_center = (bbox[0] + bbox[2] / 2) / image['width']
        y_center = (bbox[1] + bbox[3] / 2) / image['height']
        width = bbox[2] / image['width']
        height = bbox[3] / image['height']

        annotation_file = os.path.join(output_dir, os.path.splitext(image['filename'])[0] + '.txt')
        label_lines.setdefault(annotation_file, []).append(
            f"{category_id} {x_center} {y_center} {width} {height}\n"
        )

    # 'w' rather than 'a': output from an earlier run is replaced, not extended.
    for annotation_file, lines in label_lines.items():
        with open(annotation_file, 'w') as file:
            file.writelines(lines)

    # Create train.txt file listing all images
    # NOTE(review): entries are output_dir joined with the image file name and
    # the list is always called train.txt, whatever split is converted --
    # confirm that this is what the training tool expects.
    with open(os.path.join(output_dir, 'train.txt'), 'w') as file:
        for img_id in image_data:
            file_path = os.path.join(output_dir, image_data[img_id]['filename'])
            file.write(file_path + '\n')

# Produce YOLO label files for the merged training split ...
convert_coco_json_to_yolo(
    coco_json_path='./data/Merged Datasets/train_merged.json',
    output_dir='./data/Merged Datasets/train-merged/labels',
)
# ... and for the merged validation split.
convert_coco_json_to_yolo(
    coco_json_path='./data/Merged Datasets/val_merged.json',
    output_dir='./data/Merged Datasets/val-merged/labels',
)

In [1]:

import yaml

# Dataset description that is serialised to data.yaml below.
# "names" is positional: the list index of a name is its class index.
data = {
    "train": "train-merged/images",
    "val": "val-merged/images",
    "nc": 91,  # Update with the number of unique classes in your dataset
    "names": [
        # indices 0-9
        "person", "bicycle", "car", "motorcycle", "airplane",
        "bus", "train", "truck", "boat", "traffic light",
        # indices 10-19
        "fire hydrant", "street sign", "stop sign", "parking meter", "bench",
        "bird", "cat", "dog", "horse", "sheep",
        # indices 20-29
        "cow", "elephant", "bear", "zebra", "giraffe",
        "hat", "backpack", "umbrella", "shoe", "eye glasses",
        # indices 30-39
        "handbag", "tie", "suitcase", "frisbee", "skis",
        "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        # indices 40-49
        "skateboard", "surfboard", "tennis racket", "bottle", "plate",
        "wine glass", "cup", "fork", "knife", "spoon",
        # indices 50-59
        "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot dog", "pizza", "donut",
        # indices 60-69
        "cake", "chair", "couch", "potted plant", "bed",
        "mirror", "dining table", "window", "desk", "toilet",
        # indices 70-79
        "door", "tv", "laptop", "mouse", "remote",
        "keyboard", "cell phone", "microwave", "oven", "toaster",
        # indices 80-89
        "sink", "refrigerator", "blender", "book", "clock",
        "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
        # index 90
        "hair brush",
    ],
}

# Serialise the description to data.yaml in block (non-flow) style.
with open('data.yaml', 'w') as outfile:
    yaml.dump(data, stream=outfile, default_flow_style=False)