In [1]:
%%writefile to_coco.py

import os
import re
import cv2
import json
import itertools
import numpy as np
from glob import glob
import scipy.io as sio
from PIL import Image


MAX_N = 100

categories = [
    {"id": 0, "name": "Person"},
    {"id": 1, "name": "Ear"},
    {"id": 2, "name": "Earmuffs"},
    {"id": 3, "name": "Face"},
    {"id": 4, "name": "Face-guard"},
    {"id": 5, "name": "Face-mask-medical"},
    {"id": 6, "name": "Foot"},
    {"id": 7, "name": "Tools"},
    {"id": 8, "name": "Glasses"},
    {"id": 9, "name": "Gloves"},
    {"id": 10, "name": "Helmet"},
    {"id": 11, "name": "Hands"},
    {"id": 12, "name": "Head"},
    {"id": 13, "name": "Medical-suit"},
    {"id": 14, "name": "Shoes"},
    {"id": 15, "name": "Safety-suit"},
    {"id": 16, "name": "Safety-vest"},
]
train_processed = valid_processed = test_processed = 0

phases = ["train", "valid", "test"]
for phase in phases:
    label_dir = "datasets/{}/labels".format(phase)
    image_dir = "datasets/{}/images".format(phase)
    
    res_file = {
        "categories": categories,
        "images": [],
        "annotations": []
    }
    json_file = "{}.json".format(phase)
    
    annot_count = 0
    image_id = 0
    processed = 0
    if phase == "test":
        for filename in os.listdir(image_dir):
            img_path = os.path.join(image_dir, filename)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": filename,
                "width": img_w,
                "height": img_h,
            })
            processed += 1
            image_id += 1
        test_processed = processed
        break
        
    for filename in os.listdir(label_dir):
        if filename.endswith('.txt'):
            image_extensions = ['.jpeg', '.jpg', '.png']
            image_file_name = None
            for ext in image_extensions:
                image_name = filename.replace('.txt', ext)
                if os.path.exists(os.path.join(image_dir, image_name)):
                    image_file_name = image_name
                    break
            if image_file_name is None:
                print(f"Warning: No image found for {filename}")
                continue
            
            #image_file_name = filename.replace('.txt', '.jpeg')  # Assuming .jpg images
            img_path = os.path.join(image_dir, image_file_name)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": image_file_name,
                "width": img_w,
                "height": img_h,
            })
            with open(os.path.join(label_dir, filename), 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    category_id = int(parts[0])
                    x_center = float(parts[1])
                    y_center = float(parts[2])
                    width = float(parts[3])
                    height = float(parts[4])
        
                    # Convert to COCO bounding box format (x_min, y_min, width, height)
                    x_min = x_center - width / 2
                    y_min = y_center - height / 2
                    res_file["annotations"].append({
                        "id": annot_count,
                        "image_id": image_id,
                        "category_id": category_id,
                        "bbox": [x_min, y_min, width, height],
                        "area": width * height,
                        "ignore": 0,
                        "iscrowd": 0
                    })
                    annot_count += 1
            processed += 1
            image_id += 1
        with open(json_file, "w") as f:
            json_str = json.dumps(res_file)
            f.write(json_str)
        #indent 4 or not?
        
        #with open(output_json, 'w') as json_file:
            #json.dump(coco_format, json_file, indent=4)
        print("Processed {} {} images...".format(processed, phase))
        if phase == "train":
            train_processed = processed
        else:
            valid_processed = processed
print('train_processed = {}, valid_processed = {}, test_processed = {}'.format(train_processed, valid_processed, test_processed))
print("Done.")


Overwriting to_coco.py


In [2]:
!python to_coco.py

Processed 1 train images...
Processed 2 train images...
Processed 3 train images...
Processed 4 train images...
Processed 5 train images...
Processed 6 train images...
Processed 7 train images...
Processed 8 train images...
Processed 9 train images...
Processed 10 train images...
Processed 11 train images...
Processed 12 train images...
Processed 13 train images...
Processed 14 train images...
Processed 15 train images...
Processed 16 train images...
Processed 17 train images...
Processed 18 train images...
Processed 19 train images...
Processed 20 train images...
Processed 21 train images...
Processed 22 train images...
Processed 23 train images...
Processed 24 train images...
Processed 25 train images...
Processed 26 train images...
Processed 27 train images...
Processed 28 train images...
Processed 29 train images...
Processed 30 train images...
Processed 31 train images...
Processed 32 train images...
Processed 33 train images...
Processed 34 train images...
Processed 35 train imag

In [3]:
import os
lable_dir = "datasets/train/labels"
print(len(os.listdir(lable_dir)))
lable_dir = "datasets/valid/labels"
print(len(os.listdir(lable_dir)))
lable_dir = "datasets/test/images"
print(len(os.listdir(lable_dir)))

4319
2160
1620


In [1]:
def box_cxcywh_to_xyxy(x):
    #x_min, y_min, w, h = x.unbind(1)
    x_min, y_min, w, h = x
    b = [(x_min), (y_min),
         (x_min + w), (y_min + h)]
    #return torch.stack(b, dim=1)
    return torch.tensor(b)
def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    #print(b)
    return b

In [5]:
%%writefile to_coco.py

import os
import re
import cv2
import json
import itertools
import numpy as np
from glob import glob
import scipy.io as sio
from PIL import Image

MAX_N = 100
categories = [
    {"id": 0, "name": "Person"},
    {"id": 1, "name": "Ear"},
    {"id": 2, "name": "Earmuffs"},
    {"id": 3, "name": "Face"},
    {"id": 4, "name": "Face-guard"},
    {"id": 5, "name": "Face-mask-medical"},
    {"id": 6, "name": "Foot"},
    {"id": 7, "name": "Tools"},
    {"id": 8, "name": "Glasses"},
    {"id": 9, "name": "Gloves"},
    {"id": 10, "name": "Helmet"},
    {"id": 11, "name": "Hands"},
    {"id": 12, "name": "Head"},
    {"id": 13, "name": "Medical-suit"},
    {"id": 14, "name": "Shoes"},
    {"id": 15, "name": "Safety-suit"},
    {"id": 16, "name": "Safety-vest"},
]

train_processed = valid_processed = test_processed = 0

phases = ["train", "valid", "test"]
for phase in phases:
    label_dir = "datasets/{}/labels".format(phase)
    image_dir = "datasets/{}/images".format(phase)
    
    res_file = {
        "categories": categories,
        "images": [],
        "annotations": []
    }
    json_file = "{}.json".format(phase)
    
    annot_count = 0
    image_id = 0
    processed = 0
    if phase == "test":
        for filename in os.listdir(image_dir):
            img_path = os.path.join(image_dir, filename)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": filename,
                "width": img_w,
                "height": img_h,
            })
            processed += 1
            image_id += 1
        with open(json_file, "w") as f:
            json_str = json.dumps(res_file)
            f.write(json_str)
        test_processed = processed
        break
        
    for filename in os.listdir(label_dir):
        if filename.endswith('.txt'):
            image_extensions = ['.jpeg', '.jpg', '.png']
            image_file_name = None
            for ext in image_extensions:
                image_name = filename.replace('.txt', ext)
                if os.path.exists(os.path.join(image_dir, image_name)):
                    image_file_name = image_name
                    break
            if image_file_name is None:
                print(f"Warning: No image found for {filename}")
                continue
            
            #image_file_name = filename.replace('.txt', '.jpeg')  # Assuming .jpg images
            img_path = os.path.join(image_dir, image_file_name)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": image_file_name,
                "width": img_w,
                "height": img_h,
            })
            with open(os.path.join(label_dir, filename), 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    category_id = int(parts[0])
                    x_center = float(parts[1])
                    y_center = float(parts[2])
                    width = float(parts[3])
                    height = float(parts[4])
        
                    # Convert to COCO bounding box format (x_min, y_min, width, height)
                    x_min = x_center - width / 2
                    y_min = y_center - height / 2
                    res_file["annotations"].append({
                        "id": annot_count,
                        "image_id": image_id,
                        "category_id": category_id,
                        "bbox": [x_min*img_w, y_min*img_h, width*img_w, height*img_h],
                        "area": width * height,
                        "ignore": 0,
                        "iscrowd": 0
                    })
                    print(res_file["annotations"][annot_count]["bbox"])
                    annot_count += 1
            processed += 1
            image_id += 1
            if processed == 100:
                with open(json_file, "w") as f:
                    json_str = json.dumps(res_file)
                    f.write(json_str)
                break
        #indent 4 or not?
        
        #with open(output_json, 'w') as json_file:
            #json.dump(coco_format, json_file, indent=4)
        print("Processed {} {} images...".format(processed, phase))
        if phase == "train":
            train_processed = processed
        else:
            valid_processed = processed
print('train_processed = {}, valid_processed = {}, test_processed = {}'.format(train_processed, valid_processed, test_processed))
print("Done.")

Overwriting to_coco.py


In [6]:
!python to_coco.py

[1937.001, 271.99999999999994, 427.00199999999995, 876.0]
[1.0020000000000029, 2514.9999999999995, 1398.0, 1459.0]
[2022.9989999999998, 1388.0, 2935.002, 1421.0]
[2471.0009999999997, 1024.0, 1498.998, 1099.0]
[23.000999999999937, 0.9999999999998899, 2494.998, 3906.0]
[1467.9989999999998, 1817.9999999999998, 829.998, 2182.0]
Processed 1 train images...
[1189.999104, 607.00032, 1088.0010240000001, 1075.00032]
[1202.999808, 862.9989119999999, 1023.998976, 821.000448]
[2011.99872, 1013.9996160000001, 199.001088, 360.0]
[251.99923200000006, 0.9999359999999768, 2818.000896, 2294.00064]
Processed 2 train images...
[2150.999988, 1370.99856, 79.00139999999999, 91.00071]
[2428.0017239999997, 1349.0006025, 67.999392, 88.00078500000001]
[1819.999788, 1818.9992999999997, 84.99924, 105.00036]
[2441.000028, 1401.0007950000002, 43.99908, 35.9991]
[2169.000222, 1420.0018125000001, 38.999388, 30.999225]
[2014.998966, 1569.999555, 33.001548, 29.99925]
[2200.999146, 1524.0007050000002, 29.000004, 37.99905