In [27]:
%%writefile to_coco100.py

import os
import re
import cv2
import json
import itertools
import numpy as np
from glob import glob
import scipy.io as sio
from PIL import Image


MAX_N = 100

categories = [
    {"id": 0, "name": "Person"},
    {"id": 1, "name": "Ear"},
    {"id": 2, "name": "Earmuffs"},
    {"id": 3, "name": "Face"},
    {"id": 4, "name": "Face-guard"},
    {"id": 5, "name": "Face-mask-medical"},
    {"id": 6, "name": "Foot"},
    {"id": 7, "name": "Tools"},
    {"id": 8, "name": "Glasses"},
    {"id": 9, "name": "Gloves"},
    {"id": 10, "name": "Helmet"},
    {"id": 11, "name": "Hands"},
    {"id": 12, "name": "Head"},
    {"id": 13, "name": "Medical-suit"},
    {"id": 14, "name": "Shoes"},
    {"id": 15, "name": "Safety-suit"},
    {"id": 16, "name": "Safety-vest"},
]
train_processed = valid_processed = test_processed = 0

phases = ["train", "valid"]
for phase in phases:
    label_dir = "temp/datasets/{}/labels100".format(phase)
    image_dir = "temp/datasets/{}/images100".format(phase)
    
    res_file = {
        "categories": categories,
        "images": [],
        "annotations": []
    }
    json_file = "{}100.json".format(phase)
    
    annot_count = 0
    image_id = 0
    processed = 0
    if phase == "test":
        for filename in os.listdir(image_dir):
            img_path = os.path.join(image_dir, filename)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": filename,
                "width": img_w,
                "height": img_h,
            })
            processed += 1
            image_id += 1
        test_processed = processed
        break
        
    for filename in os.listdir(label_dir):
        if filename.endswith('.txt'):
            image_extensions = ['.jpeg', '.jpg', '.png']
            image_file_name = None
            for ext in image_extensions:
                image_name = filename.replace('.txt', ext)
                if os.path.exists(os.path.join(image_dir, image_name)):
                    image_file_name = image_name
                    break
            if image_file_name is None:
                print(f"Warning: No image found for {filename}")
                continue
            
            #image_file_name = filename.replace('.txt', '.jpeg')  # Assuming .jpg images
            img_path = os.path.join(image_dir, image_file_name)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": image_file_name,
                "width": img_w,
                "height": img_h,
            })
            with open(os.path.join(label_dir, filename), 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    category_id = int(parts[0])
                    x_center = float(parts[1])
                    y_center = float(parts[2])
                    width = float(parts[3])
                    height = float(parts[4])
                    # Convert to COCO bounding box format (x_min, y_min, width, height)
                    x_min = x_center - width / 2
                    y_min = y_center - height / 2
                    res_file["annotations"].append({
                        "id": annot_count,
                        "image_id": image_id,
                        "category_id": category_id,
                        "bbox": [x_min*img_w, y_min*img_h, width*img_w, height*img_h],
                        "area": width *img_w * height * img_h,
                        "iscrowd": 0
                    })
                    annot_count += 1
            processed += 1
            image_id += 1
    with open(json_file, "w") as f:
        json_str = json.dumps(res_file)
        f.write(json_str)
        #indent 4 or not?
        
        #with open(output_json, 'w') as json_file:
            #json.dump(coco_format, json_file, indent=4)
    print("Processed {} {} images...".format(processed, phase))
    if phase == "train":
        train_processed = processed
    else:
        valid_processed = processed
print('train_processed = {}, valid_processed = {}, test_processed = {}'.format(train_processed, valid_processed, test_processed))
print("Done.")


Overwriting to_coco100.py


In [28]:
!python to_coco100.py

Processed 100 train images...
Processed 100 valid images...
train_processed = 100, valid_processed = 100, test_processed = 0
Done.


In [3]:
import os
lable_dir = "datasets/train/labels"
print(len(os.listdir(lable_dir)))
lable_dir = "datasets/valid/labels"
print(len(os.listdir(lable_dir)))
lable_dir = "datasets/test/images"
print(len(os.listdir(lable_dir)))

4319
2160
1620


In [1]:
def box_cxcywh_to_xyxy(x):
    #x_min, y_min, w, h = x.unbind(1)
    x_min, y_min, w, h = x
    b = [(x_min), (y_min),
         (x_min + w), (y_min + h)]
    #return torch.stack(b, dim=1)
    return torch.tensor(b)
def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    #print(b)
    return b

In [9]:
%%writefile to_coco.py

# modified to produce only 10 valid labels/images json file
import os
import re
import cv2
import json
import itertools
import numpy as np
from glob import glob
import scipy.io as sio
from PIL import Image

MAX_N = 100
categories = [
    {"id": 0, "name": "Person"},
    {"id": 1, "name": "Ear"},
    {"id": 2, "name": "Earmuffs"},
    {"id": 3, "name": "Face"},
    {"id": 4, "name": "Face-guard"},
    {"id": 5, "name": "Face-mask-medical"},
    {"id": 6, "name": "Foot"},
    {"id": 7, "name": "Tools"},
    {"id": 8, "name": "Glasses"},
    {"id": 9, "name": "Gloves"},
    {"id": 10, "name": "Helmet"},
    {"id": 11, "name": "Hands"},
    {"id": 12, "name": "Head"},
    {"id": 13, "name": "Medical-suit"},
    {"id": 14, "name": "Shoes"},
    {"id": 15, "name": "Safety-suit"},
    {"id": 16, "name": "Safety-vest"},
]

train_processed = valid_processed = test_processed = 0
phases = ["valid"]
#phases = ["train", "valid", "test"]
for phase in phases:
    label_dir = "datasets/{}/labels10".format(phase)
    image_dir = "datasets/{}/images10".format(phase)
    
    res_file = {
        "categories": categories,
        "images": [],
        "annotations": []
    }
    json_file = "{}{}.json".format(phase, "10")
    print(json_file)
    annot_count = 0
    image_id = 0
    processed = 0
    if phase == "test":
        for filename in os.listdir(image_dir):
            img_path = os.path.join(image_dir, filename)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": filename,
                "width": img_w,
                "height": img_h,
            })
            processed += 1
            image_id += 1
        with open(json_file, "w") as f:
            json_str = json.dumps(res_file)
            f.write(json_str)
        test_processed = processed
        break
        
    for filename in os.listdir(label_dir):
        if filename.endswith('.txt'):
            image_extensions = ['.jpeg', '.jpg', '.png']
            image_file_name = None
            for ext in image_extensions:
                image_name = filename.replace('.txt', ext)
                if os.path.exists(os.path.join(image_dir, image_name)):
                    image_file_name = image_name
                    break
            if image_file_name is None:
                print(f"Warning: No image found for {filename}")
                continue
            
            #image_file_name = filename.replace('.txt', '.jpeg')  # Assuming .jpg images
            img_path = os.path.join(image_dir, image_file_name)
            img = Image.open(img_path)
            img_w, img_h = img.size
            res_file["images"].append({
                "id": image_id,
                "file_name": image_file_name,
                "width": img_w,
                "height": img_h,
            })
            with open(os.path.join(label_dir, filename), 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    category_id = int(parts[0])
                    x_center = float(parts[1])
                    y_center = float(parts[2])
                    width = float(parts[3])
                    height = float(parts[4])
        
                    # Convert to COCO bounding box format (x_min, y_min, width, height)
                    x_min = x_center - width / 2
                    y_min = y_center - height / 2
                    res_file["annotations"].append({
                        "id": annot_count,
                        "image_id": image_id,
                        "category_id": category_id,
                        "bbox": [x_min*img_w, y_min*img_h, width*img_w, height*img_h],
                        "area": width * height,
                        "ignore": 0,
                        "iscrowd": 0
                    })
                    #print(res_file["annotations"][annot_count]["bbox"])
                    annot_count += 1
            print("Processed {} {} images...".format(processed, phase))
            processed += 1
            image_id += 1            
    with open('datasets/' + json_file, "w") as f:
        json_str = json.dumps(res_file)
        f.write(json_str)
        #indent 4 or not?
        
        #with open(output_json, 'w') as json_file:
            #json.dump(coco_format, json_file, indent=4)
    if phase == "train":
        train_processed = processed
    else:
        valid_processed = processed
print('train_processed = {}, valid_processed = {}, test_processed = {}'.format(train_processed, valid_processed, test_processed))
print("Done.")

Overwriting to_coco.py


In [10]:
!python to_coco.py

valid10.json
Processed 0 valid images...
Processed 1 valid images...
Processed 2 valid images...
Processed 3 valid images...
Processed 4 valid images...
Processed 5 valid images...
Processed 6 valid images...
Processed 7 valid images...
Processed 8 valid images...
Processed 9 valid images...
train_processed = 0, valid_processed = 10, test_processed = 0
Done.


In [1]:
%%writefile get_validGT.py

# modified to produce only 100 valid labels/images json file
import os
import re
import cv2
import json
import itertools
import numpy as np
from glob import glob
import scipy.io as sio
from PIL import Image


train_processed = valid_processed = test_processed = 0
phases = ["valid"]
#phases = ["train", "valid", "test"]
for phase in phases:
    label_dir = "temp/datasets/{}/labels100".format(phase)
    image_dir = "temp/datasets/{}/images100".format(phase)
    res_file = {}
    
    json_file = "{}{}.json".format(phase, "100GT")
    print(json_file)
    annot_count = 0
    image_id = 0
    processed = 0
    for filename in os.listdir(label_dir):
        if filename.endswith('.txt'):
            image_extensions = ['.jpeg', '.jpg', '.png']
            image_file_name = None
            for ext in image_extensions:
                image_name = filename.replace('.txt', ext)
                if os.path.exists(os.path.join(image_dir, image_name)):
                    image_file_name = image_name
                    break
            if image_file_name is None:
                print(f"Warning: No image found for {filename}")
                continue
            img_path = os.path.join(image_dir, image_file_name)
            img = Image.open(img_path)
            img_w, img_h = img.size
            
            bboxes = []
            labels = []
            with open(os.path.join(label_dir, filename), 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    category_id = int(parts[0])
                    x_center = float(parts[1])
                    y_center = float(parts[2])
                    width = float(parts[3])
                    height = float(parts[4])
        
                    # Convert to COCO bounding box format (x_min, y_min, width, height)
                    x_min = x_center - width / 2
                    y_min = y_center - height / 2
                    labels.append(category_id)
                    bboxes.append([x_min*img_w, y_min*img_h, width*img_w, height*img_h])
            print("Processed {} {} images...".format(processed, phase))
            res_file[image_file_name] = {
                "boxes": bboxes,
                "labels": labels
            }
            processed += 1
    with open('datasets/' + json_file, "w") as f:
        json_str = json.dumps(res_file)
        f.write(json_str)
        #indent 4 or not?
        
        #with open(output_json, 'w') as json_file:
            #json.dump(coco_format, json_file, indent=4)
    if phase == "train":
        train_processed = processed
    else:
        valid_processed = processed
print('train_processed = {}, valid_processed = {}, test_processed = {}'.format(train_processed, valid_processed, test_processed))
print("Done.")

Writing get_validGT.py


In [2]:
!python get_validGT.py

valid100GT.json
Processed 0 valid images...
Processed 1 valid images...
Processed 2 valid images...
Processed 3 valid images...
Processed 4 valid images...
Processed 5 valid images...
Processed 6 valid images...
Processed 7 valid images...
Processed 8 valid images...
Processed 9 valid images...
Processed 10 valid images...
Processed 11 valid images...
Processed 12 valid images...
Processed 13 valid images...
Processed 14 valid images...
Processed 15 valid images...
Processed 16 valid images...
Processed 17 valid images...
Processed 18 valid images...
Processed 19 valid images...
Processed 20 valid images...
Processed 21 valid images...
Processed 22 valid images...
Processed 23 valid images...
Processed 24 valid images...
Processed 25 valid images...
Processed 26 valid images...
Processed 27 valid images...
Processed 28 valid images...
Processed 29 valid images...
Processed 30 valid images...
Processed 31 valid images...
Processed 32 valid images...
Processed 33 valid images...
Processe

In [1]:
#copying 100 sample data, for upload and easy wget access
import os
import shutil

# Define the source and destination directories
phases = ['train', 'valid']
for phase in phases:
    source_dir = 'datasets/{}/images'.format(phase)
    destination_dir = 'datasets/{}/images100'.format(phase)
    
    # Create the destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)
    
    # List files in the source directory
    files = os.listdir(source_dir)
    
    # Copy the first 100 files
    for i, filename in enumerate(files):
        if i >= 100:
            break  # Stop after copying 100 files
    
        # Construct full file paths
        src_file = os.path.join(source_dir, filename)
        dst_file = os.path.join(destination_dir, filename)
    
        # Copy the file
        shutil.copy(src_file, dst_file)
    
    print(f"Copied {min(100, len(files))} files to '{destination_dir}'.")

    source_dir = 'datasets/{}/labels'.format(phase)
    destination_dir = 'datasets/{}/labels100'.format(phase)
    
    # Create the destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)
    
    # List files in the source directory
    files = os.listdir(source_dir)
    
    # Copy the first 100 files
    for i, filename in enumerate(files):
        if i >= 100:
            break  # Stop after copying 100 files
    
        # Construct full file paths
        src_file = os.path.join(source_dir, filename)
        dst_file = os.path.join(destination_dir, filename)
    
        # Copy the file
        shutil.copy(src_file, dst_file)
    
    print(f"Copied {min(100, len(files))} files to '{destination_dir}'.")

phase = 'test'
source_dir = 'datasets/{}/images'.format(phase)
destination_dir = 'datasets/{}/images100'.format(phase)

# Create the destination directory if it doesn't exist
os.makedirs(destination_dir, exist_ok=True)

# List files in the source directory
files = os.listdir(source_dir)

# Copy the first 100 files
for i, filename in enumerate(files):
    if i >= 100:
        break  # Stop after copying 100 files

    # Construct full file paths
    src_file = os.path.join(source_dir, filename)
    dst_file = os.path.join(destination_dir, filename)

    # Copy the file
    shutil.copy(src_file, dst_file)

print(f"Copied {min(100, len(files))} files to '{destination_dir}'.")

Copied 100 files to 'datasets/train/images100'.
Copied 100 files to 'datasets/train/labels100'.
Copied 100 files to 'datasets/valid/images100'.
Copied 100 files to 'datasets/valid/labels100'.
Copied 100 files to 'datasets/test/images100'.


In [5]:
print(len(os.listdir('datasets/{}/images100'.format(phase))))

100
