In [1]:
import json
import os
import cv2
from tqdm import tqdm
import numpy as np

import shutil  # stdlib; needed for the byte-exact image copy below

# Paths
# NOTE(review): coco_json_path ends in ".csv" but is parsed with json.load()
# below. The run recorded in this notebook failed with
# "JSONDecodeError: Expecting value: line 1 column 1 (char 0)".
# Point it at the COCO-format JSON export before running this cell.
# NOTE: these are raw strings, so every "\\" is two literal backslashes.
coco_json_path = r'E:\\Nyi Zaw Aung\\SuLarbMon\\LamenessData\\yolov8\\paper-Train_Val_Test_Accuracy\\may_v1_1000\\performance\\train\\via_project_2Jul2024_22h17m_csv.csv'  # Path to the VIA COCO JSON file
images_dir = r'E:\\Nyi Zaw Aung\\SuLarbMon\\LamenessData\\yolov8\\paper-Train_Val_Test_Accuracy\\may_v1_1000\\performance\\train\\images'  # Directory where images are stored
output_dir = r'E:\\Nyi Zaw Aung\\SuLarbMon\\LamenessData\\yolov8\\paper-Train_Val_Test_Accuracy\\may_v1_1000\\performance\\train_2\\'  # Directory where the YOLO dataset will be created

# Create output directories (only the 'train' split is populated below).
for subset in ('train', 'test'):
    os.makedirs(os.path.join(output_dir, 'images', subset), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'labels', subset), exist_ok=True)

# Load COCO JSON file. Binary mode lets json detect the UTF-8/16/32 encoding
# itself instead of relying on the platform's default text encoding.
with open(coco_json_path, 'rb') as f:
    try:
        coco_data = json.load(f)
    except json.JSONDecodeError as exc:
        # JSONDecodeError subclasses ValueError, so existing handlers still work.
        raise ValueError(
            f"{coco_json_path!r} is not valid JSON ({exc}). "
            "This cell expects a COCO-format JSON export, not a CSV file."
        ) from exc

# Index annotations by image id once, instead of rescanning the full
# annotation list for every image. Insertion order is preserved, so the
# label rows come out in the same order as before.
annotations_by_image = {}
for ann in coco_data['annotations']:
    annotations_by_image.setdefault(ann['image_id'], []).append(ann)

skipped_annotations = 0  # annotations that could not be turned into a box

# Process each image and its annotations
for img_info in tqdm(coco_data['images']):
    img_id = img_info['id']
    file_name = img_info['file_name']
    width = img_info['width']
    height = img_info['height']

    # Copy image to output directory. A byte-exact copy avoids the lossy
    # decode/re-encode round-trip of cv2.imread + cv2.imwrite and cannot fail
    # on a file OpenCV is unable to decode.
    image_path = os.path.join(images_dir, file_name)
    output_image_path = os.path.join(output_dir, 'images', 'train', file_name)  # or 'test' based on your split logic
    os.makedirs(os.path.dirname(output_image_path), exist_ok=True)
    if os.path.isfile(image_path):
        shutil.copy2(image_path, output_image_path)

    # Create label file (written even when the image itself is missing,
    # as in the original behaviour).
    label_file_name = os.path.splitext(file_name)[0] + '.txt'
    output_label_path = os.path.join(output_dir, 'labels', 'train', label_file_name)  # or 'test' based on your split logic
    # file_name may contain sub-folders; make sure the label folder exists too.
    os.makedirs(os.path.dirname(output_label_path), exist_ok=True)
    with open(output_label_path, 'w') as label_file:
        for ann in annotations_by_image.get(img_id, []):
            # NOTE(review): category_id is written unchanged; YOLO expects
            # zero-based class indices - confirm the ids in this export.
            category_id = ann['category_id']

            segmentation = ann.get('segmentation')
            if not isinstance(segmentation, list) or not segmentation:
                # Missing, empty or RLE (dict) segmentation: no polygon to use.
                skipped_annotations += 1
                continue

            # Standard COCO stores a list of polygons; only the first one is
            # used (as before). A flat [x1, y1, x2, y2, ...] list is accepted
            # as a single polygon as well.
            first = segmentation[0]
            points = first if isinstance(first, (list, tuple)) else segmentation
            if len(points) < 6 or len(points) % 2:
                # Fewer than three vertices or an unpaired coordinate.
                skipped_annotations += 1
                continue

            polygon = np.array(points).reshape(-1, 2).astype(np.int32)

            # Rasterise the polygon so the box is clipped to the image area.
            mask = np.zeros((height, width), dtype=np.uint8)
            cv2.fillPoly(mask, [polygon], color=255)
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                # Polygon lies completely outside the image.
                skipped_annotations += 1
                continue

            # Bound every contour, not just the first: clipping at the image
            # border can split one polygon into several pieces.
            x, y, w, h = cv2.boundingRect(np.vstack(contours))
            x_center = (x + w / 2) / width
            y_center = (y + h / 2) / height
            bbox_width = w / width
            bbox_height = h / height
            label_file.write(f"{category_id} {x_center} {y_center} {bbox_width} {bbox_height}\n")

if skipped_annotations:
    print(f"Skipped {skipped_annotations} annotation(s) without a usable polygon.")
print("Conversion complete!")

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [2]:
import json
import os
import pandas as pd
from tqdm import tqdm
import cv2
import numpy as np

import shutil  # stdlib; needed for the byte-exact image copy below

# Paths
# Alternative input set, currently disabled:
#csv_file_path = r'G:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\AllCams-AnnotatedData\Cam7-04\Cam7-04-2024-08-27-72506-72865\cam7-04-300.csv'  # Path to the VIA CSV annotation export
#images_dir = r'G:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\AllCams-AnnotatedData\Cam7-04\Cam7-04-2024-08-27-72506-72865'  # Directory where images are stored
#output_dir = r'G:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\AllCams-AnnotatedData\All_Images\Yolo_all\AllTrainValidYolo'

csv_file_path = r'G:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\Cam60102-70304\cam60102-70304.csv'  # Path to the VIA CSV annotation export
images_dir = r'G:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\Cam60102-70304'  # Directory where images are stored
output_dir = r'G:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\Cam60102-70304\Yolo_Cam60102-70304\training'  # Root of the generated images/ and labels/ folders

# Create output directories
os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)
os.makedirs(os.path.join(output_dir, 'labels'), exist_ok=True)

# Load CSV file
df = pd.read_csv(csv_file_path)

# Group by filename: one group (and therefore one label file) per image
grouped = df.groupby('filename')

skipped_regions = 0  # polygon regions that could not be written as a label row

# Process each group (image)
for filename, group in tqdm(grouped):
    img_path = os.path.join(images_dir, filename)

    # Check if image exists
    if not os.path.exists(img_path):
        print(f"Image {filename} not found, skipping.")
        continue

    # The image is decoded only to obtain its dimensions. cv2.imread returns
    # None (it does not raise) when the file cannot be decoded.
    img = cv2.imread(img_path)
    if img is None:
        print(f"Image {filename} could not be read, skipping.")
        continue
    height, width = img.shape[:2]

    # Copy image to output directory (assuming all images are used for training, adjust as necessary).
    # A byte-exact copy avoids re-encoding the image through cv2.imwrite.
    output_image_path = os.path.join(output_dir, 'images', filename)  # or 'test' based on your split logic
    os.makedirs(os.path.dirname(output_image_path), exist_ok=True)
    shutil.copy2(img_path, output_image_path)

    # Create label file
    label_file_name = os.path.splitext(filename)[0] + '.txt'
    output_label_path = os.path.join(output_dir, 'labels', label_file_name)  # or 'test' based on your split logic
    # filename may contain sub-folders; make sure the label folder exists too.
    os.makedirs(os.path.dirname(output_label_path), exist_ok=True)
    with open(output_label_path, 'w') as label_file:
        for _, row in group.iterrows():
            region_shape_attributes = json.loads(row['region_shape_attributes'])
            region_attributes = json.loads(row['region_attributes'])

            # Only polygon regions are converted. Rows whose shape has no
            # 'name' (e.g. "{}") or another shape type are skipped, without
            # a bare except that would hide unrelated errors.
            if not isinstance(region_shape_attributes, dict) or region_shape_attributes.get('name') != 'polygon':
                continue

            # NOTE(review): the 'Class' value is written verbatim; YOLO expects
            # an integer class index - confirm the values used in this CSV.
            class_id = region_attributes.get('Class', '0')  # Default to class 0 if not found
            all_points_x = region_shape_attributes['all_points_x']
            all_points_y = region_shape_attributes['all_points_y']

            # A segmentation row needs at least three complete (x, y) vertices.
            if len(all_points_x) != len(all_points_y) or len(all_points_x) < 3:
                skipped_regions += 1
                continue

            # Normalise to [0, 1] for YOLO. Vertices outside the image are
            # clamped to the border; in-range values are unchanged.
            normalized_polygon_str = ' '.join(
                f"{min(max(x / width, 0.0), 1.0)} {min(max(y / height, 0.0), 1.0)}"
                for x, y in zip(all_points_x, all_points_y)
            )

            # Write to label file
            label_file.write(f"{class_id} {normalized_polygon_str}\n")

if skipped_regions:
    print(f"Skipped {skipped_regions} malformed polygon region(s).")
print("Conversion complete!")


100%|██████████| 700/700 [00:32<00:00, 21.63it/s]

Conversion complete!



