In [2]:
import os
import json

# Detection dataset (v4): image and label directories for the train and val splits.
# Images and labels are paired by file name further down ('<name>.jpg' <-> '<name>.txt').
img_train_Dir = 'data/dataset_v4/train/images/'
labels_train_Dir = 'data/dataset_v4/train/labels/'
img_val_Dir = 'data/dataset_v4/val/images/'
labels_val_Dir = 'data/dataset_v4/val/labels/'

# Root directory under which the per-group classification data (e.g. 'meat') is written.
class_data_dir = 'data/classification_data/'

In [3]:
#### shared helper for both conversion directions ####
def _remap_label_classes(labels_dir, mapping):
    """Replace the first token of every line in each '.txt' file of `labels_dir`.

    Each non-blank line is split on whitespace, its first token is looked up in
    `mapping`, and the line is written back with the mapped value (converted with
    `str`) followed by the remaining tokens joined by single spaces. Blank lines
    are dropped instead of crashing on `parts[0]`.

    A file is only opened for writing after all of its lines were mapped
    successfully, so a failing file is left untouched. Files processed before
    the failure stay converted.

    Args:
        labels_dir: Directory containing the label files.
        mapping: Dict from the current first token to its replacement.

    Returns:
        Number of '.txt' files rewritten.

    Raises:
        KeyError: If a first token is missing from `mapping`; the message names
            the file and line.
    """
    files_changed = 0
    # Sorted so that the processing order (and the first reported error) is deterministic.
    for label_file in sorted(os.listdir(labels_dir)):
        if not label_file.endswith('.txt'):
            continue
        label_path = os.path.join(labels_dir, label_file)

        with open(label_path, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line_no, line in enumerate(lines, start=1):
            parts = line.split()
            if not parts:
                # Blank / whitespace-only line: nothing to convert.
                continue
            try:
                new_token = str(mapping[parts[0]])
            except KeyError:
                raise KeyError(
                    f"{label_path}, line {line_no}: class {parts[0]!r} is not in the mapping"
                ) from None
            new_lines.append(' '.join([new_token] + parts[1:]) + '\n')

        with open(label_path, 'w') as f:
            f.writelines(new_lines)
        files_changed += 1
    return files_changed


#### class id to class name ####
def id_to_class_name(labels_dir, id_to_name_dict):
    """Rewrite all '.txt' label files in `labels_dir` in place, replacing class IDs by names.

    Args:
        labels_dir: Directory containing the label files.
        id_to_name_dict: Dict mapping the class ID token (as it appears in the
            file, i.e. a string) to the class name.

    Raises:
        KeyError: If a line starts with a token that is not a key of `id_to_name_dict`.
    """
    files_changed = _remap_label_classes(labels_dir, id_to_name_dict)
    print(f"Converted class IDs to names in {files_changed} files.")


#### class name to class id ####
def class_name_to_id(labels_dir, name_to_id_dict):
    """Rewrite all '.txt' label files in `labels_dir` in place, replacing class names by IDs.

    Args:
        labels_dir: Directory containing the label files.
        name_to_id_dict: Dict mapping the class name token to its ID; the ID is
            written with `str`.

    Raises:
        KeyError: If a line starts with a token that is not a key of `name_to_id_dict`.
    """
    files_changed = _remap_label_classes(labels_dir, name_to_id_dict)
    print(f"Converted class names to IDs in {files_changed} files.")



In [8]:
#### Load mapping dictionaries ####
# JSON object keys are always strings, so class IDs are str here ("0", not 0).
# JSON is UTF-8 by specification; do not depend on the platform's default encoding.
with open("pipeline_data/dicts/detect_classes_v4.json", "r", encoding="utf-8") as f:
    id_to_name = json.load(f)

name_to_id = {v: k for k, v in id_to_name.items()}

# If two IDs shared a name, the inversion above would silently keep only one of them
# and a later name -> ID conversion would be wrong. Fail loudly instead.
if len(name_to_id) != len(id_to_name):
    raise ValueError("detect_classes_v4.json maps several class IDs to the same class name")

# Example usage (almonds class):
print(id_to_name["0"])
print(name_to_id["almonds"])

almonds
0


In [11]:
import shutil

### Copy all images and labels to classification_data dir ###
all_images_dir = "data/classification_data/all_data/images/"
all_labels_dir = "data/classification_data/all_data/labels/"

# shutil.copy does not create missing parent directories.
os.makedirs(all_images_dir, exist_ok=True)
os.makedirs(all_labels_dir, exist_ok=True)

# Train and val are merged into one pool; both splits go through the same copy loop.
for src_images_dir, src_labels_dir in (
    (img_train_Dir, labels_train_Dir),
    (img_val_Dir, labels_val_Dir),
):
    for file_name in os.listdir(src_images_dir):
        if not file_name.endswith('.jpg'):
            continue
        # Swap only the extension; str.replace would rewrite every '.jpg' in the name.
        label_name = os.path.splitext(file_name)[0] + '.txt'

        shutil.copy(os.path.join(src_images_dir, file_name),
                    os.path.join(all_images_dir, file_name))
        shutil.copy(os.path.join(src_labels_dir, label_name),
                    os.path.join(all_labels_dir, label_name))

print("Copied all images and labels to classification_data/all_data/")

Copied all images and labels to classification_data/all_data/


In [12]:
# Rewrites every '.txt' file in all_labels_dir in place (class ID -> class name).
# Not re-runnable: after one pass the first token of each line is a name, which is
# not expected to be a key of id_to_name, so a second run would raise KeyError.
id_to_class_name(all_labels_dir, id_to_name)

Converted class IDs to names in 23994 files.


In [30]:
def crop_image(image_path, label_path, scale=1.1, box_index=0):
    """Crop one labelled bounding box out of an image.

    Each non-blank line of the label file is read as
    `<class> <x_center> <y_center> <width> <height>`, where the four numbers are
    fractions of the image width/height. The selected box is enlarged by `scale`
    around its centre and clamped to the image bounds before cropping.

    Args:
        image_path: Path of the image file.
        label_path: Path of the matching label file.
        scale: Factor applied to the box width and height before cropping.
        box_index: Zero-based index (among the non-blank lines) of the box to
            crop. Defaults to 0, i.e. the first box.

    Returns:
        Tuple `(class_name, cropped_img)`: the first token of the selected line
        and the cropped PIL image.

    Raises:
        ValueError: If the label file contains no boxes.
        IndexError: If `box_index` is outside the range of available boxes.
    """
    from PIL import Image

    with open(label_path, 'r') as f:
        boxes = [line.split() for line in f if line.strip()]

    if not boxes:
        raise ValueError(f"No bounding boxes found in {label_path}")
    if not 0 <= box_index < len(boxes):
        raise IndexError(
            f"box_index {box_index} out of range: {label_path} has {len(boxes)} box(es)"
        )

    parts = boxes[box_index]
    class_name = parts[0]

    # Context manager so the image file handle is released immediately; this
    # matters when the function is called for thousands of images.
    with Image.open(image_path) as img:
        img_width, img_height = img.size

        x_center = float(parts[1]) * img_width
        y_center = float(parts[2]) * img_height
        width = float(parts[3]) * img_width * scale
        height = float(parts[4]) * img_height * scale

        # Clamp to the image so an enlarged box never reaches outside of it.
        x_min = max(0, int(x_center - width / 2))
        y_min = max(0, int(y_center - height / 2))
        x_max = min(img_width, int(x_center + width / 2))
        y_max = min(img_height, int(y_center + height / 2))

        cropped_img = img.crop((x_min, y_min, x_max, y_max))
        # Make sure the pixel data is materialised before the source file is closed.
        cropped_img.load()

    return class_name, cropped_img

In [33]:
# Sanity check: crop the first labelled box of one sample and open it in the viewer.
img, label = '00000001.jpg', '00000001.txt'
sample_paths = (
    os.path.join(all_images_dir, img),
    os.path.join(all_labels_dir, label),
)

class_name, cropped_img = crop_image(*sample_paths)
print(f"Cropped image class: {class_name}")
cropped_img.show()

Cropped image class: potato


In [40]:
### train val split for all_data ###
import random
all_data_dir = 'data/classification_data/all_data/'

SPLIT_SEED = 42       # fixed so the train/val split is reproducible
TRAIN_FRACTION = 0.8  # share of the images that goes to the train split

# os.listdir returns entries in arbitrary order; sort first so that the seeded
# shuffle yields the same split on every machine and every run.
all_images = sorted(
    f for f in os.listdir(os.path.join(all_data_dir, 'images')) if f.endswith('.jpg')
)
# Private Random instance: deterministic without touching the global RNG state.
random.Random(SPLIT_SEED).shuffle(all_images)
split_index = int(TRAIN_FRACTION * len(all_images))
train_images = all_images[:split_index]
val_images = all_images[split_index:]

train_dir = 'data/classification_data/all_data/train/'
val_dir = 'data/classification_data/all_data/val/'

## move train and val images together with their labels ##
for split_dir, split_images in ((train_dir, train_images), (val_dir, val_images)):
    os.makedirs(os.path.join(split_dir, 'images'), exist_ok=True)
    os.makedirs(os.path.join(split_dir, 'labels'), exist_ok=True)

    for img_file in split_images:
        # Swap only the extension; str.replace would rewrite every '.jpg' in the name.
        label_file = os.path.splitext(img_file)[0] + '.txt'

        shutil.move(os.path.join(all_data_dir, 'images', img_file),
                    os.path.join(split_dir, 'images', img_file))
        shutil.move(os.path.join(all_data_dir, 'labels', label_file),
                    os.path.join(split_dir, 'labels', label_file))

In [41]:
# Group tables for the classification stage; the 'meat' entry is used further down
# as a {class id: class name} dict.
with open("pipeline_data/dicts/classification_groups.json", "r") as groups_file:
    classification_groups = json.load(groups_file)

In [None]:
import pandas as pd
from PIL import Image

CROP_SCALE = 1.1  # same enlargement factor as the default of crop_image

train_images_dir = os.path.join(all_data_dir, 'train', 'images')
train_labels_dir = os.path.join(all_data_dir, 'train', 'labels')
meat_dir = os.path.join(class_data_dir, 'meat')

# All crops go to one directory; create it once instead of once per crop.
class_image_dir = os.path.join(meat_dir, 'images', 'train')
os.makedirs(class_image_dir, exist_ok=True)

# classification_groups['meat'] maps class id -> class name; invert it for labelling.
meat_classes = list(classification_groups['meat'].values())
name_to_id_meat = {name: class_id for class_id, name in classification_groups['meat'].items()}

image_labels = []
for image_file in sorted(os.listdir(train_images_dir)):
    if not image_file.endswith('.jpg'):
        continue

    image_stem = os.path.splitext(image_file)[0]
    label_path = os.path.join(train_labels_dir, image_stem + '.txt')

    with open(label_path, 'r') as f:
        boxes = [line.split() for line in f if line.strip()]

    # Keep the position of each box in the label file: it makes the output file
    # name unique even when one image contains several boxes of the same class.
    meat_boxes = [
        (box_index, parts)
        for box_index, parts in enumerate(boxes)
        if parts[0] in name_to_id_meat
    ]
    if not meat_boxes:
        continue  # no meat in this image, so do not even open it

    # crop_image(image_path, label_path) returns the first box of the label file,
    # not the box being iterated, so each matching box is cropped here directly.
    with Image.open(os.path.join(train_images_dir, image_file)) as img:
        img_width, img_height = img.size

        for box_index, parts in meat_boxes:
            class_name = parts[0]
            x_center = float(parts[1]) * img_width
            y_center = float(parts[2]) * img_height
            width = float(parts[3]) * img_width * CROP_SCALE
            height = float(parts[4]) * img_height * CROP_SCALE

            # Clamp to the image so an enlarged box never reaches outside of it.
            x_min = max(0, int(x_center - width / 2))
            y_min = max(0, int(y_center - height / 2))
            x_max = min(img_width, int(x_center + width / 2))
            y_max = min(img_height, int(y_center + height / 2))

            cropped_img = img.crop((x_min, y_min, x_max, y_max))

            cropped_img_save_path = os.path.join(
                class_image_dir, f"{image_stem}_{box_index}_{class_name}.jpg"
            )
            cropped_img.save(cropped_img_save_path)
            image_labels.append((cropped_img_save_path, name_to_id_meat[class_name]))

### Save labels for meat classification as csv file ###
labels_df = pd.DataFrame(image_labels, columns=['image', 'class_id'])
labels_df.to_csv(os.path.join(meat_dir, 'meat_labels_train.csv'), index=False)

'2'