In [1]:
import os
import cv2
import numpy as np
import csv
from glob import glob

# Base directories
dataset_root = 'open_close'
output_root = 'dataset_masked'
splits = ['train', 'valid']

metadata_dir = os.path.join(output_root, 'metadata')
os.makedirs(metadata_dir, exist_ok=True)
csv_path = os.path.join(metadata_dir, 'summary.csv')

def create_mask(image_shape, polygons):
    mask = np.zeros(image_shape[:2], dtype=np.uint8)
    for pts in polygons:
        pts_array = np.array(pts, dtype=np.int32).reshape((-1, 1, 2))
        cv2.fillPoly(mask, [pts_array], color=255)
    return mask

def parse_annotation(txt_path, width, height):
    polygons = []
    class_counts = {1: 0, 2: 0}
    with open(txt_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            parts = line.strip().split()
            if len(parts) < 7 or (len(parts) - 1) % 2 != 0:
                print(f"⚠️  Invalid polygon format at {txt_path}, line {line_num}")
                continue

            try:
                class_id = int(parts[0])
                coords = parts[1:]
                pts = []
                for i in range(0, len(coords), 2):
                    x = float(coords[i])
                    y = float(coords[i+1])
                    if not (0 <= x <= 1 and 0 <= y <= 1):
                        print(f"⚠️  Normalized point out of range: ({x}, {y}) in {txt_path}")
                        continue
                    px = int(x * width)
                    py = int(y * height)
                    pts.append((px, py))
                if len(pts) >= 3:
                    polygons.append(pts)
                    if class_id in class_counts:
                        class_counts[class_id] += 1
            except ValueError:
                print(f"⚠️  Non-numeric values in {txt_path}, line {line_num}")
    return polygons, class_counts

def resize_to_256(image_or_mask):
    return cv2.resize(image_or_mask, (256, 256), interpolation=cv2.INTER_AREA)

def process_dataset():
    with open(csv_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['image_path', 'description'])

        for split in splits:
            print(f"Processing split: {split}")
            img_dir = os.path.join(dataset_root, split, 'images')
            label_dir = os.path.join(dataset_root, split, 'labels')

            new_img_dir = os.path.join(output_root, split, 'images')
            new_label_dir = os.path.join(output_root, split, 'labels')
            os.makedirs(new_img_dir, exist_ok=True)
            os.makedirs(new_label_dir, exist_ok=True)

            for img_path in glob(os.path.join(img_dir, '*')):
                img_name = os.path.basename(img_path)
                base_name = os.path.splitext(img_name)[0]
                txt_path = os.path.join(label_dir, base_name + '.txt')

                image = cv2.imread(img_path)
                if image is None:
                    print(f"Could not read image: {img_path}")
                    continue

                height, width = image.shape[:2]

                if not os.path.exists(txt_path):
                    print(f"Warning: Label file not found for {img_name}")
                    continue

                polygons, class_counts = parse_annotation(txt_path, width, height)
                mask = create_mask(image.shape, polygons)

                # Resize
                resized_image = resize_to_256(image)
                resized_mask = resize_to_256(mask)

                # Save
                save_img_path = os.path.join(new_img_dir, img_name)
                save_mask_path = os.path.join(new_label_dir, base_name + '.jpg')
                cv2.imwrite(save_img_path, resized_image)
                cv2.imwrite(save_mask_path, resized_mask)

                # Write to CSV
                abs_img_path = os.path.abspath(save_img_path)
                desc = f"There are {class_counts.get(2, 0)} open-saffron flower and {class_counts.get(1, 0)} close-saffron flower"
                writer.writerow([abs_img_path, desc])

    print(f"\n✅ Conversion complete. Metadata saved at: {csv_path}")

if __name__ == '__main__':
    process_dataset()


Processing split: train
Processing split: valid

✅ Conversion complete. Metadata saved at: dataset_masked/metadata/summary.csv


In [2]:
import os
import random
import cv2  # <- Required for grayscale conversion

random.seed(42)  # for reproducibility

# Original dataset paths
train_img_dir = 'dataset_masked/train/images'
val_img_dir = 'dataset_masked/valid/images'

# Mask dirs (output)
train_mask_dir = 'dataset_masked/train/labels'
val_mask_dir = 'dataset_masked/valid/labels'

# Output txt files
data_path = './dataset_masked/txt/train_path.txt'
mask_path = './dataset_masked/txt/train_mask_path.txt'
val_path = './dataset_masked/txt/val_path.txt'
val_mask_path = './dataset_masked/txt/val_mask_path.txt'
test_path = './dataset_masked/txt/test_path.txt'
test_mask_path = './dataset_masked/txt/test_mask_path.txt'

os.makedirs('./dataset_masked/txt', exist_ok=True)

# 1. Get all train images
train_images = sorted([f for f in os.listdir(train_img_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))])

# 2. Split into train (90%) and val (10%)
split_idx = int(len(train_images) * 0.9)
train_split = train_images[:split_idx]
val_split = train_images[split_idx:]

# 3. Get all val images (will be used as test set)
test_images = sorted([f for f in os.listdir(val_img_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))])

def convert_to_grayscale_if_needed(mask_path):
    """Ensure the mask is in grayscale and overwrite if not."""
    mask = cv2.imread(mask_path)
    if mask is None:
        print(f"[WARNING] Could not read mask: {mask_path}")
        return
    if len(mask.shape) == 3:  # If not grayscale
        gray_mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(mask_path, gray_mask)

def write_paths(images, img_dir, mask_dir, img_outfile, mask_outfile):
    with open(img_outfile, 'w') as f_img, open(mask_outfile, 'w') as f_mask:
        for img_name in images:
            img_path = os.path.abspath(os.path.join(img_dir, img_name))
            base_name = os.path.splitext(img_name)[0]
            mask_name = base_name + '.jpg'
            mask_path_full = os.path.abspath(os.path.join(mask_dir, mask_name))

            if not os.path.exists(img_path):
                print(f"Warning: image not found {img_path}")
                continue
            if not os.path.exists(mask_path_full):
                print(f"Warning: mask not found {mask_path_full}")
                continue

            # ✅ Convert to grayscale if needed
            convert_to_grayscale_if_needed(mask_path_full)

            f_img.write(img_path + '\n')
            f_mask.write(mask_path_full + '\n')

print("Writing train paths...")
write_paths(train_split, train_img_dir, train_mask_dir, data_path, mask_path)

print("Writing val paths...")
write_paths(val_split, train_img_dir, train_mask_dir, val_path, val_mask_path)

print("Writing test paths...")
write_paths(test_images, val_img_dir, val_mask_dir, test_path, test_mask_path)

print("Done!")


Writing train paths...
Writing val paths...
Writing test paths...
Done!


In [5]:
import os
import pandas as pd
from sentence_transformers import SentenceTransformer

# Ensure compatibility
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Load SBERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Read input CSV (no header)
csv_path = './dataset_masked/metadata/summary.csv'
df = pd.read_csv(csv_path, header=None)

# Extract path and sentence columns
paths = df[0].tolist()        # First column
sentences = df[1].tolist()    # Second column

# Generate embeddings
embeddings = model.encode(sentences, convert_to_numpy=True, show_progress_bar=True)

# Combine paths + embeddings
embedding_df = pd.DataFrame(embeddings)
embedding_df.insert(0, 'path', paths)  # Insert path as the first column

# Save to new CSV
embedding_df.to_csv('./dataset_masked/metadata/sentence_embeddings.csv', index=False)

print(f"✅ Saved {len(embeddings)} embeddings with path column. Shape: {embeddings.shape}")


Batches:   0%|          | 0/22 [00:00<?, ?it/s]

✅ Saved 689 embeddings with path column. Shape: (689, 384)
