In [None]:
import json
import os
import warnings

import torch
from PIL import Image
from transformers import AutoProcessor, GroundingDinoForObjectDetection

import detectron2.data.detection_utils as utils
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data import build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.utils.visualizer import Visualizer, ColorMode

In [None]:
# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning, message=".*loading.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*CUDA.*")

In [None]:
# Load the Grounding DINO processor and detection model from the same checkpoint
_GROUNDING_DINO_CHECKPOINT = "IDEA-Research/grounding-dino-tiny"
processor = AutoProcessor.from_pretrained(_GROUNDING_DINO_CHECKPOINT)
model = GroundingDinoForObjectDetection.from_pretrained(_GROUNDING_DINO_CHECKPOINT)

In [None]:
def remove_exif(image_path):
    """Re-encode an image in place as RGB, without handing EXIF data to the encoder.

    The file is decoded, converted to RGB and written back to the same path.
    The output encoder follows the file extension: ``.png`` files stay PNG,
    everything else is written as JPEG (the previous behavior for all files,
    which silently replaced PNG content with JPEG bytes).

    Args:
        image_path: Path of the image file to rewrite. The file is modified.
    """
    extension = os.path.splitext(image_path)[1].lower()
    output_format = "PNG" if extension == ".png" else "JPEG"

    with Image.open(image_path) as img:
        rgb_image = img.convert("RGB")

    # Save only after the source handle is released so the file being
    # overwritten is no longer held open by the reader.
    rgb_image.save(image_path, output_format)

def generate_annotations_for_folder(folder_path, category_id, text_prompt, score_threshold=0.1):
    """Auto-label every ``.jpg``/``.png`` image in a folder with Grounding DINO.

    Each image is first rewritten in place by ``remove_exif`` and then run
    through the module-level ``processor``/``model`` pair with ``text_prompt``.
    Every detection that passes ``score_threshold`` becomes one COCO-style
    annotation tagged with ``category_id``.

    Args:
        folder_path: Directory scanned (non-recursively) for images.
        category_id: Category id written into every annotation of this folder.
        text_prompt: Text query passed to the processor.
        score_threshold: Minimum score forwarded to the post-processing step.

    Returns:
        ``(images_info, annotations)``: two lists of dicts. ``images_info``
        holds ``id``/``file_name``/``width``/``height`` per image; each
        annotation holds ``id``, ``image_id``, ``bbox`` as
        ``[x, y, width, height]`` in pixels, ``score``, ``category_id``,
        ``iscrowd`` and ``area``.
    """
    annotations = []
    images_info = []

    # os.listdir order is arbitrary; sorting keeps image ids stable between runs.
    image_names = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith((".jpg", ".png"))
    )

    for image_id, filename in enumerate(image_names):
        image_path = os.path.join(folder_path, filename)
        remove_exif(image_path)

        with Image.open(image_path) as image:
            width, height = image.size
            inputs = processor(images=image, text=text_prompt, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)

        # target_sizes is (height, width). Because it is supplied, the returned
        # boxes are already absolute-pixel (x_min, y_min, x_max, y_max) values
        # and must not be multiplied by the image size a second time.
        results = processor.image_processor.post_process_object_detection(
            outputs, threshold=score_threshold, target_sizes=torch.tensor([[height, width]])
        )[0]

        for score, box in zip(results["scores"], results["boxes"]):
            x_min, y_min, x_max, y_max = box.tolist()

            # Keep the box inside the image before measuring it.
            x_min = round(min(max(x_min, 0.0), width), 1)
            y_min = round(min(max(y_min, 0.0), height), 1)
            x_max = round(min(max(x_max, 0.0), width), 1)
            y_max = round(min(max(y_max, 0.0), height), 1)

            box_width = round(x_max - x_min, 1)
            box_height = round(y_max - y_min, 1)
            if box_width <= 0 or box_height <= 0:
                # Inverted or empty boxes carry no usable annotation.
                continue

            annotations.append({
                # Ids start at 1: COCO-style evaluation treats annotation id 0
                # as "no match".
                "id": len(annotations) + 1,
                "image_id": image_id,
                # COCO boxes are [x, y, width, height], not corner pairs.
                "bbox": [x_min, y_min, box_width, box_height],
                "score": score.item(),
                "category_id": category_id,
                "iscrowd": 0,
                "area": box_width * box_height,
            })

        images_info.append({
            "id": image_id,
            "file_name": filename,
            "width": width,
            "height": height,
        })

    return images_info, annotations

def save_annotations_to_coco_format(images_info, annotations, output_path, categories=None):
    """Write image and annotation records to a COCO-style JSON file.

    Args:
        images_info: List of image dicts stored under the ``"images"`` key.
        annotations: List of annotation dicts stored under ``"annotations"``.
        output_path: Destination file. Missing parent directories are created.
        categories: Optional list of category dicts for the ``"categories"``
            key. Defaults to the screw (id 1) / bolt (id 2) pair.
    """
    if categories is None:
        categories = [{"id": 1, "name": "screw"}, {"id": 2, "name": "bolt"}]

    coco_format = {
        "images": images_info,
        "annotations": annotations,
        "categories": categories,
    }

    # open() does not create directories, so make sure the target folder exists.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(coco_format, f, indent=4)

In [None]:
# Image folders used for auto-labelling, both located under one data root
_DATA_ROOT = "C:/Users/atulp/Downloads/perspectiv_labs/data"
screws_folder = f"{_DATA_ROOT}/Screws_2024_07_15/"
screws_and_bolts_folder = f"{_DATA_ROOT}/ScrewAndBolt_20240713/"


In [None]:
# Auto-label both image folders, then write each result to its own COCO JSON file
_ANNOTATIONS_DIR = "C:/Users/atulp/Downloads/perspectiv_labs/AI/annotations"

screws_images, screws_annotations = generate_annotations_for_folder(
    screws_folder,
    category_id=1,
    text_prompt="Find many individual small screws, focusing on their shape and size.",
)
screws_and_bolts_images, screws_and_bolts_annotations = generate_annotations_for_folder(
    screws_and_bolts_folder,
    category_id=2,
    text_prompt="Locate many individual small screws and many individual small bolts, focusing on their distinct shape and size.",
)

save_annotations_to_coco_format(
    screws_images,
    screws_annotations,
    f"{_ANNOTATIONS_DIR}/screws_annotations.json",
)
save_annotations_to_coco_format(
    screws_and_bolts_images,
    screws_and_bolts_annotations,
    f"{_ANNOTATIONS_DIR}/screws_and_bolts_annotations.json",
)

In [None]:
# Register datasets with Detectron2
def register_coco_dataset(name, json_file, image_root):
    """Register a COCO-format dataset with Detectron2, replacing any earlier entry.

    Re-running the registration cell in the same kernel would otherwise fail
    because the catalog rejects a name that is already registered. Dropping
    the old dataset and metadata entries first makes the call repeatable.

    Args:
        name: Dataset name used for catalog lookups.
        json_file: Path to the COCO annotation JSON file.
        image_root: Directory containing the images referenced by the JSON.
    """
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)
    register_coco_instances(name, {}, json_file, image_root)

def visualize_annotations(dataset_name, image_folder):
    """Draw a registered dataset's annotations and save one image per record.

    Output images are written to ``<image_folder>/visualizations`` under the
    same base name as the source image.

    Args:
        dataset_name: Name of a dataset present in ``DatasetCatalog``.
        image_folder: Folder holding the images. Also the parent of the
            ``visualizations`` output folder.
    """
    metadata = MetadataCatalog.get(dataset_name)
    dataset_dicts = DatasetCatalog.get(dataset_name)

    output_folder = os.path.join(image_folder, "visualizations")
    os.makedirs(output_folder, exist_ok=True)

    for record in dataset_dicts:
        # Records may already carry a full path; only fall back to joining
        # with image_folder when the stored name does not resolve by itself.
        img_path = record["file_name"]
        if not os.path.isfile(img_path):
            img_path = os.path.join(image_folder, img_path)

        # Read as RGB: both the Visualizer input and PIL's fromarray use RGB,
        # so no channel flipping is needed anywhere in this function.
        image = utils.read_image(img_path, format="RGB")

        visualizer = Visualizer(image, metadata=metadata, instance_mode=ColorMode.IMAGE)
        drawn = visualizer.draw_dataset_dict(record)
        # get_image() is RGB already; reversing it before saving with PIL
        # would swap the red and blue channels in the written file.
        vis_image_pil = Image.fromarray(drawn.get_image())

        output_image_path = os.path.join(output_folder, os.path.basename(record["file_name"]))
        vis_image_pil.save(output_image_path)
        print(f"Annotation visualization saved to {output_image_path}")

In [None]:
# Register datasets under the names that train_retinanet() puts into
# cfg.DATASETS ("screws_and_bolts_train" / "screws_val") and that
# evaluate_model() looks up. Registering them as "screws" /
# "screws_and_bolts" left those lookups without a matching dataset.
register_coco_dataset(
    "screws_val",
    "C:/Users/atulp/Downloads/perspectiv_labs/AI/annotations/screws_annotations.json",
    screws_folder,
)
register_coco_dataset(
    "screws_and_bolts_train",
    "C:/Users/atulp/Downloads/perspectiv_labs/AI/annotations/screws_and_bolts_annotations.json",
    screws_and_bolts_folder,
)

visualize_annotations("screws_val", screws_folder)
visualize_annotations("screws_and_bolts_train", screws_and_bolts_folder)


In [None]:
# Training configuration
def train_retinanet():
    """Fine-tune a COCO-pretrained RetinaNet R50-FPN on the screw/bolt data.

    Builds the config from the model-zoo RetinaNet recipe, trains on
    ``screws_and_bolts_train`` for 500 iterations on CPU and writes
    checkpoints to ``./output``.

    Returns:
        The config used for training, with ``MODEL.WEIGHTS`` pointing at the
        final checkpoint so predictors built from it load the trained model.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_50_FPN_3x.yaml")
    cfg.DATASETS.TRAIN = ("screws_and_bolts_train",)
    cfg.DATASETS.TEST = ("screws_val",)
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 500
    # RetinaNet takes its class count from MODEL.RETINANET; the ROI_HEADS
    # settings below do not affect this architecture.
    cfg.MODEL.RETINANET.NUM_CLASSES = 2
    # Kept for compatibility: count_objects_in_images reads
    # cfg.MODEL.ROI_HEADS.NUM_CLASSES when filtering predictions.
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    cfg.MODEL.DEVICE = "cpu"
    cfg.OUTPUT_DIR = "./output"
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Without this, every DefaultPredictor(cfg) created later would reload the
    # COCO-pretrained checkpoint instead of the weights that were just trained.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    return cfg

# Train the model; the returned cfg is passed to evaluate_model() and
# count_objects_in_images() in the cells below
cfg = train_retinanet()

In [None]:
def evaluate_model(cfg):
    """Run COCO-style evaluation on the first dataset in ``cfg.DATASETS.TEST``.

    Args:
        cfg: Detectron2 config. ``MODEL.WEIGHTS`` selects the checkpoint that
            is evaluated and ``OUTPUT_DIR`` receives the evaluator's files.

    Returns:
        The results object produced by ``inference_on_dataset`` (also printed).
    """
    # Take the dataset from the config so evaluation follows cfg.DATASETS.TEST
    # instead of a second hard-coded copy of the name.
    dataset_name = cfg.DATASETS.TEST[0]
    test_loader = build_detection_test_loader(cfg, dataset_name)
    evaluator = COCOEvaluator(dataset_name, cfg, False, output_dir=cfg.OUTPUT_DIR)
    print("Evaluating the model...")
    predictor = DefaultPredictor(cfg)
    results = inference_on_dataset(predictor.model, test_loader, evaluator)
    print(results)
    return results

# Evaluate the model with the cfg returned by train_retinanet() above;
# evaluate_model() prints the resulting metrics
evaluate_model(cfg)

In [None]:
# Define Metadata and visualize results
MetadataCatalog.get("screws_train").thing_classes = ["screw", "bolt"]
MetadataCatalog.get("screws_and_bolts_val").thing_classes = ["screw", "bolt"]

# count_objects_in_images() reads MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
# so the class names must also be attached to the dataset names held in cfg;
# the two names above do not appear in cfg.DATASETS.
for _dataset_name in (*cfg.DATASETS.TRAIN, *cfg.DATASETS.TEST):
    MetadataCatalog.get(_dataset_name).thing_classes = ["screw", "bolt"]


In [None]:
def count_objects_in_images(image_folder, output_folder, cfg):
    """Run the detector on every ``.jpg``/``.png`` image and save visualizations.

    For each image the number of raw predictions is printed. Predictions whose
    class index is below ``cfg.MODEL.ROI_HEADS.NUM_CLASSES`` are kept, drawn
    onto the image and saved to ``output_folder`` under the same file name.

    Args:
        image_folder: Folder scanned (non-recursively) for images.
        output_folder: Folder receiving the visualizations. Created if missing.
        cfg: Detectron2 config used to build the predictor and to look up the
            metadata of ``cfg.DATASETS.TRAIN[0]``.

    Returns:
        Dict mapping each processed file name to the number of predictions
        kept after the class-index filter (0 when nothing was detected).
    """
    predictor = DefaultPredictor(cfg)
    metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES

    os.makedirs(output_folder, exist_ok=True)

    counts = {}
    # Sorted so the printed log and the returned dict have a stable order.
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(image_folder, filename)
        # BGR is kept for the predictor input, as in the original code
        # (presumably matching cfg.INPUT.FORMAT; confirm if that is overridden).
        image = utils.read_image(image_path, format="BGR")
        predictions = predictor(image)["instances"].to("cpu")

        num_objects = len(predictions)
        print(f"Image: {filename}, Number of objects detected: {num_objects}")

        if num_objects == 0:
            counts[filename] = 0
            print(f"No objects detected in {filename}.")
            continue

        valid_predictions = predictions[predictions.pred_classes < num_classes]
        counts[filename] = len(valid_predictions)

        if len(valid_predictions) == 0:
            print(f"No valid predictions for {filename}.")
            continue

        # The Visualizer is given the image in RGB order.
        v = Visualizer(image[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
        try:
            out = v.draw_instance_predictions(valid_predictions)
            # get_image() is RGB, which is what PIL expects; flipping it again
            # before saving would swap red and blue in the written file.
            vis_image_pil = Image.fromarray(out.get_image())

            output_image_path = os.path.join(output_folder, filename)
            vis_image_pil.save(output_image_path)
            print(f"Visualized image saved to {output_image_path}")
        except IndexError as e:
            print(f"Error visualizing {filename}: {e}")

    return counts

In [None]:
# Count and visualize detections for the mixed screw-and-bolt images
_counting_input_folder = "C:/Users/atulp/Downloads/perspectiv_labs/data/ScrewAndBolt_20240713/"
_counting_output_folder = "C:/Users/atulp/Downloads/perspectiv_labs/data/Output"
count_objects_in_images(_counting_input_folder, _counting_output_folder, cfg)