# Object Detection Model For Pixel Prowler

## General Imports

In [None]:
import gc
import os
import json
import math
import shutil
from PIL import Image
import numpy as np
import copy
import random
import string
import io
import sys
import cv2
import time
import datetime

## PyTorch Imports

In [None]:
import torch
import torchvision
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import torchvision.models as models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.ops import sigmoid_focal_loss

## Global Variables

In [None]:

# --- Paths -----------------------------------------------------------------
# Source COCO export; coco_train_test_split() reads `result.json` and `images/` from here.
in_dir = "/mnt/nis_lab_research/data/coco_files/eoi/far_shah_b1-b3_EOI"
# Destination of the train/test split (one sub-directory per source dataset name).
data_dir = os.path.join("../../data/obj_det", os.path.basename(in_dir))
# Six random alphanumeric characters so that every run writes to its own output directory.
rand_str = ''.join(random.choices(string.ascii_letters + string.digits, k=6))
res_name = os.path.basename(in_dir) + "_" + rand_str
out_dir = os.path.join("./out", res_name)
# Created immediately; os.makedirs without exist_ok raises if the directory already exists.
os.makedirs(out_dir)
# Upper bound on how many annotated test images are written in the qualitative section.
num_test_imgs_out = 100

# CUDA device indices handed to set_device(): one entry selects that GPU,
# several entries select the generic "cuda" device, an empty list selects the CPU.
cuda_device_num = [1]

# --- Data loading ----------------------------------------------------------
batch_size = 8  # passed to both DataLoaders
num_workers = 8 # passed to both DataLoaders
shuffle = True  # used for the training DataLoader
# Image size reported by res_to_data_dict() in its output records.
img_height = 1080
img_width = 1920

# --- Model / optimisation --------------------------------------------------
# NOTE(review): presumably one object class plus the background class - confirm.
num_classes = 1 + 1
num_epochs = 25
backbone = "resnet50"  # backbone_name forwarded to resnet_fpn_backbone()
pretrained = False
# NOTE: this string is not consulted anywhere visible; the name `optimizer` is later
# rebound to an optim.Adam instance in the "Creating Model" section.
optimizer = "Adam"
learning_rate = 0.001


In [None]:
# NOTE: this only constructs a CPU device object; the result is not assigned to anything.
# The device actually used is chosen later by set_device().
torch.device("cpu")

## Classes

In [None]:
class CocoDetection(torch.utils.data.Dataset):
    """COCO-format detection dataset yielding ``(image, target)`` pairs.

    ``target`` is a dict with three entries:

    * ``"boxes"``   - float32 tensor of shape (N, 4), boxes as
      (x_min, y_min, x_max, y_max)
    * ``"labels"``  - int64 tensor of shape (N,) with the COCO ``category_id``
    * ``"img_ids"`` - plain list holding the image id N + 1 times

    where N is the number of annotations of the image whose width and height
    are both strictly positive.
    """

    def __init__(self, root, annFile, transform=None):
        """Index the annotation file.

        Args:
            root: directory that image ``file_name`` entries are joined onto.
            annFile: path to the COCO annotation JSON.
            transform: optional callable applied to the loaded RGB image.
        """
        self.root = root
        self.transform = transform
        self.coco = COCO(annFile)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        """Load the image at position ``index`` together with its target dict."""
        img_id = self.ids[index]
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.root, file_name)).convert('RGB')

        corner_boxes = []
        class_ids = []
        for ann in annotations:
            # COCO stores (x_min, y_min, width, height); the target uses corner format.
            left, top, box_w, box_h = ann['bbox']
            if not (box_w > 0 and box_h > 0):
                # Degenerate box: leave it out of the target.
                continue
            corner_boxes.append([left, top, left + box_w, top + box_h])
            class_ids.append(ann['category_id'])

        # One entry per kept box plus one trailing entry, all equal to the image id.
        img_ids = [img_id] * (len(corner_boxes) + 1)

        if corner_boxes:
            boxes = torch.as_tensor(corner_boxes, dtype=torch.float32)
            labels = torch.as_tensor(class_ids, dtype=torch.int64)
        else:
            # Explicit (0, 4) shape so images without boxes still have a 2-D box tensor.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels, "img_ids": img_ids}

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

## Functions

In [None]:
# Pick the torch device to run on
def set_device(cuda_device_num):
    """Select a ``torch.device`` from a list of requested CUDA device indices.

    Args:
        cuda_device_num: list of CUDA device indices. More than one entry
            selects the generic ``"cuda"`` device, exactly one entry selects
            ``"cuda:<index>"`` and an empty list selects the CPU.

    Returns:
        The chosen ``torch.device``. When CUDA is unavailable this is always
        the CPU device, regardless of ``cuda_device_num``.
    """
    if not torch.cuda.is_available():
        cpu_device = torch.device("cpu")
        print("CUDA not available, using CPU")
        return cpu_device

    gpu_count = torch.cuda.device_count()
    print(f"Number of CUDA devices available: {gpu_count}")

    def _print_visible_gpus():
        # One line per CUDA device visible to this process.
        for idx in range(gpu_count):
            print(f"Device {idx}: {torch.cuda.get_device_name(idx)}")

    _print_visible_gpus()
    print()

    requested = len(cuda_device_num)
    if requested > 1:
        # Several indices requested: use the generic CUDA device.
        device = torch.device("cuda")
        print("Using multiple GPUs")
        _print_visible_gpus()
    elif requested == 1:
        # Exactly one index requested: pin to that GPU.
        gpu_index = cuda_device_num[0]
        device = torch.device(f"cuda:{gpu_index}")
        print(f"Using single GPU: Device {gpu_index} -> {torch.cuda.get_device_name(gpu_index)}")
    else:
        # Nothing requested: run on the CPU even though CUDA is present.
        device = torch.device("cpu")
        print("No GPUs found, using CPU")

    return device

In [None]:
def coco_train_test_split(in_dir, data_dir):
    """Write an 80/20 train/test split of a COCO dataset to ``data_dir``.

    Reads ``<in_dir>/result.json`` and ``<in_dir>/images/`` and creates::

        <data_dir>/train/result.json   <data_dir>/train/images/
        <data_dir>/test/result.json    <data_dir>/test/images/

    The first 80% of the entries of ``images`` (in file order, rounded down)
    form the train split and the remainder the test split. Every annotation is
    written to the split that contains its image; membership is decided by the
    image id itself, so the ids do not have to be sorted. Annotations whose
    ``image_id`` matches no train image end up in the test file.

    If ``data_dir`` already exists the function prints a message and does
    nothing.

    Args:
        in_dir: directory containing ``result.json`` and ``images/``.
        data_dir: directory to create; missing parent directories are created
            as well.

    Raises:
        OSError: if the annotation file or an image listed in it is missing.
    """
    if os.path.exists(data_dir):
        print("directory: " + data_dir + " already exists!")
        return

    train_split = 0.8

    # Read the input before creating anything on disk: a missing or malformed
    # annotation file must not leave an empty `data_dir` behind, because later
    # calls would then skip the split as "already exists".
    with open(os.path.join(in_dir, "result.json")) as f:
        coco_json = json.load(f)

    img_list = coco_json["images"]
    cat_list = coco_json["categories"]
    ann_list = coco_json["annotations"]

    train_num = math.floor(len(img_list) * train_split)
    split_imgs = {
        "train": img_list[:train_num],
        "test": img_list[train_num:],
    }

    # Route each annotation by looking up its image id, not by comparing ids
    # numerically - the latter is only right when ids ascend in file order.
    train_img_ids = {img["id"] for img in split_imgs["train"]}
    split_anns = {"train": [], "test": []}
    for ann in ann_list:
        split_name = "train" if ann["image_id"] in train_img_ids else "test"
        split_anns[split_name].append(ann)

    for split_name, imgs in split_imgs.items():
        split_dir = os.path.join(data_dir, split_name)
        split_img_dir = os.path.join(split_dir, "images")
        os.makedirs(split_img_dir)

        for img in imgs:
            # Images are stored flat under images/, whatever prefix file_name carries.
            img_name = img["file_name"].split("/")[-1]
            shutil.copy(
                os.path.join(in_dir, "images", img_name),
                os.path.join(split_img_dir, img_name),
            )

        split_json = {
            "images": imgs,
            "categories": cat_list,
            "annotations": split_anns[split_name],
        }
        with open(os.path.join(split_dir, "result.json"), "w") as outfile:
            json.dump(split_json, outfile, indent=4)

    print("creating " + str(train_split) + " train test split to path: " + data_dir)

In [None]:
def cust_coll(batch):
    """Collate ``(image, target)`` samples for a detection DataLoader.

    The images are combined with ``default_collate``; the targets are left as
    a tuple of per-sample dicts because they are not collated.

    Args:
        batch: list of ``(image, target)`` samples.

    Returns:
        ``(images, targets)`` - the collated images and the tuple of targets.
    """
    # Transpose [(img, tgt), ...] into [(img, ...), (tgt, ...)].
    columns = [*zip(*batch)]
    return default_collate(columns[0]), columns[1]

In [None]:
def get_model(num_classes, backbone, pretrained):
    """Build a Faster R-CNN detector on top of a ResNet-FPN backbone.

    Args:
        num_classes: number of classes given to both the detector and its
            box predictor head.
        backbone: ``backbone_name`` forwarded to ``resnet_fpn_backbone``
            (e.g. ``"resnet50"``).
        pretrained: forwarded to ``resnet_fpn_backbone`` as ``pretrained``.

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # Another route noted as working: torchvision.models.detection.fasterrcnn_resnet50_fpn
    # followed by the same FastRCNNPredictor swap that is done below.

    fpn_backbone = resnet_fpn_backbone(backbone_name=backbone, pretrained=pretrained)
    detector = FasterRCNN(fpn_backbone, num_classes=num_classes)

    # Install a fresh box predictor sized for `num_classes`, fed by the same feature width.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [None]:
def groupByImgId(res):
    """Group annotation dicts by their ``'image_id'`` value.

    Args:
        res: iterable of dicts that each carry an ``'image_id'`` key.

    Returns:
        A dict mapping every image id to the list of its annotations. Ids and
        annotations keep the order in which they first appear in ``res``.
    """
    by_image = {}
    for entry in res:
        # setdefault creates the bucket the first time an id is seen.
        by_image.setdefault(entry['image_id'], []).append(entry)
    return by_image

In [None]:
def res_to_data_dict(json_obj, data_dir, bbox_format="xywh"):
    """Turn a flat list of detections into one record per image.

    Args:
        json_obj: iterable of detection dicts with the keys ``"image_id"``,
            ``"category_id"``, ``"bbox"`` and ``"score"``.
        data_dir: root of the train/test split; ``test/result.json`` below it
            is read to resolve image ids to file names.
        bbox_format: layout of each input ``"bbox"``. ``"xywh"`` (default)
            means COCO ``[x, y, width, height]``, which is what the inference
            loop in this file stores in its results, so boxes are passed
            through unchanged. ``"xyxy"`` means ``[x1, y1, x2, y2]`` corners,
            which are converted to ``[x, y, width, height]``.

    Returns:
        A list with one dict per image id (in order of first appearance)::

            {"file_name": ..., "height": ..., "width": ..., "annotations": [...]}

        ``file_name`` points into ``<data_dir>/test/images`` and is ``""``
        when the image id is not listed in the test annotation file. Each
        annotation holds ``id`` (running counter over all images),
        ``image_id``, ``category_id``, ``bbox`` as ``[x, y, width, height]``,
        ``area``, ``iscrowd`` (always 0) and ``score``. ``height``/``width``
        come from the image's own entry when it has them and otherwise from
        the module-level ``img_height``/``img_width``.

    Raises:
        ValueError: if ``bbox_format`` is neither ``"xywh"`` nor ``"xyxy"``.
    """
    if bbox_format not in ("xywh", "xyxy"):
        raise ValueError(
            f"bbox_format must be 'xywh' or 'xyxy', got {bbox_format!r}"
        )

    with open(os.path.join(data_dir, "test/result.json")) as f:
        og_coco_obj = json.load(f)
    # Index once by id instead of scanning the image list for every result image.
    images_by_id = {img["id"]: img for img in og_coco_obj["images"]}

    # Group detections per image (kept inline so the function is self-contained).
    grouped_anns = {}
    for det in json_obj:
        grouped_anns.setdefault(det["image_id"], []).append(det)

    data_dict_list = []
    ann_id = 0

    for image_id, detections in grouped_anns.items():
        img_info = images_by_id.get(image_id)
        if img_info is None:
            img_info = {}
            fn = ""
        else:
            # The split stores images flat under images/ using the last path
            # component of file_name, so resolve the name the same way.
            fn = os.path.join(
                data_dir, "test", "images", img_info["file_name"].split("/")[-1]
            )

        height = img_info.get("height")
        if height is None:
            height = img_height
        width = img_info.get("width")
        if width is None:
            width = img_width

        ann_list = []
        for det in detections:
            if bbox_format == "xyxy":
                x1, y1, x2, y2 = det["bbox"]
                x, y, width_bb, height_bb = x1, y1, x2 - x1, y2 - y1
            else:
                x, y, width_bb, height_bb = det["bbox"]

            ann_list.append({
                "id": ann_id,
                "image_id": det["image_id"],
                "category_id": det["category_id"],
                "bbox": [x, y, width_bb, height_bb],
                "area": width_bb * height_bb,
                "iscrowd": 0,  # detections are never crowd regions
                "score": det["score"],
            })
            ann_id += 1

        data_dict_list.append({
            "file_name": fn,
            "height": height,
            "width": width,
            "annotations": ann_list,
        })

    return data_dict_list
    
    

## Setting CUDA Devices

In [None]:
# Resolve the torch device from the configured CUDA index list
# (set_device returns the CPU device when CUDA is unavailable).
device = set_device(cuda_device_num)

In [None]:
# Free memory left over from earlier runs of this notebook before loading data and model:
# ask the CUDA allocator to release its cache, then run Python's garbage collector.
torch.cuda.empty_cache()
gc.collect()


## Manipulating & Loading Data

In [None]:
# Creating train and test set
# (the call only prints a message when `data_dir` already exists, so re-running is harmless)
coco_train_test_split(in_dir, data_dir)

In [None]:
# Only convert the PIL image to a float tensor in the 0-1 range.
# torchvision's Faster R-CNN expects 0-1 inputs and applies mean/std normalisation
# itself (ImageNet statistics by default), so a Normalize step here would
# normalise every image twice.
transform = transforms.Compose([
    transforms.ToTensor(),
])


In [None]:
# Training data: images and annotations of the "train" half of the split.
# The path prefix is the same expression that defines `data_dir`.
train_set = CocoDetection(root=os.path.join("../../data/obj_det", os.path.basename(in_dir),"train/images"), 
                          annFile=os.path.join("../../data/obj_det", os.path.basename(in_dir),"train/result.json"), 
                          transform=transform)
# cust_coll keeps the per-image target dicts as a tuple instead of collating them.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=shuffle, 
                           num_workers=num_workers, collate_fn=cust_coll)

In [None]:
# Evaluation data: images and annotations of the "test" half of the split.
test_set = CocoDetection(root=os.path.join("../../data/obj_det", os.path.basename(in_dir), "test/images"),
                         annFile=os.path.join("../../data/obj_det", os.path.basename(in_dir), "test/result.json"),
                         transform=transform)
# No shuffling for evaluation: the metrics do not depend on the order, and a fixed
# order makes the results list - and therefore the subset of images written in the
# qualitative section - reproducible from run to run.
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False,
                         num_workers=num_workers, collate_fn=cust_coll)

## Creating Model

In [None]:
# Build the Faster R-CNN detector from the configured class count / backbone
# and move its parameters to the selected device.
model = get_model(num_classes, backbone, pretrained)
model.to(device)

In [None]:
# NOTE: Adam is hard-coded here; the `optimizer = "Adam"` configuration string is not
# consulted and the name `optimizer` is rebound to the optimizer instance by this line.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Training

In [None]:
# Train for `num_epochs` epochs, logging the loss of every iteration and the
# mean loss of every epoch.
for epoch in range(num_epochs):

    start_time_epoch = time.time()  # Start time of the epoch
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for i, data in enumerate(train_loader, 0):

        images, targets = data
        images = [image.to(device) for image in images]

        # Build device-side copies of the targets without the 'img_ids' entry, which is
        # a plain list (no .to()) that is only needed at evaluation time. Creating new
        # dicts leaves the loader's targets untouched, so no deepcopy of the tensors
        # is required.
        targets_cln = [
            {k: v.to(device) for k, v in t.items() if k != 'img_ids'}
            for t in targets
        ]

        optimizer.zero_grad()

        # Forward pass: called with targets, the model returns a dict of losses
        loss_dict = model(images, targets_cln)

        # The loss is the sum of all individual losses
        losses = sum(loss for loss in loss_dict.values())
        print(f"Iteration {i}, Loss: {losses.item()}, Time: {datetime.datetime.now()}")

        # Backward pass
        losses.backward()
        optimizer.step()

        running_loss += losses.item()

    epoch_duration = time.time() - start_time_epoch

    # max(..., 1) keeps the average defined when the loader yields no batches.
    print("---")
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/max(len(train_loader), 1)}, Duration: {epoch_duration:.2f} seconds, Timestamp: {datetime.datetime.now()}")
    print("---")


In [None]:
# Saving model to file
# The checkpoint goes to <out_dir>/pth/model.pth.
pth_path = os.path.join(out_dir, "./pth")
if not os.path.exists(pth_path):
    os.makedirs(pth_path)
# NOTE: the whole model object is passed to torch.save here, not model.state_dict().
torch.save(model, os.path.join(pth_path, "model.pth"))

## Testing

In [None]:
# Put the model in evaluation mode
model.eval()

# Load the COCO ground truth
coco_path = os.path.join("../../data/obj_det", os.path.basename(in_dir), "test/result.json")
cocoGt = COCO(coco_path)

# Ids of every test image. This list is read again by the COCO evaluation cell,
# so the loop below must not rebind the name `img_ids`.
with open(coco_path, "r") as f:
    obj = json.load(f)
img_ids = [img["id"] for img in obj["images"]]

# Detections in COCO results format, one dict per predicted box
results = []
ind = 0

# Start time of the evaluation
start_time_eval = time.time()

with torch.no_grad():
    for images, targets in test_loader:

        images = [img.to(device) for img in images]
        outputs = model(images)

        for i, output in enumerate(outputs):
            print(f"Evaluating image index: {ind}, Time: {datetime.datetime.now()}")
            # Every entry of the target's 'img_ids' list is the id of this image and
            # the list is never empty, so the first entry identifies the image.
            img_id = targets[i]["img_ids"][0]

            # Keep every detection: zipping against the 'img_ids' list would cut the
            # detections off at the number of ground-truth boxes plus one.
            for box, label, score in zip(output["boxes"], output["labels"], output["scores"]):
                box = [float(n) for n in box.cpu().numpy()]

                res = {
                    "image_id": img_id,
                    "category_id": int(label),
                    # model boxes are (x1, y1, x2, y2); COCO results use (x, y, width, height)
                    "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]],
                    "score": float(score)
                }
                results.append(res)

            ind += 1

eval_duration = time.time() - start_time_eval
print(f"Total evaluation time: {eval_duration:.2f} seconds, Timestamp: {datetime.datetime.now()}")

res_path = os.path.join(out_dir, "results")
if not os.path.exists(res_path):
    os.makedirs(res_path)

# Save the results in a file
with open(os.path.join(res_path, "results.json"), "w") as f:
    json.dump(results, f)

print(f"Results saved in {os.path.join(res_path, 'results.json')}")

## Evaluation

### Quantitative 

In [None]:
# Load results into COCO data structure
# (reads back the results.json written by the inference cell)
# NOTE(review): presumably this fails when the model produced no detections at all - confirm.
cocoDt = cocoGt.loadRes(os.path.join(res_path,"results.json"))

In [None]:
# COCO evaluation
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
# Evaluate over every image of the ground truth. The ids are taken from cocoGt
# itself so that the evaluated set does not depend on the state of any other
# notebook variable.
cocoEval.params.imgIds = sorted(cocoGt.getImgIds())
cocoEval.evaluate()
cocoEval.accumulate()

# summarize() prints its table to stdout; capture it so it can be written to a file.
# The try/finally guarantees stdout is restored even if summarize() raises.
original_stdout = sys.stdout
captured_output = io.StringIO()
sys.stdout = captured_output
try:
    cocoEval.summarize()
finally:
    sys.stdout = original_stdout
summary_str = captured_output.getvalue()

with open(os.path.join(res_path, "scores.txt"), 'w') as file:
    file.write(summary_str)

### Qualitative

In [None]:
# Getting results in data dict format
# (one record per image: file name, image size and the list of its detections)
data_dict_list = res_to_data_dict(results, data_dir)

In [None]:
# Colour used for the boxes of each category_id. The initial entry for category 1
# is replaced below whenever category 1 occurs among the detections.
category_colors = {1: "red"}

# Every category_id that occurs in at least one detection
unique_category_ids = {
    annotation['category_id']
    for data_dict in data_dict_list
    for annotation in data_dict['annotations']
}

# Draw a random 3-component colour (each component in 0..255) per detected category
for category_id in unique_category_ids:
    color = tuple(np.random.randint(0, 256, 3).tolist())
    category_colors[category_id] = color

In [None]:
# Text drawn next to each box, keyed by category_id.
# 1 = EOI
# Category ids missing from this mapping are drawn as 'Unknown'.
category_labels = {
    1: '1',
}

In [None]:
# Creating output images directory
# (<out_dir>/imgs receives the test images with the detections drawn on them)
img_out_path = os.path.join(out_dir, "imgs")
if not os.path.exists(img_out_path ):
    os.makedirs(img_out_path )

In [None]:
# Loop through data_dict_list and draw bounding boxes with category labels

# Write at most `num_test_imgs_out` annotated images
num_to_proc = min(len(data_dict_list), num_test_imgs_out)

for data_dict in data_dict_list[0:num_to_proc]:

    print(data_dict)
    image_path = data_dict['file_name']
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None instead of
        # raising; skip the image rather than crash in the drawing calls below.
        print(f"Skipping {image_path!r}: image could not be read")
        continue

    for annotation in data_dict['annotations']:
        # bbox is [x, y, width, height]; OpenCV drawing needs integer pixel coordinates
        x, y, w, h = (int(v) for v in annotation['bbox'])

        category_id = annotation['category_id']
        category_label = category_labels.get(category_id, 'Unknown')
        category_color = category_colors.get(category_id, (0, 0, 255))  # fallback colour for unmapped ids

        cv2.rectangle(image, (x, y), (x + w, y + h), category_color, 2)  # Draw the bounding box
        cv2.putText(image, category_label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, category_color, 2)

    # Save the image with bounding boxes and labels to the output directory
    output_path = os.path.join(img_out_path, os.path.basename(image_path))
    cv2.imwrite(output_path, image)