In [None]:
from google.colab import drive
drive.mount('/content/drive')

!pip install torch torchvision opencv-python matplotlib numpy tqdm pycocotools

import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F


Mounted at /content/drive


In [None]:

BASE_PATH = "/content/drive/My Drive/Colab Notebooks/Thesis_Dataset/Dataset1"
TRAIN_IMG = f"{BASE_PATH}/train/images"
TRAIN_LBL = f"{BASE_PATH}/train/labels"
TEST_IMG  = f"{BASE_PATH}/test/images"
TEST_LBL  = f"{BASE_PATH}/test/labels"

CLASSES = ["propeller","pipe_type2","red_fin","net","qr_codes","pipe"]
NUM_CLASSES = len(CLASSES) + 1  # + background

def read_yolo_labels(txt_file, W, H):
    boxes, labels = [], []
    if not os.path.exists(txt_file):
        return torch.zeros((0,4)), torch.zeros((0,), dtype=torch.int64)

    with open(txt_file) as f:
        for line in f:
            c,x,y,w,h = map(float, line.split())
            x1 = (x - w/2) * W
            y1 = (y - h/2) * H
            x2 = (x + w/2) * W
            y2 = (y + h/2) * H
            boxes.append([x1,y1,x2,y2])
            labels.append(int(c)+1)

    return torch.tensor(boxes, dtype=torch.float32), torch.tensor(labels, dtype=torch.int64)

class YoloDataset(Dataset):
    def __init__(self, img_dir, lbl_dir):
        self.img_dir = img_dir
        self.lbl_dir = lbl_dir
        self.images = [x for x in os.listdir(img_dir) if x.endswith(".jpg")]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.img_dir, img_name)
        lbl_path = os.path.join(self.lbl_dir, img_name.replace(".jpg",".txt"))

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        H,W,_ = img.shape

        boxes, labels = read_yolo_labels(lbl_path, W, H)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        img = F.to_tensor(img)
        return img, target

train_ds = YoloDataset(TRAIN_IMG, TRAIN_LBL)
test_ds  = YoloDataset(TEST_IMG, TEST_LBL)

train_loader = DataLoader(train_ds, batch_size=2, shuffle=True,
                          collate_fn=lambda x: tuple(zip(*x)))
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False,
                          collate_fn=lambda x: tuple(zip(*x)))



In [None]:
# -------- MODEL --------
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# -------- TRAIN --------
EPOCHS = 5
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for imgs, targets in tqdm(train_loader):
        imgs = [img.to(device) for img in imgs]
        targets = [{k:v.to(device) for k,v in t.items()} for t in targets]

        loss_dict = model(imgs, targets)
        loss = sum(v for v in loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS} Loss:", total_loss/len(train_loader))

torch.save(model.state_dict(), "/content/faster_rcnn_fixed.pth")
print("✔ Faster R-CNN trained")


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 200MB/s]
100%|██████████| 1267/1267 [27:36<00:00,  1.31s/it]


Epoch 1/5 Loss: 0.20476899881448066


 16%|█▌        | 200/1267 [02:10<11:39,  1.52it/s]

In [None]:

# -------- IoU --------
def IoU(a,b):
    xA=max(a[0],b[0]); yA=max(a[1],b[1])
    xB=min(a[2],b[2]); yB=min(a[3],b[3])
    inter=max(0,xB-xA)*max(0,yB-yA)
    areaA=(a[2]-a[0])*(a[3]-a[1])
    areaB=(b[2]-b[0])*(b[3]-b[1])
    return inter/(areaA+areaB-inter) if areaA+areaB-inter else 0

# -------- TEST --------
model.eval()
correct = 0
total = 0

for imgs, targets in tqdm(test_loader):
    img = imgs[0].to(device)
    gt_boxes = targets[0]["boxes"].cpu().numpy()
    gt_labels = targets[0]["labels"].cpu().numpy()

    preds = model([img])[0]
    boxes = preds["boxes"].cpu().numpy()
    labels = preds["labels"].cpu().numpy()

    for b,l in zip(boxes,labels):
        total += 1
        for gb,gl in zip(gt_boxes,gt_labels):
            if l==gl and IoU(b,gb)>=0.5:
                correct += 1
                break

print("Faster R-CNN Detection Accuracy (%):", correct/total*100)

# -------- VISUAL --------
img_name = os.listdir(TEST_IMG)[0]
img = cv2.imread(os.path.join(TEST_IMG, img_name))
H,W,_ = img.shape
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

lbl_path = os.path.join(TEST_LBL, img_name.replace(".jpg",".txt"))
gt_boxes,_ = read_yolo_labels(lbl_path, W, H)

for b in gt_boxes:
    x1,y1,x2,y2 = map(int,b)
    cv2.rectangle(img_rgb,(x1,y1),(x2,y2),(0,0,255),2)

pred = model([F.to_tensor(img_rgb).to(device)])[0]
for b in pred["boxes"].cpu().numpy().astype(int):
    x1,y1,x2,y2 = b
    cv2.rectangle(img_rgb,(x1,y1),(x2,y2),(0,255,0),2)

plt.imshow(img_rgb)
plt.title("Faster R-CNN: GT (Red) vs Pred (Green)")
plt.axis("off")
plt.show()


In [1]:
# Setup
from google.colab import drive
drive.mount('/content/drive')

!pip install -q torch torchvision opencv-python matplotlib numpy tqdm pycocotools

import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F


Mounted at /content/drive


In [2]:

# Paths & Classes
BASE_PATH = "/content/drive/My Drive/Colab Notebooks/Thesis_Dataset/Dataset1"
TRAIN_IMG = f"{BASE_PATH}/train/images"
TRAIN_LBL = f"{BASE_PATH}/train/labels"
TEST_IMG  = f"{BASE_PATH}/test/images"
TEST_LBL  = f"{BASE_PATH}/test/labels"

CLASSES = ["propeller","pipe_type2","red_fin","net","qr_codes","pipe"]
NUM_CLASSES = len(CLASSES) + 1  # + background



In [3]:
# Helpers
def read_yolo_labels(txt_file, W, H):
    boxes, labels = [], []
    if not os.path.exists(txt_file):
        return torch.zeros((0,4)), torch.zeros((0,), dtype=torch.int64)

    with open(txt_file) as f:
        for line in f:
            c,x,y,w,h = map(float, line.split())
            x1 = (x - w/2) * W
            y1 = (y - h/2) * H
            x2 = (x + w/2) * W
            y2 = (y + h/2) * H
            boxes.append([x1,y1,x2,y2])
            labels.append(int(c)+1)

    return torch.tensor(boxes, dtype=torch.float32), torch.tensor(labels, dtype=torch.int64)

def IoU(a,b):
    xA=max(a[0],b[0]); yA=max(a[1],b[1])
    xB=min(a[2],b[2]); yB=min(a[3],b[3])
    inter=max(0,xB-xA)*max(0,yB-yA)
    areaA=(a[2]-a[0])*(a[3]-a[1])
    areaB=(b[2]-b[0])*(b[3]-b[1])
    return inter/(areaA+areaB-inter) if areaA+areaB-inter else 0



In [4]:
# Dataset
class YoloDataset(Dataset):
    def __init__(self, img_dir, lbl_dir):
        self.img_dir = img_dir
        self.lbl_dir = lbl_dir
        self.images = sorted([x for x in os.listdir(img_dir) if x.endswith(".jpg")])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.img_dir, img_name)
        lbl_path = os.path.join(self.lbl_dir, img_name.replace(".jpg",".txt"))

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        H,W,_ = img.shape

        boxes, labels = read_yolo_labels(lbl_path, W, H)

        target = {"boxes": boxes, "labels": labels}
        img = F.to_tensor(img)
        return img, target

train_ds = YoloDataset(TRAIN_IMG, TRAIN_LBL)
test_ds  = YoloDataset(TEST_IMG, TEST_LBL)

train_loader = DataLoader(train_ds, batch_size=2, shuffle=True,
                          collate_fn=lambda x: tuple(zip(*x)))
test_loader  = DataLoader(test_ds, batch_size=1, shuffle=False,
                          collate_fn=lambda x: tuple(zip(*x)))



In [5]:
#Model
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)

# Speed optimization
model.rpn.pre_nms_top_n_train = 1000
model.rpn.post_nms_top_n_train = 500
model.rpn.pre_nms_top_n_test = 1000
model.rpn.post_nms_top_n_test = 500

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()



Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 195MB/s]
  scaler = torch.cuda.amp.GradScaler()


In [6]:
#  Training
EPOCHS = 5
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for imgs, targets in tqdm(train_loader):
        imgs = [img.to(device) for img in imgs]
        targets = [{k:v.to(device) for k,v in t.items()} for t in targets]

        with torch.cuda.amp.autocast():
            loss_dict = model(imgs, targets)
            loss = sum(v for v in loss_dict.values())

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS} Loss:", total_loss/len(train_loader))

torch.save(model.state_dict(), "/content/faster_rcnn_final.pth")
print("✔ Faster R-CNN trained")



  with torch.cuda.amp.autocast():
100%|██████████| 1267/1267 [23:44<00:00,  1.12s/it]


Epoch 1/5 Loss: 0.19955802481289256


100%|██████████| 1267/1267 [08:23<00:00,  2.52it/s]


Epoch 2/5 Loss: 0.12588097210370752


100%|██████████| 1267/1267 [08:24<00:00,  2.51it/s]


Epoch 3/5 Loss: 0.10765503381908928


100%|██████████| 1267/1267 [08:24<00:00,  2.51it/s]


Epoch 4/5 Loss: 0.10177261923934497


100%|██████████| 1267/1267 [08:25<00:00,  2.50it/s]


Epoch 5/5 Loss: 0.090919384919548
✔ Faster R-CNN trained


In [None]:

# Evaluation Metrics

model.eval()

TP, FP, FN = 0, 0, 0
correct, total = 0, 0

for imgs, targets in tqdm(test_loader):
    img = imgs[0].to(device)
    gt_boxes = targets[0]["boxes"].cpu().numpy()
    gt_labels = targets[0]["labels"].cpu().numpy()

    with torch.no_grad():
        preds = model([img])[0]

    boxes = preds["boxes"].cpu().numpy()
    labels = preds["labels"].cpu().numpy()
    scores = preds["scores"].cpu().numpy()

    matched_gt = set()

    for b,l,s in zip(boxes,labels,scores):
        if s < 0.5:
            continue

        total += 1
        matched = False
        for i,(gb,gl) in enumerate(zip(gt_boxes,gt_labels)):
            if i in matched_gt:
                continue
            if l==gl and IoU(b,gb)>=0.5:
                TP += 1
                correct += 1
                matched_gt.add(i)
                matched = True
                break
        if not matched:
            FP += 1

    FN += len(gt_boxes) - len(matched_gt)

accuracy = correct / total * 100 if total else 0
precision = TP / (TP + FP) * 100 if TP+FP else 0
recall = TP / (TP + FN) * 100 if TP+FN else 0
map50 = precision  # IoU≥0.5 approximation

print("\n Faster R-CNN Metrics")
print("Accuracy (%):", round(accuracy,2))
print("Precision (%):", round(precision,2))
print("Recall (%):", round(recall,2))
print("mAP@0.5 (%):", round(map50,2))




  2%|▏         | 7/318 [00:05<03:56,  1.31it/s]

In [None]:
# Visualization
sample_imgs = os.listdir(TEST_IMG)[:5]

for img_name in sample_imgs:
    img = cv2.imread(os.path.join(TEST_IMG, img_name))
    H,W,_ = img.shape
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # GT (BLUE)
    lbl_path = os.path.join(TEST_LBL, img_name.replace(".jpg",".txt"))
    gt_boxes,_ = read_yolo_labels(lbl_path, W, H)
    for b in gt_boxes:
        x1,y1,x2,y2 = map(int,b)
        cv2.rectangle(img_rgb,(x1,y1),(x2,y2),(0,0,255),2)

    # Predictions (GREEN)
    with torch.no_grad():
        pred = model([F.to_tensor(img_rgb).to(device)])[0]

    for b,s in zip(pred["boxes"].cpu().numpy(), pred["scores"].cpu().numpy()):
        if s < 0.5:
            continue
        x1,y1,x2,y2 = map(int,b)
        cv2.rectangle(img_rgb,(x1,y1),(x2,y2),(0,255,0),2)

    plt.figure(figsize=(8,8))
    plt.imshow(img_rgb)
    plt.title("Faster R-CNN: GT (Blue) vs Pred (Green)")
    plt.axis("off")
    plt.show()