In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
from collections import defaultdict
import os
import cv2
import seaborn as sns
import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt

# 🔹 Custom Flickr8K Dataset
class Flickr8KDataset(Dataset):
    """Detection dataset pairing images with YOLO-format label files.

    Each sample is ``(image, target)``:

    * ``image``  - the RGB image converted with ``F.to_tensor``.
    * ``target`` - dict with ``"boxes"`` (float32, shape ``(N, 4)``, absolute
      ``[x1, y1, x2, y2]`` pixel coordinates) and ``"labels"`` (int64, ``(N,)``).

    An image without a label file, or with no valid boxes, yields an empty
    target whose ``"boxes"`` tensor still has shape ``(0, 4)``.

    NOTE(review): class ids are passed through unchanged from the label files.
    torchvision detection models treat label 0 as background, while this
    notebook's class table maps 0 to "objects" - confirm whether ids need a +1
    shift before training.
    """

    def __init__(self, image_folder, label_folder):
        """Index the files in ``image_folder``; labels are looked up in ``label_folder``."""
        self.image_folder = image_folder
        self.label_folder = label_folder
        # os.listdir() returns entries in arbitrary order; sort so that a given
        # index always refers to the same image across runs.
        self.image_files = sorted(os.listdir(image_folder))

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.image_files)

    @staticmethod
    def _load_yolo_boxes(label_path, img_w, img_h):
        """Parse one YOLO label file into absolute-pixel boxes.

        Args:
            label_path: path of a text file with ``class xc yc w h`` per line,
                coordinates relative to the image size.
            img_w, img_h: image width and height in pixels.

        Returns:
            ``(boxes, labels)`` - a list of ``[x1, y1, x2, y2]`` floats clamped
            to the image bounds, and the matching list of int class ids.

        Raises:
            FileNotFoundError: if ``label_path`` does not exist.
        """
        boxes, labels = [], []
        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if not parts:
                    continue  # blank line, nothing to report
                if len(parts) != 5:
                    print(f"Skipping malformed line in {label_path}: {line.strip()}")
                    continue
                try:
                    class_id, x_center, y_center, width, height = map(float, parts)
                except ValueError:
                    print(f"Skipping malformed line in {label_path}: {line.strip()}")
                    continue

                # Convert YOLO format (relative centre/size) to absolute corner
                # coordinates, clamped to the image.
                x1 = min(max((x_center - width / 2) * img_w, 0.0), float(img_w))
                y1 = min(max((y_center - height / 2) * img_h, 0.0), float(img_h))
                x2 = min(max((x_center + width / 2) * img_w, 0.0), float(img_w))
                y2 = min(max((y_center + height / 2) * img_h, 0.0), float(img_h))

                # Boxes without positive width and height are dropped here so
                # they never reach the detection model's target validation.
                if x2 <= x1 or y2 <= y1:
                    print(f"Skipping degenerate box in {label_path}: {line.strip()}")
                    continue

                boxes.append([x1, y1, x2, y2])
                labels.append(int(class_id))
        return boxes, labels

    def __getitem__(self, idx):
        """Load image ``idx`` and its boxes; see the class docstring for the format."""
        file_name = self.image_files[idx]
        img_path = os.path.join(self.image_folder, file_name)
        # splitext swaps only the final extension, whatever it is.
        label_path = os.path.join(self.label_folder, os.path.splitext(file_name)[0] + ".txt")

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise RuntimeError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        h, w = img.shape[:2]

        try:
            boxes, labels = self._load_yolo_boxes(label_path, w, h)
        except FileNotFoundError:
            print(f"Warning: No label file found for {img_path}, skipping.")
            boxes, labels = [], []

        target = {
            # reshape keeps the (N, 4) layout even when there are no boxes.
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64),
        }

        img = F.to_tensor(img)
        return img, target

# 🔹 Load Dataset
def _collate_detection_batch(batch):
    """Turn a list of (image, target) samples into an (images, targets) pair of tuples.

    Samples are regrouped rather than stacked, so each image keeps its own size.
    """
    return tuple(zip(*batch))


# NOTE(review): these are absolute paths on one specific machine; the notebook
# will not run elsewhere until they are adjusted.
train_dataset = Flickr8KDataset(
    "C:\\Users\\ASUS\\Downloads\\IIITD Ass\\Flickr8k.v3i.yolov8\\train\\images",
    "C:\\Users\\ASUS\\Downloads\\IIITD Ass\\Flickr8k.v3i.yolov8\\train\\labels",
)
valid_dataset = Flickr8KDataset(
    "C:\\Users\\ASUS\\Downloads\\IIITD Ass\\Flickr8k.v3i.yolov8\\valid\\images",
    "C:\\Users\\ASUS\\Downloads\\IIITD Ass\\Flickr8k.v3i.yolov8\\valid\\labels",
)

# Training batches are shuffled; validation batches keep dataset order.
train_loader = DataLoader(
    train_dataset, batch_size=2, shuffle=True, collate_fn=_collate_detection_batch
)
valid_loader = DataLoader(
    valid_dataset, batch_size=2, shuffle=False, collate_fn=_collate_detection_batch
)


# 🔹 Load Faster R-CNN Model
num_classes = 34  # Your dataset has 34 classes (including background)

# Start from the pretrained Faster R-CNN (ResNet-50 + FPN backbone).
# NOTE(review): recent torchvision releases prefer a `weights=` argument over
# `pretrained=True` - confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box-classification head for one sized to this dataset; the new head
# reuses the input width of the head it replaces.
head_in_features = model.roi_heads.box_predictor.cls_score.in_features
in_features = head_in_features
model.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

# Class id -> display name. Ids are the positions in this list (0..33).
flickr8k_classes = dict(enumerate([
    "objects", "Carton", "Hat", "Mountains", "Pipe", "Polythene", "Tent",
    "Tub", "Backpack", "Baseball Glove", "Bed", "Bicycle", "Bird", "Boat",
    "Car", "Cat", "Cell Phone", "Couch", "Dog", "Frisbee", "Handbag",
    "Horse", "Motorcycle", "Mug", "Person", "Potted Plant", "Rope",
    "Skateboard", "Sports Ball", "Suitcase", "Surfboard", "Table", "Tree",
    "Umbrella",
]))

# 🔹 Training Setup
# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# 🔹 Training Loop
for epoch in range(100):
    model.train()
    epoch_loss = 0

    for batch_images, batch_targets in train_loader:
        images = [image.to(device) for image in batch_images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        # Called with targets, the model returns a dict of loss terms; the
        # optimisation objective is their sum.
        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean per-batch loss for this epoch.
    mean_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Loss = {mean_loss:.4f}")

print("✅ Faster R-CNN Training Completed")







In [None]:
import torch
import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Function to compute IoU (Intersection over Union)
def calculate_iou(boxA, boxB):
    """Return the Intersection over Union of two ``[x1, y1, x2, y2]`` boxes.

    Coordinates are treated as continuous values, so a box's area is
    ``(x2 - x1) * (y2 - y1)`` with no ``+1`` pixel-inclusive correction. With
    float coordinates that correction inflates every area and gives boxes that
    are less than one pixel apart a non-zero overlap.

    Args:
        boxA, boxB: indexable sequences of four numbers, ``x2 >= x1`` and
            ``y2 >= y1`` expected.

    Returns:
        IoU as a float in ``[0, 1]``; ``0.0`` when the union has no area.
    """
    # Width/height of the overlap rectangle, floored at zero for disjoint boxes.
    inter_w = max(0.0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]))
    inter_h = max(0.0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]))
    interArea = inter_w * inter_h

    boxAArea = max(0.0, boxA[2] - boxA[0]) * max(0.0, boxA[3] - boxA[1])
    boxBArea = max(0.0, boxB[2] - boxB[0]) * max(0.0, boxB[3] - boxB[1])

    union = boxAArea + boxBArea - interArea
    if union <= 0:
        # Both boxes are degenerate; avoid dividing by zero.
        return 0.0
    return float(interArea / union)

# Function to match predictions with ground truth
def match_predictions(gt_boxes, gt_labels, pred_boxes, pred_labels, pred_scores, iou_threshold=0.5):
    """Greedily pair each ground-truth box with its best-overlapping prediction.

    Ground-truth boxes are visited in order. Each one claims the not-yet-claimed
    prediction with the highest IoU, provided that IoU is at least
    ``iou_threshold`` (and greater than zero). Class labels play no part in the
    pairing. Ground truths and predictions left without a partner are omitted.

    Returns:
        ``(matched_gt, matched_pred, matched_scores)`` - three parallel lists
        holding, per pair, the ground-truth label, the predicted label and the
        prediction score.
    """
    matched_gt, matched_pred, matched_scores = [], [], []
    claimed = set()  # indices of predictions already paired

    for gt_idx, gt_box in enumerate(gt_boxes):
        winner = -1
        winner_iou = 0

        for candidate, candidate_box in enumerate(pred_boxes):
            if candidate in claimed:
                continue

            overlap = calculate_iou(gt_box, candidate_box)
            if overlap >= iou_threshold and overlap > winner_iou:
                winner, winner_iou = candidate, overlap

        if winner == -1:
            continue  # nothing overlapped this ground truth well enough

        claimed.add(winner)
        matched_gt.append(gt_labels[gt_idx])
        matched_pred.append(pred_labels[winner])
        matched_scores.append(pred_scores[winner])

    return matched_gt, matched_pred, matched_scores

# Perform inference on validation set
model.eval()
all_gt_labels, all_pred_labels, all_pred_scores = [], [], []

# Gradients are not needed for inference.
with torch.no_grad():
    for images, targets in valid_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for i, output in enumerate(outputs):
            # Move everything to NumPy on the CPU before matching.
            gt_boxes = targets[i]["boxes"].cpu().numpy()
            gt_labels = targets[i]["labels"].cpu().numpy()
            pred_boxes = output["boxes"].cpu().numpy()
            pred_labels = output["labels"].cpu().numpy()
            pred_scores = output["scores"].cpu().numpy()

            matched_gt, matched_pred, matched_scores = match_predictions(
                gt_boxes, gt_labels, pred_boxes, pred_labels, pred_scores
            )

            # Only matched pairs are accumulated across the validation set.
            all_gt_labels += matched_gt
            all_pred_labels += matched_pred
            all_pred_scores += matched_scores

# Compute mAP (mean Average Precision)
def compute_map(gt_labels, pred_labels, pred_scores, num_classes):
    """Compute a one-vs-rest average precision per class and their mean.

    The three input sequences are parallel: entry ``i`` describes one matched
    (ground truth, prediction) pair. For every class id in
    ``1 .. num_classes - 1`` a pair counts as positive when its ground-truth
    label is that class, and is scored with the prediction's confidence when
    the predicted label is that class (0 otherwise). Classes with no positive
    pair are skipped.

    Args:
        gt_labels: ground-truth class id per pair.
        pred_labels: predicted class id per pair.
        pred_scores: prediction confidence per pair.
        num_classes: number of class ids; id 0 is excluded from the loop.

    Returns:
        ``(mAP, ap_per_class)`` - the mean of the per-class APs (``0`` when no
        class had positives) and the list of APs in class-id order.
    """
    gt = np.asarray(gt_labels)
    pred = np.asarray(pred_labels)
    scores = np.asarray(pred_scores, dtype=float)

    ap_per_class = []
    for cls in range(1, num_classes):
        cls_gt = (gt == cls).astype(int)
        if cls_gt.sum() == 0:
            continue  # AP is undefined without any positive sample

        cls_pred_scores = np.where(pred == cls, scores, 0.0)
        ap_per_class.append(average_precision_score(cls_gt, cls_pred_scores))

    mAP = np.mean(ap_per_class) if ap_per_class else 0
    return mAP, ap_per_class

num_classes = 34
mAP, ap_per_class = compute_map(all_gt_labels, all_pred_labels, all_pred_scores, num_classes)
print(f"📊 Mean Average Precision (mAP): {mAP:.3f}")

# Confusion matrix over the matched pairs, one row/column per class id.
conf_matrix = confusion_matrix(
    all_gt_labels, all_pred_labels, labels=list(range(num_classes))
)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_recall_curve, average_precision_score, confusion_matrix
from collections import defaultdict

# 🔹 Evaluation
threshold = 0.2        # minimum confidence for a detection to be kept (adjust as needed)
iou_threshold = 0.5    # minimum IoU for a detection to claim a ground-truth box
background_label = 0   # torchvision detection models reserve label 0 for background

model.eval()
all_preds, all_targets, all_scores = [], [], []

# The three lists are built in lockstep: entry i is one (predicted label,
# true label, score) triple. Detections are paired with ground truth by box
# overlap within the same image - not by list position - and unmatched items
# are kept by pairing them with the background label:
#   * detection without a ground truth -> true label = background (false positive)
#   * ground truth without a detection -> predicted label = background, score 0
with torch.no_grad():
    for images, targets in valid_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for output, target in zip(outputs, targets):
            keep = output["scores"] > threshold  # Keep only high-confidence detections
            pred_boxes = output["boxes"][keep].cpu()
            scores = output["scores"][keep].cpu().numpy()
            labels = output["labels"][keep].cpu().numpy()

            gt_boxes = target["boxes"].cpu().reshape(-1, 4)
            gt_labels = target["labels"].cpu().numpy()

            if len(pred_boxes) and len(gt_boxes):
                # iou_matrix[p, g] = IoU of detection p with ground truth g.
                iou_matrix = torchvision.ops.box_iou(pred_boxes, gt_boxes).numpy()
            else:
                iou_matrix = np.zeros((len(pred_boxes), len(gt_boxes)))

            claimed_gt = set()
            # Most confident detections pick their ground truth first.
            for pred_idx in np.argsort(-scores):
                free_gt = [g for g in range(len(gt_labels)) if g not in claimed_gt]
                best_gt = max(free_gt, key=lambda g: iou_matrix[pred_idx, g], default=None)

                if best_gt is not None and iou_matrix[pred_idx, best_gt] >= iou_threshold:
                    claimed_gt.add(best_gt)
                    true_label = int(gt_labels[best_gt])
                else:
                    true_label = background_label

                all_preds.append(int(labels[pred_idx]))
                all_scores.append(float(scores[pred_idx]))
                all_targets.append(true_label)

            # Ground-truth boxes nobody claimed are missed detections.
            for gt_idx, gt_label in enumerate(gt_labels):
                if gt_idx not in claimed_gt:
                    all_preds.append(background_label)
                    all_scores.append(0.0)
                    all_targets.append(int(gt_label))

# Organize scores and targets per class
# One-vs-rest view of the paired predictions: for every class id, a list of
# binary targets and a parallel list of scores.
class_scores_dict = defaultdict(list)
class_targets_dict = defaultdict(list)

for pred_label, pred_score, true_label in zip(all_preds, all_scores, all_targets):
    for class_id in range(num_classes):
        # Positive for the class that is actually present ...
        class_targets_dict[class_id].append(1 if true_label == class_id else 0)
        # ... and the prediction only "votes" for the class it predicted.
        # Scoring every class with the same confidence would make the ranking
        # independent of the predicted label.
        class_scores_dict[class_id].append(pred_score if pred_label == class_id else 0.0)

# 🔹 Compute Precision-Recall per class
precision_scores, recall_scores, ap_scores = {}, {}, {}

for class_id in class_targets_dict.keys():
    y_true = np.array(class_targets_dict[class_id])
    y_score = np.array(class_scores_dict[class_id])

    # A class without a single positive sample has no defined AP; including it
    # would only pull the mean down. This also covers the empty case.
    if y_true.sum() == 0:
        print(f"Skipping class {class_id} due to no data")
        continue  # No data for this class

    precision, recall, _ = precision_recall_curve(y_true, y_score)
    ap = average_precision_score(y_true, y_score)

    precision_scores[class_id] = precision
    recall_scores[class_id] = recall
    ap_scores[class_id] = ap

# 🔹 Compute Mean Average Precision (mAP) at IoU 50 and IoU 50-95
# Guard the empty case so the mean is 0.0 instead of NaN plus a warning.
mAP50 = float(np.mean(list(ap_scores.values()))) if ap_scores else 0.0
# Placeholder: this is the same number as mAP50. A real mAP@50-95 needs the
# evaluation repeated at several IoU thresholds, which is not done here.
mAP50_95 = mAP50

print(f"mAP@50: {mAP50:.4f}")
print(f"mAP@50-95: {mAP50_95:.4f}")

# 🔹 Plot Precision-Recall Curve
# One precision-vs-recall line per class that has a stored curve.
fig, ax = plt.subplots(figsize=(8, 6))
for class_id in precision_scores:
    ax.plot(
        recall_scores[class_id],
        precision_scores[class_id],
        label=f'Class {class_id}',
    )

ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Curve")
ax.legend()
ax.grid()
plt.show()

# Compute Confusion Matrix
class_ids = list(flickr8k_classes.keys())
class_names = list(flickr8k_classes.values())

# Rows are true labels, columns are predicted labels, both in class-id order.
cm = confusion_matrix(all_targets, all_preds, labels=class_ids)
print("Confusion Matrix:\n", cm)  # Print for debugging

# Plot Confusion Matrix with Class Names
plt.figure(figsize=(12, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.xticks(rotation=90)  # Rotate x-axis labels for better visibility
plt.yticks(rotation=0)
plt.show()

# Save metrics
# Overall precision / recall from the paired (prediction, target) lists.
# A pair is a true positive only when the predicted label equals the true
# label; counting every pair as correct would report ~1.0 for any model.
# Label 0 is the background id of torchvision detection models, so entries
# carrying it are not counted as detections / ground-truth objects.
background_label = 0
n_detections = sum(1 for p in all_preds if p != background_label)
n_ground_truth = sum(1 for t in all_targets if t != background_label)

true_positives = sum(
    1 for p, t in zip(all_preds, all_targets) if p == t and t != background_label
)
false_positives = n_detections - true_positives
false_negatives = n_ground_truth - true_positives

precision = true_positives / (true_positives + false_positives + 1e-6)  # Avoid division by zero
recall = true_positives / (true_positives + false_negatives + 1e-6)

metrics = {
    "Precision(B)": float(precision),
    "Recall(B)": float(recall),
    "mAP50(B)": float(mAP50),
    "mAP50-95(B)": float(mAP50_95)
}

# np.save stores a dict as a pickled object array; reading it back requires
# np.load("metrics.npy", allow_pickle=True).item().
np.save("metrics.npy", metrics)

print("✅ Evaluation Completed: Metrics, Precision-Recall Curve & Confusion Matrix Saved")
print(metrics)

# Save only model weights
weights_path = "faster_rcnn_flickr8k.pth"
full_model_path = "faster_rcnn_flickr8k_full.pth"

# Parameters only (state dict).
torch.save(model.state_dict(), weights_path)

# Save entire model (architecture + weights)
torch.save(model, full_model_path)

print("✅ Model saved successfully!")


In [None]:
# 🔹 Evaluation
# This cell repeats the evaluation of the previous cell.
threshold = 0.2        # minimum confidence for a detection to be kept (adjust as needed)
iou_threshold = 0.5    # minimum IoU for a detection to claim a ground-truth box
background_label = 0   # torchvision detection models reserve label 0 for background

model.eval()
all_preds, all_targets, all_scores = [], [], []

# Entry i of the three lists is one (predicted label, true label, score)
# triple. Pairing is done per image by box overlap instead of trimming two
# unrelated flat lists to the same length. Whatever stays unmatched is paired
# with the background label (score 0 for a missed ground truth).
with torch.no_grad():
    for images, targets in valid_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for output, target in zip(outputs, targets):
            confident = output["scores"] > threshold  # Keep only high-confidence detections
            scores = output["scores"][confident].cpu().numpy()
            labels = output["labels"][confident].cpu().numpy()
            det_boxes = output["boxes"][confident].cpu()

            gt_boxes = target["boxes"].cpu().reshape(-1, 4)
            gt_labels = target["labels"].cpu().numpy()

            # overlaps[d, g] = IoU between detection d and ground truth g.
            if len(det_boxes) > 0 and len(gt_boxes) > 0:
                overlaps = torchvision.ops.box_iou(det_boxes, gt_boxes).numpy()
            else:
                overlaps = np.zeros((len(det_boxes), len(gt_boxes)))

            used_gt = set()
            # Visit detections from most to least confident.
            for det_idx in np.argsort(-scores):
                match_idx, match_iou = None, iou_threshold
                for gt_idx in range(len(gt_labels)):
                    if gt_idx in used_gt:
                        continue
                    if overlaps[det_idx, gt_idx] >= match_iou:
                        match_idx, match_iou = gt_idx, overlaps[det_idx, gt_idx]

                if match_idx is None:
                    matched_label = background_label  # false positive
                else:
                    used_gt.add(match_idx)
                    matched_label = int(gt_labels[match_idx])

                all_preds.append(int(labels[det_idx]))
                all_scores.append(float(scores[det_idx]))
                all_targets.append(matched_label)

            # Ground truths left over were missed by the detector.
            for gt_idx in range(len(gt_labels)):
                if gt_idx not in used_gt:
                    all_preds.append(background_label)
                    all_scores.append(0.0)
                    all_targets.append(int(gt_labels[gt_idx]))

# Organize scores and targets per class
# One-vs-rest targets and scores per class id, built from the paired lists.
class_scores_dict = defaultdict(list)
class_targets_dict = defaultdict(list)

for pred_label, pred_score, true_label in zip(all_preds, all_scores, all_targets):
    for class_id in range(num_classes):
        is_positive = true_label == class_id
        votes_for_class = pred_label == class_id
        # Keep only the pairs that involve this class, either as the true or
        # as the predicted label. The filter uses this pair's own labels, not
        # the `labels` array left over from the last inference iteration.
        if not (is_positive or votes_for_class):
            continue
        class_targets_dict[class_id].append(1 if is_positive else 0)
        # A prediction supports a class only when it predicted that class.
        class_scores_dict[class_id].append(pred_score if votes_for_class else 0.0)
    


# 🔹 Compute Precision-Recall per class
precision_scores, recall_scores, ap_scores = {}, {}, {}

for class_id in class_targets_dict.keys():
    y_true = np.array(class_targets_dict[class_id])
    y_score = np.array(class_scores_dict[class_id])

    # Without any positive sample the AP of a class is undefined, so such
    # classes are left out of the mean (this also covers empty lists).
    if y_true.sum() == 0:
        print(f"Skipping class {class_id} due to no data")
        continue  # No data for this class

    precision, recall, _ = precision_recall_curve(y_true, y_score)
    ap = average_precision_score(y_true, y_score)

    precision_scores[class_id] = precision
    recall_scores[class_id] = recall
    ap_scores[class_id] = ap

# 🔹 Compute Mean Average Precision (mAP)
# An empty dict yields 0.0 rather than NaN.
mAP50 = float(np.mean(list(ap_scores.values()))) if ap_scores else 0.0
# Placeholder: identical to mAP50. A real mAP@50-95 requires repeating the
# evaluation over a range of IoU thresholds, which this cell does not do.
mAP50_95 = mAP50

print(f"mAP@50: {mAP50:.4f}")
print(f"mAP@50-95: {mAP50_95:.4f}")

# 🔹 Plot Precision-Recall Curve
# Overlay the stored precision/recall arrays of every evaluated class.
plt.figure(figsize=(8, 6))
for class_id in precision_scores.keys():
    class_recall = recall_scores[class_id]
    class_precision = precision_scores[class_id]
    plt.plot(class_recall, class_precision, label=f'Class {class_id}')

plt.title("Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend()
plt.grid()
plt.show()

# Compute Confusion Matrix
# Class ids fix the row/column order; class names label the axes.
label_ids = list(flickr8k_classes.keys())
label_names = list(flickr8k_classes.values())

cm = confusion_matrix(all_targets, all_preds, labels=label_ids)
print("Confusion Matrix:\n", cm)  # Print for debugging


# Plot Confusion Matrix with Class Names
plt.figure(figsize=(12, 8))
heatmap_kwargs = dict(annot=True, fmt="d", cmap="Blues")
sns.heatmap(cm, xticklabels=label_names, yticklabels=label_names, **heatmap_kwargs)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.xticks(rotation=90)  # Rotate x-axis labels for better visibility
plt.yticks(rotation=0)
plt.show()



# Save metrics
# Overall precision / recall computed directly from the paired lists.
# Only pairs whose predicted label equals the true label are true positives.
# Summing the one-vs-rest target lists instead counts every pair as correct
# and mixes in one-vs-rest negatives as "false positives".
# Label 0 is the background id of torchvision detection models, so entries
# carrying it count neither as detections nor as ground-truth objects.
background_label = 0
detection_count = sum(1 for p in all_preds if p != background_label)
ground_truth_count = sum(1 for t in all_targets if t != background_label)

true_positives = sum(
    1 for p, t in zip(all_preds, all_targets) if p == t and t != background_label
)
false_positives = detection_count - true_positives

precision = true_positives / (true_positives + false_positives + 1e-6)  # Avoid division by zero
recall = true_positives / (ground_truth_count + 1e-6)

metrics = {
    "Precision(B)": float(precision),
    "Recall(B)": float(recall),
    "mAP50(B)": float(mAP50),
    "mAP50-95(B)": float(mAP50_95)
}

# The dict is stored as a pickled object array; load it back with
# np.load("metrics.npy", allow_pickle=True).item().
np.save("metrics.npy", metrics)

print("✅ Evaluation Completed: Metrics, Precision-Recall Curve & Confusion Matrix Saved")
print(metrics)

# Save only model weights
# Write the state dict and the whole model object, in that order.
for payload, destination in (
    (model.state_dict(), "faster_rcnn_flickr8k.pth"),   # weights only
    (model, "faster_rcnn_flickr8k_full.pth"),           # architecture + weights
):
    torch.save(payload, destination)

print("✅ Model saved successfully!")

In [None]:
import matplotlib.pyplot as plt
import cv2

def visualize_predictions(model, dataloader, device, num_images=3, score_threshold=0.3):
    """Show images with ground-truth (green) and predicted (blue) boxes drawn on them.

    Args:
        model: detection model; called with a list of image tensors and
            expected to return one dict per image with ``"boxes"`` and
            ``"scores"`` entries.
        dataloader: iterable of ``(images, targets)`` batches, where each
            target is a dict with a ``"boxes"`` tensor.
        device: device the images are moved to before inference.
        num_images: maximum number of images to display.
        score_threshold: predictions with a score at or below this value are
            not drawn.
    """
    model.eval()
    shown = 0

    with torch.no_grad():
        for images, targets in dataloader:
            if shown >= num_images:
                break

            # Keep the images as a list: they may differ in size, so they
            # cannot be stacked into a single tensor.
            images = [img.to(device) for img in images]
            predictions = model(images)

            for img, pred, target in zip(images, predictions, targets):
                if shown >= num_images:
                    break

                img = img.cpu().permute(1, 2, 0).numpy()  # CHW tensor -> HWC array
                img = (img * 255).astype(np.uint8)  # Convert to uint8

                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # OpenCV expects BGR

                # Draw ground truth boxes
                for box in target["boxes"].cpu().numpy().reshape(-1, 4):
                    x1, y1, x2, y2 = map(int, box)
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green for GT

                # Draw predicted boxes; each prediction is a dict, so boxes
                # and scores are read by key.
                pred_boxes = pred["boxes"].cpu().numpy()
                pred_scores = pred["scores"].cpu().numpy()
                for box, score in zip(pred_boxes, pred_scores):
                    if score > score_threshold:  # Only draw high-confidence predictions
                        x1, y1, x2, y2 = map(int, box)
                        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)  # Blue for Predictions

                plt.figure(figsize=(8, 6))
                plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # Convert BGR back to RGB for display
                plt.show()
                shown += 1

# Use the model trained and evaluated above; no `model_eval` is defined in
# this notebook, so referencing it fails on a fresh kernel.
visualize_predictions(model, valid_loader, device)
