In [None]:
# !wget --directory-prefix=downloads http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip
# !unzip -qq downloads/cocostuff-10k-v1.1.zip -d dataset/
# !wget --directory-prefix=dataset/annotations-json http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.json

In [None]:
# Install the deep-learning stack into the kernel's own environment
%pip install torch torchvision pycocotools


In [None]:
# Install image I/O and array dependencies into the kernel's own environment
%pip install opencv-python numpy


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import torch
import json

# Load COCO-Stuff label mapping from a JSON file
def load_coco_stuff_labels(annotation_file):
    """
    Build a category-id -> category-name lookup from a COCO-style annotation file.

    Args:
    - annotation_file: Path to the COCO-Stuff annotations JSON file. The file
      must contain a top-level 'categories' list whose entries carry 'id' and
      'name' keys.

    Returns:
    - label_map: Dictionary mapping label IDs to class names.

    Raises:
    - OSError: if the file cannot be opened.
    - json.JSONDecodeError: if the file is not valid JSON.
    - KeyError: if 'categories', or an entry's 'id'/'name', is missing.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(annotation_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Extract categories (id and name) from the annotation file
    return {category['id']: category['name'] for category in data['categories']}

# Location of the COCO-Stuff 10k annotation file (Colab directory layout)
annotation_file = '/content/dataset/annotations-json/cocostuff-10k-v1.1.json'

# Category id -> name lookup used when labelling predicted boxes
LABELS = load_coco_stuff_labels(annotation_file)

def visualize_prediction(image, targets, prediction, idx=0, score_threshold=0.1):
    """
    Visualizes the predicted bounding boxes, labels, and scores on an image.

    NOTE: a later cell in this notebook defines another `visualize_prediction`
    with a different signature (image, predictions, label_map, threshold).
    Once that cell has run, this definition is shadowed; re-run this cell
    before calling code that relies on the batch-style signature.

    Args:
    - image: Indexable batch of image tensors (a list of tensors or a batched
      tensor); each image is laid out (C, H, W). Values are assumed to be
      floats in [0, 1].
    - targets: Ground truth (not used here but can be visualized).
    - prediction: Model prediction dictionary for ONE image containing
      'boxes' (x_min, y_min, x_max, y_max), 'labels', and 'scores' tensors.
    - idx: Index of the image to visualize from the batch.
    - score_threshold: Only boxes with a score strictly greater than this
      value are drawn.
    """
    # Convert image back from tensor to numpy array (RGB)
    img = image[idx].permute(1, 2, 0).cpu().numpy()
    # Clip before the uint8 cast so out-of-range values saturate instead of wrapping
    img = (np.clip(img, 0.0, 1.0) * 255).astype(np.uint8)

    # Create a figure and axis for plotting
    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(img)

    # Pull the predictions to host memory once
    boxes = prediction['boxes'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()
    scores = prediction['scores'].cpu().numpy()

    for box, label, score in zip(boxes, labels, scores):
        if score > score_threshold:
            x_min, y_min, x_max, y_max = box
            rect = patches.Rectangle(
                (x_min, y_min), x_max - x_min, y_max - y_min,
                linewidth=2, edgecolor='red', facecolor='none'
            )
            ax.add_patch(rect)

            # Look the class name up with a plain int key; fall back to
            # 'Unknown' if the label is not in the mapping
            label_name = LABELS.get(int(label), 'Unknown')
            ax.text(
                x_min, y_min, f'{label_name}: {score:.2f}', color='white',
                fontsize=12, bbox=dict(facecolor='red', alpha=0.7)
            )

    plt.show()

# Example usage (assuming you have a batch of images and predictions):
# image = ... (a batch of images, tensor format)
# targets = ... (ground truth annotations)
# prediction = ... (model predictions)

# visualize_prediction(image, targets, prediction, idx=0)


In [None]:
import json

# Location of the COCO-Stuff annotation file
annotation_file = '/content/dataset/annotations-json/cocostuff-10k-v1.1.json'

# Parse the whole annotation file into memory
with open(annotation_file, 'r') as f:
    annotations = json.load(f)

# Show which sections the file contains
print("Top-level keys in the JSON file:", annotations.keys())

# For 'images' and 'annotations' show only the first record; the category
# list is printed in full because it is the label mapping.
for heading, section, first_only in (
    ("\nExample image entry:", 'images', True),
    ("\nExample annotation entry:", 'annotations', True),
    ("\nCategories (label mapping):", 'categories', False),
):
    print(heading)
    entry = annotations[section]
    if first_only:
        entry = entry[0]
    print(json.dumps(entry, indent=4))


In [None]:
import os
import json
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

class CustomDatasetWithJSONAnnotations(Dataset):
    """
    Object-detection dataset backed by a COCO-style JSON annotation file.

    Only the images named in `image_list_file` are exposed. Each item is a
    pair `(img, target)`:

    - `img`: float32 tensor laid out (C, H, W), RGB, pixel values divided by 255.
    - `target`: dict with 'boxes' (float32, [x_min, y_min, x_max, y_max]),
      'labels' (int64), 'area' (float32) and 'iscrowd' (uint8); or `None`
      when the image has no usable bounding boxes. Callers must filter out
      `None` targets themselves.
    """

    def __init__(self, image_dir, annotation_file, image_list_file, transforms=None):
        """
        Args:
        - image_dir: Directory containing the image files.
        - annotation_file: Path to the JSON file with 'images', 'annotations'
          and 'categories' sections.
        - image_list_file: Text file with one image name per line, without
          the '.jpg' extension.
        - transforms: Optional callable applied to the image tensor only
          (the target is passed through untouched).
        """
        self.image_dir = image_dir
        self.transforms = transforms

        # Load annotations JSON file
        with open(annotation_file, 'r') as f:
            self.annotations_data = json.load(f)

        # The list file stores bare names; append '.jpg' so they match 'file_name'
        with open(image_list_file, 'r') as f:
            self.image_list = set(line.strip() + '.jpg' for line in f)

        # Keep only the image records that belong to this split
        self.images = [
            img for img in self.annotations_data['images']
            if img['file_name'] in self.image_list
        ]

        # Group annotations by image id, restricted to the images of this split
        self.annotations = self.annotations_data['annotations']
        self.image_to_annotations = {img['id']: [] for img in self.images}
        for anno in self.annotations:
            if anno['image_id'] in self.image_to_annotations:
                self.image_to_annotations[anno['image_id']].append(anno)

        # Map categories for label mapping
        self.categories = {cat['id']: cat['name'] for cat in self.annotations_data['categories']}

    def __len__(self):
        """Number of images in this split."""
        return len(self.images)

    @staticmethod
    def _bbox_to_xyxy(bbox):
        """Convert an [x, y, w, h] box (flat or wrapped in a 1-element list) to
        [x_min, y_min, x_max, y_max]; return None if the format is not recognised."""
        # Some annotations wrap the box in an extra list: [[x, y, w, h]]
        if isinstance(bbox, list) and len(bbox) == 1 and isinstance(bbox[0], (list, tuple)):
            bbox = bbox[0]
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            return None
        x, y, w, h = bbox
        return [x, y, x + w, y + h]

    def __getitem__(self, idx):
        """
        Load image `idx` and its annotations.

        Returns:
        - (img, target), or (img, None) when no annotation has a usable box.
          In the `None` case `transforms` is not applied to the image.

        Raises:
        - FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        # Get image metadata
        image_meta = self.images[idx]
        image_path = os.path.join(self.image_dir, image_meta['file_name'])

        # cv2.imread signals failure by returning None rather than raising
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.tensor(img / 255.0, dtype=torch.float32).permute(2, 0, 1)

        boxes, labels, areas, iscrowd = [], [], [], []
        for anno in self.image_to_annotations[image_meta['id']]:
            box = self._bbox_to_xyxy(anno['bbox'])
            if box is None:
                # Malformed boxes are reported and skipped, not fatal
                print(f"Warning: Skipping annotation with invalid 'bbox' format: {anno['bbox']}")
                continue
            boxes.append(box)
            labels.append(anno['category_id'])
            areas.append(anno['area'])
            iscrowd.append(anno['iscrowd'])

        # Handle the case where there are no annotations for an image
        if not boxes:
            return img, None

        # Create the target dictionary
        target = {
            "boxes": torch.as_tensor(boxes, dtype=torch.float32),
            "labels": torch.as_tensor(labels, dtype=torch.int64),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            "iscrowd": torch.as_tensor(iscrowd, dtype=torch.uint8),
        }

        # Apply any transforms if provided (image only)
        if self.transforms:
            img = self.transforms(img)

        return img, target


# Helper function to visualize an image with bounding boxes
def visualize_image_with_boxes(img, target, category_mapping):
    """
    Show one image with its ground-truth boxes and class names.

    Args:
    - img: Image tensor laid out (C, H, W).
    - target: Dict with 'boxes' ([x_min, y_min, x_max, y_max] per row) and
      'labels', or None. With None the image is shown without any boxes,
      which is what the dataset yields for images with no usable annotation.
    - category_mapping: Lookup from label id to class name, indexed with
      `category_mapping[label_id]`.
    """
    fig, ax = plt.subplots(1, figsize=(12, 9))
    img = img.permute(1, 2, 0).cpu().numpy()  # Convert from (C, H, W) to (H, W, C)
    ax.imshow(img)

    if target is not None:
        for box, label in zip(target['boxes'], target['labels']):
            # Plain Python numbers keep matplotlib independent of tensor types/devices
            x_min, y_min, x_max, y_max = (float(v) for v in box)
            label_id = int(label)
            rect = patches.Rectangle(
                (x_min, y_min), x_max - x_min, y_max - y_min,
                linewidth=2, edgecolor='red', facecolor='none'
            )
            ax.add_patch(rect)
            ax.text(x_min, y_min - 5, category_mapping[label_id], color='yellow', fontsize=12, weight='bold')

    plt.axis('off')
    plt.show()


# File paths (train and test images live in the same directory; the split is
# decided by the image list files)
train_image_dir = "/content/dataset/images"
val_image_dir = "/content/dataset/images"
annotation_file = "/content/dataset/annotations-json/cocostuff-10k-v1.1.json"
train_list_file = "/content/dataset/imageLists/train.txt"
test_list_file = "/content/dataset/imageLists/test.txt"

# Create dataset objects
train_dataset = CustomDatasetWithJSONAnnotations(
    train_image_dir, annotation_file, train_list_file
)
test_dataset = CustomDatasetWithJSONAnnotations(
    val_image_dir, annotation_file, test_list_file
)


def collate_detection_batch(batch):
    """Regroup [(img, target), ...] into (imgs, targets) tuples without stacking.

    Images differ in size and targets may be None, so the default collate
    (which stacks tensors) cannot be used.
    """
    return tuple(zip(*batch))


# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_detection_batch)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, collate_fn=collate_detection_batch)

# Visualize a batch
images, targets = next(iter(train_loader))
for i in range(len(images)):
    # The dataset yields None for images without usable boxes; nothing to draw
    if targets[i] is None:
        print(f"Skipping image {i+1} in the batch: no valid annotations.")
        continue
    print(f"Visualizing image {i+1} in the batch:")
    visualize_image_with_boxes(images[i], targets[i], train_dataset.categories)


In [None]:

import os
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2,FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.resnet import ResNet50_Weights


# Build Faster R-CNN v2 with a pretrained ResNet-50 backbone and a detection
# head sized for this dataset.
# NOTE(review): num_classes=183 must match the checkpoint loaded below;
# presumably the COCO-Stuff categories plus background — confirm.
model = fasterrcnn_resnet50_fpn_v2(weights_backbone=ResNet50_Weights.DEFAULT,num_classes=183,trainable_backbone_layers=2)

# Pick the device first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resume from the previously saved weights. map_location lets a checkpoint
# that was saved on GPU load on a CPU-only runtime as well.
model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/Faster_RCNN_train_state/faster_rcnn_best_model.pth',
        map_location=device,
    )
)

# Move model to GPU if available
print(f"Using device: {device}")
model.to(device)





In [None]:
# Function to evaluate the model on the validation set with visualization
def evaluate_model(model, loader, device, visualize=False, num_images_to_visualize=5):
    """
    Compute the average detection loss over `loader`, optionally plotting predictions.

    Args:
    - model: Detection model that returns a dict of losses when called as
      `model(images, targets)` in train mode and a list of prediction dicts
      when called as `model(images)` in eval mode.
    - loader: Iterable of `(images, targets)` batches; individual targets may
      be None and are filtered out together with their image.
    - device: Device the batch tensors are moved to.
    - visualize: If True, plot predictions for the first image of a batch via
      `visualize_prediction(images, targets, predictions[0], idx=0)`.
    - num_images_to_visualize: Maximum number of batches to plot.

    Returns:
    - Mean of the summed per-batch losses over the batches that were actually
      evaluated, or NaN when no batch contained a valid target.

    The model is left in eval mode when at least one batch was evaluated.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0  # batches that contributed a loss (skipped ones excluded)
    image_idx = 0
    with torch.no_grad():
        for images, targets in loader:
            # Filter out images with None targets
            valid_indices = [i for i, target in enumerate(targets) if target is not None]
            if not valid_indices:  # Skip if all targets are None
                continue

            images = [images[i].to(device) for i in valid_indices]
            targets = [{k: v.to(device) for k, v in targets[i].items()} for i in valid_indices]

            # The loss dict is only produced in train mode, so switch
            # temporarily; gradients stay disabled by the no_grad context.
            # NOTE(review): train mode may still update BatchNorm running
            # statistics on validation data — confirm this is acceptable.
            model.train()
            try:
                loss_dict = model(images, targets)
            finally:
                # Always restore eval mode, even if the forward pass raises
                model.eval()
            losses = sum(loss for loss in loss_dict.values())
            total_loss += losses.item()
            num_batches += 1

            if visualize and image_idx < num_images_to_visualize:
                # Get model predictions
                predictions = model(images)
                print(predictions[0])
                # Visualize the first image in the batch
                visualize_prediction(images, targets, predictions[0], idx=0)
                image_idx += 1

    # Average over evaluated batches only; len(loader) would also count the
    # skipped ones and raise ZeroDivisionError for an empty loader.
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Validation Loss: {avg_loss:.4f}")
    return avg_loss




In [None]:
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

# One parameter group per model component, each with its own learning rate:
# the pre-trained backbone gets the lowest, the RPN a medium one and the
# RoI heads the highest.
params = [
    {"params": component.parameters(), "lr": component_lr}
    for component, component_lr in (
        (model.backbone, 1e-5),
        (model.rpn, 1e-4),
        (model.roi_heads, 1e-3),
    )
]

# Adam over the groups above, with weight decay applied to every group
optimizer = Adam(params, weight_decay=0.0005)

# Cut every group's learning rate by 10x once the monitored value has not
# decreased for more than 4 consecutive scheduler steps
lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4)



In [None]:

import torch.cuda.amp as amp
# Mixed precision is only meaningful on CUDA; disable it explicitly elsewhere
use_amp = device.type == "cuda"
scaler = amp.GradScaler(enabled=use_amp)

# Training loop with validation and visualization after each epoch
num_epochs = 20  # Set the number of epochs based on your available time
best_val_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    num_train_batches = 0  # batches that actually produced a loss

    # Training step
    for images, targets in train_loader:

        # Filter out images with None targets
        valid_indices = [i for i, target in enumerate(targets) if target is not None]

        if not valid_indices:  # Skip if all targets are None
            continue

        images = [images[i].to(device) for i in valid_indices]
        targets = [{k: v.to(device) for k, v in targets[i].items()} for i in valid_indices]

        optimizer.zero_grad()

        # Forward pass under autocast; in train mode the model returns a dict of losses
        with amp.autocast(enabled=use_amp):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # Scale the loss and backpropagate
        scaler.scale(losses).backward()

        # Step the optimizer through the scaler, then update the scale factor
        scaler.step(optimizer)
        scaler.update()

        train_loss += losses.item()
        num_train_batches += 1

    # Validate the model after every epoch and visualize predictions
    val_loss = evaluate_model(model, test_loader, device, visualize=True)
    lr_scheduler.step(val_loss)

    # Only the first parameter group's learning rate is reported
    current_lr = optimizer.param_groups[0]['lr']

    # Average over the batches that were trained on; len(train_loader) would
    # also count skipped batches and fail on an empty loader.
    avg_train_loss = train_loss / num_train_batches if num_train_batches else float('nan')
    print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Learning Rate: {current_lr:.6f}")

    # Save the model if it has improved validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'faster_rcnn_best_model.pth')
        print(f"Model saved with validation loss: {best_val_loss:.4f}")

# Save the final model after training
torch.save(model.state_dict(), 'faster_rcnn_final_model_20_epoch.pth')
print("Training completed and final model saved.")





In [None]:
# Persist scheduler and optimizer state next to the model checkpoints
for _stateful, _filename in ((lr_scheduler, 'lr_scheduler.pth'), (optimizer, 'optimizer.pth')):
    torch.save(_stateful.state_dict(), _filename)

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Create the destination folder only when Drive is actually mounted, so an
# unmounted Drive still fails loudly instead of writing to local disk.
!test -d '/content/drive/MyDrive' && mkdir -p '/content/drive/MyDrive/RCNN_30_Epoch'
# The trailing slash makes cp fail if the destination is not a directory,
# rather than overwriting a single file named RCNN_30_Epoch on every copy.
!cp '/content/faster_rcnn_best_model.pth' '/content/drive/MyDrive/RCNN_30_Epoch/'
!cp '/content/lr_scheduler.pth' '/content/drive/MyDrive/RCNN_30_Epoch/'
!cp '/content/optimizer.pth' '/content/drive/MyDrive/RCNN_30_Epoch/'
!cp '/content/faster_rcnn_final_model_20_epoch.pth' '/content/drive/MyDrive/RCNN_30_Epoch/'

In [None]:
import json
import torch
from torchvision import models, transforms
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np

# Load the class mappings from a JSON file
def load_label_mapping(json_file):
    """
    Read a COCO-style annotation file and return its id -> name category lookup.

    Args:
        json_file (str): Path to a JSON file with a top-level 'categories'
            list whose entries carry 'id' and 'name' keys.

    Returns:
        dict: Mapping of category id to category name.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If 'categories', or an entry's 'id'/'name', is missing.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Extract the categories and map the id to the name
    return {category['id']: category['name'] for category in data['categories']}

# Function to visualize predictions with labels
def visualize_prediction(image, predictions, label_map, threshold=0.5):
    """
    Visualizes the predicted bounding boxes, labels, and scores on an image.

    NOTE: this redefines the `visualize_prediction` from an earlier cell,
    which takes (image, targets, prediction, idx). After this cell has run,
    code written for the earlier signature must not be re-run without first
    re-running the earlier definition.

    Args:
        image (PIL Image or numpy.ndarray): Input image
        predictions (dict): Model's predictions for one image, with 'boxes'
            (x_min, y_min, x_max, y_max), 'labels' and 'scores' tensors
        label_map (dict): Mapping of label IDs to human-readable class names
        threshold (float): Only predictions with a score strictly greater
            than this value are drawn
    """
    # Convert image to numpy array if it's a PIL Image
    if isinstance(image, Image.Image):
        image = np.array(image)

    # Create a figure and axis for plotting
    fig, ax = plt.subplots(1, figsize=(12, 9))
    ax.imshow(image)

    # Get predictions
    boxes = predictions['boxes'].cpu().numpy()
    labels = predictions['labels'].cpu().numpy()
    scores = predictions['scores'].cpu().numpy()

    # Name of the last box drawn; stays None when nothing passes the threshold
    last_label_name = None

    for box, label, score in zip(boxes, labels, scores):
        if score > threshold:  # Filter predictions based on score
            x_min, y_min, x_max, y_max = box
            rect = patches.Rectangle(
                (x_min, y_min),
                x_max - x_min,
                y_max - y_min,
                linewidth=2, edgecolor='red', facecolor='none'
            )
            ax.add_patch(rect)

            # Get the label name from label_map (plain int key)
            label_name = label_map.get(int(label), 'Unknown')
            last_label_name = label_name

            # Add label and score text
            ax.text(
                x_min, y_min - 5,
                f'{label_name}: {score:.2f}',
                color='white', fontsize=12,
                bbox=dict(facecolor='red', alpha=0.7)
            )

    plt.show()
    # Previously this printed an unbound local when no box was drawn
    if last_label_name is not None:
        print(last_label_name)

# Restore the best checkpoint into the model built in an earlier cell.
# map_location keeps this working when the runtime has no GPU.
model.load_state_dict(torch.load('/content/faster_rcnn_best_model.pth', map_location=device))
model.eval() # Set the model to evaluation mode

# Load the image from file
image_path = '/content/U76fMj.webp'  # Change this to the path of your image
# Force 3-channel RGB: WebP/PNG files may be RGBA, palette or grayscale, and
# ToTensor would then yield a tensor with a different channel count.
image = Image.open(image_path).convert('RGB')

# Define the necessary transformations for input image
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to tensor
])

# Preprocess the image
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Perform inference (predict)
with torch.no_grad():
    image_tensor = image_tensor.to(device)
    predictions = model(image_tensor)

# Load label mapping from a JSON file
label_map = load_label_mapping('/content/dataset/annotations-json/cocostuff-10k-v1.1.json')  # Update with your JSON path

# Visualize the predictions
visualize_prediction(image, predictions[0], label_map)
