<a href="https://colab.research.google.com/github/manjulamishra/120-Data-Science-Interview-Questions/blob/master/object_detector_example_for_manjula.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


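What should label.json look like?

Each key is an image filename (relative to the data directory). Each value is a list of annotated objects, where `bbox` is `[xmin, ymin, xmax, ymax]` in pixels of the original image and `label` is an integer class id. The `// ...` line in the example below only marks omitted entries; it is not valid JSON and must not appear in the real file.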

```
{
    "image1.jpg": [
        {
            "bbox": [50, 50, 200, 200],
            "label": 0
        },
        {
            "bbox": [100, 100, 150, 150],
            "label": 1
        }
    ],
    "image2.jpg": [
        {
            "bbox": [30, 30, 120, 120],
            "label": 0
        }
    ],
    // ...
}
```



In [None]:
# Import necessary libraries
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F

from torchvision import transforms
from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image
import numpy as np
import os
import json
from sklearn.model_selection import train_test_split



# 1. Data Preprocessing
def load_and_preprocess_data(data_dir, label_file, img_size=(224, 224)):
    """
    Load images and labels, preprocess them and return in appropriate format.

    Every image listed in the label file is converted to RGB, resized to
    `img_size`, and turned into a tensor. Its bounding boxes are rescaled by
    the same horizontal/vertical factors so they still match the resized image.

    Parameters:
    - data_dir: Directory where image files are stored
    - label_file: JSON file mapping each image filename to a list of
      {"bbox": [xmin, ymin, xmax, ymax], "label": int} entries
    - img_size: (width, height) to which images are resized

    Returns:
    - images: List of image tensors produced by `transforms.ToTensor()`
    - targets: List of dicts with 'boxes' (float32 tensor of shape (N, 4))
      and 'labels' (int64 tensor of shape (N,)), one dict per image
    """
    # Load labels
    with open(label_file) as f:
        labels = json.load(f)

    # Build the converter once instead of once per image.
    to_tensor = transforms.ToTensor()
    new_width, new_height = img_size[0], img_size[1]

    images = []
    targets = []

    for filename, annotations in labels.items():
        img_path = os.path.join(data_dir, filename)

        # The context manager closes the underlying file once the pixel data
        # has been copied by convert()/resize().
        with Image.open(img_path) as raw:
            old_width, old_height = raw.size
            img = raw.convert("RGB").resize(img_size)

        # Scale factors that map original pixel coordinates onto the resized image.
        scale_x = new_width / old_width
        scale_y = new_height / old_height

        # Build fresh lists rather than overwriting the parsed JSON in place.
        scaled_boxes = [
            [ann['bbox'][0] * scale_x,   # xmin
             ann['bbox'][1] * scale_y,   # ymin
             ann['bbox'][2] * scale_x,   # xmax
             ann['bbox'][3] * scale_y]   # ymax
            for ann in annotations
        ]
        class_ids = [ann['label'] for ann in annotations]

        # Explicit dtypes plus reshape(-1, 4) keep the target layout stable even
        # for an image without annotations (boxes: (0, 4), labels: (0,) int64);
        # torch.tensor([]) alone would give a float tensor of shape (0,).
        target = {
            'boxes': torch.tensor(scaled_boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.tensor(class_ids, dtype=torch.int64),
        }

        images.append(to_tensor(img))
        targets.append(target)

    return images, targets

# 2. Data Splitting

def _split_stratified_or_random(images, targets, test_size):
    """Split once with stratification by label sequence, or randomly if that fails."""
    import warnings  # local import: only needed on the fallback path

    # One flat string key per image. Passing the raw per-image label lists to
    # `stratify` produces a ragged nested list as soon as two images contain a
    # different number of boxes, which scikit-learn cannot turn into an array.
    strata = [" ".join(str(label) for label in t['labels'].tolist()) for t in targets]

    try:
        return train_test_split(
            images, targets, test_size=test_size, random_state=42, stratify=strata)
    except ValueError as err:
        # Stratification is rejected e.g. when some label combination occurs
        # only once. Retry without it; if the problem was something else
        # (such as an empty dataset) the retry raises the same error.
        warnings.warn(
            f"Stratified split not possible ({err}); falling back to a random split.")
        return train_test_split(
            images, targets, test_size=test_size, random_state=42)


def split_data(images, targets, train_ratio=0.7, val_ratio=0.15):
    """
    Split the dataset into training, validation, and testing sets.

    The split is done in two steps with a fixed random seed (42): first
    train vs. the rest, then the rest into validation vs. test. Each step is
    stratified by the image's label sequence when scikit-learn accepts that,
    and falls back to a plain random split (with a warning) otherwise.

    Parameters:
    - images: List of preprocessed images
    - targets: List of corresponding targets; each must have a 'labels' tensor
    - train_ratio: Proportion of dataset to include in the train split (0.7 by default)
    - val_ratio: Proportion of dataset to include in the validation split (0.15 by default)

    Returns:
    - train_images, train_targets: Training images and targets
    - val_images, val_targets: Validation images and targets
    - test_images, test_targets: Testing images and targets
    """
    # Whatever is left after train and validation goes to the test split.
    test_ratio = 1.0 - train_ratio - val_ratio

    # Split into train and temp
    train_images, temp_images, train_targets, temp_targets = _split_stratified_or_random(
        images, targets, test_size=1 - train_ratio)

    # Split temp into validation and test; the test share is expressed
    # relative to the size of the temp set, not the full dataset.
    val_images, test_images, val_targets, test_targets = _split_stratified_or_random(
        temp_images, temp_targets, test_size=test_ratio / (val_ratio + test_ratio))

    return train_images, train_targets, val_images, val_targets, test_images, test_targets


# 3. Model Configuration
def configure_model(num_classes):
    """
    Build a Faster R-CNN (ResNet-50 FPN) detector with a new box predictor.

    Parameters:
    - num_classes: Number of output classes for the replacement predictor

    Returns:
    - The model, loaded with pretrained weights and with its box predictor
      replaced by a `FastRCNNPredictor` sized for `num_classes`
    """
    # NOTE(review): newer torchvision releases appear to deprecate
    # `pretrained=True` in favour of `weights=...` - confirm against the
    # installed version before changing.
    detector = fasterrcnn_resnet50_fpn(pretrained=True)

    # Reuse the input width of the existing classification layer so the new
    # head plugs into the same feature vector.
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

# 4. Model Training
def train_model(model, train_data, val_data, device, num_epochs=10):
    """
    Train the model with SGD and report training/validation loss per epoch.

    Parameters:
    - model: Detection model that returns a dict of losses when called as
      model(images, targets) in training mode
    - train_data: Iterable of (images, targets) batches, where `images` is a
      sequence of image tensors and `targets` a sequence of dicts of tensors
    - val_data: Iterable of (images, targets) batches used for validation
    - device: The device (cpu or gpu) to train on
    - num_epochs: Number of passes over `train_data` (10 by default)

    Returns:
    - model: The trained model, left in evaluation mode
    """
    # Move model to the right device
    model.to(device)

    # Only optimize parameters that are not frozen
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    def _to_device(images, targets):
        # Move every image and every tensor inside each target dict to `device`.
        moved_images = [image.to(device) for image in images]
        moved_targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        return moved_images, moved_targets

    def _mean(total, count):
        # An empty loader yields NaN instead of raising ZeroDivisionError.
        return total / count if count else float("nan")

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0
        train_batches = 0

        for images, targets in train_data:
            images, targets = _to_device(images, targets)

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            total_train_loss += losses.item()
            train_batches += 1

        avg_train_loss = _mean(total_train_loss, train_batches)

        # Validation loss. The model is deliberately kept in training mode:
        # torchvision detection models only return the loss dict in training
        # mode; in eval mode the same call returns a list of detections, which
        # has no .values(). no_grad() guarantees that no weights are updated.
        total_val_loss = 0.0
        val_batches = 0

        with torch.no_grad():
            for images, targets in val_data:
                images, targets = _to_device(images, targets)

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())

                total_val_loss += losses.item()
                val_batches += 1

        avg_val_loss = _mean(total_val_loss, val_batches)

        # Leave the model in eval mode between epochs and on return.
        model.eval()

        print(f"Epoch: {epoch+1}/{num_epochs}, Training loss: {avg_train_loss}, Validation loss: {avg_val_loss}")

    return model


# 5. Model Evaluation
def evaluate_model(model, data, device):
    """
    Evaluate the model on the given data.

    Parameters:
    - model: The model to evaluate; must return a dict of losses when called
      as model(images, targets) in training mode
    - data: Iterable of (images, targets) batches to evaluate on
    - device: The device (cpu or gpu) to use for evaluation

    Returns:
    - avg_loss: The average per-batch loss of the model on the data
      (NaN when `data` yields no batches)
    """
    # torchvision detection models only return the loss dict in training mode;
    # in eval mode model(images, targets) returns a list of detections, which
    # has no .values(). So the losses are computed in training mode, with
    # gradients disabled so that no weights can change.
    model.train()
    total_loss = 0.0
    num_batches = 0

    try:
        with torch.no_grad():  # No need to track gradients
            for images, targets in data:
                # Move images and targets to the right device
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                # Forward pass
                loss_dict = model(images, targets)

                # Sum the individual loss terms into one scalar for this batch
                losses = sum(loss for loss in loss_dict.values())
                total_loss += losses.item()
                num_batches += 1
    finally:
        # Always hand the model back in evaluation mode, even on error.
        model.eval()

    # Guard against an empty loader instead of raising ZeroDivisionError.
    return total_loss / num_batches if num_batches else float("nan")


def test_model(model, test_data, device):
    """
    Run the model over the test data and collect its predictions.

    Parameters:
    - model: The model to test
    - test_data: Iterable of (images, targets) batches; the targets are ignored
    - device: The device (cpu or gpu) to use for testing

    Returns:
    - predictions: Flat list with one prediction dict per image, with every
      value moved back to the cpu
    """
    model.eval()  # Inference mode: the model is called with images only
    collected = []

    with torch.no_grad():  # Gradients are not needed for inference
        for batch_images, _unused_targets in test_data:
            on_device = [img.to(device) for img in batch_images]

            batch_output = model(on_device)

            # Bring each prediction dict back to the cpu before storing it
            for single in batch_output:
                collected.append({key: value.to('cpu') for key, value in single.items()})

    return collected

def predict(model, image_path, device, transform=None):
    """
    Use the trained model to predict the objects in an image.

    Parameters:
    - model: The trained model
    - image_path: Path to the image file
    - device: The device (cpu or gpu) to use for inference
    - transform (optional): Transformations to apply to the image before passing
      it to the model; may return either a PIL image or an image tensor

    Returns:
    - prediction: A one-element list holding the model's prediction dict for
      the image, with every value moved to the cpu
    """
    model.eval()  # Set the model to evaluation mode

    # Load image; the context manager closes the file once convert() has
    # copied the pixel data.
    with Image.open(image_path) as raw:
        image = raw.convert("RGB")

    # Apply transformations if specified
    if transform is not None:
        image = transform(image)

    # A transform pipeline that already ends in a tensor conversion would make
    # F.to_tensor raise, so only convert when we still hold a non-tensor image.
    if not isinstance(image, torch.Tensor):
        image = F.to_tensor(image)

    # Move image to the right device
    image = image.to(device)

    with torch.no_grad():  # No need to track gradients
        # The detector takes a list of images; pass a batch of one.
        prediction = model([image])

    # Move prediction back to cpu. The per-image outputs carry no batch
    # dimension, so nothing is squeezed: squeeze(0) would collapse the
    # tensors whenever exactly one object is detected.
    prediction = [{k: v.to('cpu') for k, v in prediction[0].items()}]

    return prediction



In [None]:
# --- Main program ---

# 1. Data Preprocessing
data_dir = 'path_to_your_data'
label_file = 'full_path_to_json_file'
images, labels = load_and_preprocess_data(data_dir, label_file)

# 2. Data Splitting
train_images, train_targets, val_images, val_targets, test_images, test_targets = split_data(images, labels)


def collate_detection_batch(batch):
    """Turn a list of (image, target) samples into an (images, targets) pair of tuples."""
    return tuple(zip(*batch))


# The training/evaluation loops iterate over (images, targets) BATCHES, so the
# per-sample pairs are wrapped in DataLoaders. The custom collate function keeps
# images and targets as tuples instead of stacking them, because every image
# has its own number of boxes.
batch_size = 2
train_data = torch.utils.data.DataLoader(
    list(zip(train_images, train_targets)),
    batch_size=batch_size, shuffle=True, collate_fn=collate_detection_batch)
val_data = torch.utils.data.DataLoader(
    list(zip(val_images, val_targets)),
    batch_size=batch_size, shuffle=False, collate_fn=collate_detection_batch)
test_data = torch.utils.data.DataLoader(
    list(zip(test_images, test_targets)),
    batch_size=batch_size, shuffle=False, collate_fn=collate_detection_batch)


# 3. Model Configuration
# NOTE(review): torchvision detection models appear to reserve class id 0 for
# the background, in which case 40 object types would need num_classes = 41
# and labels in 1..40 - confirm against the label file before training.
num_classes = 40  # 40 different types of objects
model = configure_model(num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4. Model Training
num_epochs = 10
train_model(model, train_data, val_data, device, num_epochs)

# 5. Model Evaluation
val_loss = evaluate_model(model, val_data, device)
print(f"Validation loss: {val_loss}")

# 6. Model Testing
test_predictions = test_model(model, test_data, device)

# 7. Inference
image_path = 'path_to_your_test_image'
prediction = predict(model, image_path, device)
print(prediction)
