<a href="https://colab.research.google.com/github/luiszerba/deeplearning_learning/blob/main/CapstoneProject_WeaponDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import torch
import os
import cv2
import numpy as np
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import xml.etree.ElementTree as ET
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from PIL import Image

drive.mount('/content/drive')
content_path = '/content/drive/MyDrive/deep learning/Capstone Project'

image_dir = f'{content_path}/Pistol detection/Weapons'
annot_dir = f'{content_path}/Pistol detection/xmls'

device = "cuda" if torch.cuda.is_available() else "cpu"

Mounted at /content/drive


In [2]:
# Helper function to list files in a directory
def list_files(directory, extension):
    return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(extension)]

# List image and annotation files
image_paths = list_files(image_dir, '.jpg')
annotation_paths = list_files(annot_dir, '.xml')

# Ensure the paths are sorted so that image and annotation files correspond correctly
image_paths.sort()
annotation_paths.sort()

# Verify the paths
print(f"Total images: {len(image_paths)}")
print(f"Total annotations: {len(annotation_paths)}")
print(image_paths[:5])
print(annotation_paths[:5])


Total images: 3000
Total annotations: 3000
['/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/Weapons/armas (1).jpg', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/Weapons/armas (10).jpg', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/Weapons/armas (100).jpg', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/Weapons/armas (1000).jpg', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/Weapons/armas (1001).jpg']
['/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/xmls/armas (1).xml', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/xmls/armas (10).xml', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/xmls/armas (100).xml', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/xmls/armas (1000).xml', '/content/drive/MyDrive/deep learning/Capstone Project/Pistol detection/xmls/armas (1001

In [45]:
import numpy as np
import torch

def resize_and_pad(image, target_size):
    original_size = image.size
    ratio = float(target_size) / max(original_size)
    new_size = tuple([int(x * ratio) for x in original_size])
    image = image.resize(new_size, Image.ANTIALIAS)

    # Create a new image with the target size and paste the resized image
    new_image = Image.new("RGB", (target_size, target_size))
    new_image.paste(image, ((target_size - new_size[0]) // 2,
                            (target_size - new_size[1]) // 2))
    return new_image, ratio

def adjust_boxes(boxes, ratio, target_size, original_size):
    new_boxes = []
    for box in boxes:
        xmin, ymin, xmax, ymax, label = box
        xmin = int(xmin * ratio + (target_size - original_size[0] * ratio) / 2)
        ymin = int(ymin * ratio + (target_size - original_size[1] * ratio) / 2)
        xmax = int(xmax * ratio + (target_size - original_size[0] * ratio) / 2)
        ymax = int(ymax * ratio + (target_size - original_size[1] * ratio) / 2)
        new_boxes.append([xmin, ymin, xmax, ymax, label])
    return new_boxes


In [58]:


IMAGE_SIZE = (416, 416)

# Function to parse XML annotation files
def parse_voc_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    boxes = []

    for obj in root.findall('object'):
        label = obj.find('name').text
        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)
        boxes.append([xmin, ymin, xmax, ymax, label])  # Append all boxes for a single image

    return boxes

class CustomDataset(Dataset):
    def __init__(self, image_paths, annotation_paths, transform=None, target_size=800):
        self.image_paths = image_paths
        self.annotation_paths = annotation_paths
        self.transform = transform
        self.target_size = target_size

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        annotation_path = self.annotation_paths[idx]

        try:
            image = Image.open(img_path).convert("RGB")
            original_size = image.size
        except Exception as e:
            print(f"Error opening image file {img_path}: {e}")
            return None

        try:
            boxes = parse_voc_annotation(annotation_path)
        except Exception as e:
            print(f"Error parsing annotation file {annotation_path}: {e}")
            return None

        image, ratio = resize_and_pad(image, self.target_size)
        boxes = adjust_boxes(boxes, ratio, self.target_size, original_size)

        if self.transform:
            image = self.transform(image)

        boxes_tensor = torch.tensor([box[:4] for box in boxes], dtype=torch.float32)
        labels_tensor = torch.tensor([1 for _ in boxes], dtype=torch.int64)  # Assuming 'pistol' is the only class and using label '1'

        target = {'boxes': boxes_tensor, 'labels': labels_tensor}
        return image, target


# Define transformations
transform = transforms.Compose([transforms.ToTensor()])
dataset = CustomDataset(image_dir, annot_dir, transform, target_size=800)

# Custom collate function to handle None values
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return tuple(zip(*batch))

# Split dataset into training and test sets
train_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, collate_fn=collate_fn)

# Display one sample from the dataloader
for batch in train_dataloader:
    images, targets = batch
    for img, target in zip(images, targets):
        print(img.shape)
        print(target)
    break




  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)


torch.Size([3, 800, 800])
{'boxes': tensor([[ 46., 125., 741., 505.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[ 91., 117., 760., 581.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[486., 444., 582., 517.],
        [359., 502., 398., 597.]]), 'labels': tensor([1, 1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[ 25., 120., 760., 650.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[485., 232., 780., 391.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[601., 324., 745., 419.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[ 72., 247., 519., 679.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[ 32., 104., 756., 675.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[ 11., 241., 363., 439.]]), 'labels': tensor([1])}
torch.Size([3, 800, 800])
{'boxes': tensor([[294., 260., 560., 468.]]), 'labels': tensor([1])}
torch.Size([3

In [59]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


# Load a pre-trained Faster R-CNN model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the classifier with a new one for our specific dataset
num_classes = 2  # 1 class (pistol) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move model to the right device
model.to(device)

# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)



In [60]:
# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

def train_model(model, data_loader, optimizer, device, num_epochs=5):
    model.train()
    train_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in target.items()} for target in targets]

            optimizer.zero_grad()
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            losses.backward()
            optimizer.step()
            running_loss += losses.item()

        epoch_loss = running_loss / len(data_loader)
        train_losses.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

    return train_losses


train_model(model, train_dataloader, optimizer, device, num_epochs=5)


  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)


Epoch [1/5], Loss: 0.16732472244729388


  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)


Epoch [2/5], Loss: 0.10456934759512226


  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)


Epoch [3/5], Loss: 0.08838189872813985


  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)
  image = image.resize(new_size, Image.ANTIALIAS)


KeyboardInterrupt: 

In [None]:
# Evaluation function
def evaluate_model(model, data_loader, device):
    model.eval()
    eval_losses = []
    total_loss = 0.0
    with torch.no_grad():
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in target.items()} for target in targets]
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            total_loss += losses.item()

    epoch_loss = total_loss / len(data_loader)
    eval_losses.append(epoch_loss)
    print(f"Validation Loss: {epoch_loss:.4f}")

    return eval_losses

In [None]:
import matplotlib.pyplot as plt

def plot_loss_curves(train_losses, eval_losses, num_epochs):
    epochs = range(1, num_epochs + 1)
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_losses, 'b', label='Training Loss')
    plt.plot(epochs, eval_losses, 'r', label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def train_and_evaluate(model, train_loader, eval_loader, optimizer, device, num_epochs=5):
    train_losses = train_model(model, train_loader, optimizer, device, num_epochs)
    eval_losses = evaluate_model(model, eval_loader, device)
    plot_loss_curves(train_losses, eval_losses, num_epochs)

# Define the optimizer and the number of epochs
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 5

# Train and evaluate the model, then plot the loss curves
train_and_evaluate(model, train_dataloader, test_dataloader, optimizer, device, num_epochs)


In [53]:
# # Save the trained model
# torch.save(model.state_dict(), 'model.pth')

# # Load the saved model
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
# in_features = model.roi_heads.box_predictor.cls_score.in_features
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# model.load_state_dict(torch.load('model.pth'))
# model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 167MB/s]


NameError: name 'FastRCNNPredictor' is not defined