<a href="https://colab.research.google.com/github/raz0208/CityPerson_PedestrianDetection/blob/main/ModelImplementation/SSDModelForPedestrianDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import required libraries
import os
import json
import cv2
import torch
import numpy as np
import pandas as pd
from PIL import Image
import seaborn as sns
import torch.optim as optim
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models.detection as detection
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
def convert_tif_to_jpg(source_dir, target_dir):
    """
    Convert every '.tif' file found under source_dir into a grayscale JPEG in target_dir.

    Args:
        source_dir (str): Directory that is walked recursively for '.tif' files.
        target_dir (str): Flat output directory; created if it does not exist.
            Files from different sub-folders that share a base name overwrite
            each other because the sub-folder structure is not reproduced.
    """
    os.makedirs(target_dir, exist_ok=True)
    for root, _, files in os.walk(source_dir):
        for file in files:
            if not file.endswith('.tif'):
                continue
            tif_path = os.path.join(root, file)
            # Swap only the trailing extension; str.replace would also rewrite
            # a '.tif' that happens to occur earlier in the file name.
            jpg_file = os.path.splitext(file)[0] + '.jpg'
            target_path = os.path.join(target_dir, jpg_file)
            # The context manager closes the source file handle deterministically.
            with Image.open(tif_path) as img:
                img.convert('L').save(target_path, 'JPEG')  # 'L' = 8-bit grayscale

In [None]:
# Convert the '.tif' files of the train and val splits into JPEGs in the working directory.
tif_conversion_jobs = (
    (
        '/kaggle/input/city-persone/gtFinePanopticParts_trainval/gtFinePanopticParts/train',
        '/kaggle/working/CityPersonBboxPanoptic/train/images',
    ),
    (
        '/kaggle/input/city-persone/gtFinePanopticParts_trainval/gtFinePanopticParts/val',
        '/kaggle/working/CityPersonBboxPanoptic/val/images',
    ),
)
for source_folder, destination_folder in tif_conversion_jobs:
    convert_tif_to_jpg(source_folder, destination_folder)

In [None]:
def convert_json_to_txt(source_dir, target_dir):
    """
    Flatten every '.json' annotation file under source_dir into a '.txt' file in target_dir.

    One line is written per entry of the JSON 'objects' list. When the JSON
    carries top-level 'imgWidth' and 'imgHeight' keys the line is

        label x y w h img_width img_height

    which is the seven-field layout that convert_txt_to_ssd_format parses.
    When either key is missing the line falls back to 'label x y w h'.

    Args:
        source_dir (str): Directory that is walked recursively for '.json' files.
        target_dir (str): Flat output directory; created if it does not exist.
    """
    os.makedirs(target_dir, exist_ok=True)
    for root, _, files in os.walk(source_dir):
        for file in files:
            if not file.endswith('.json'):
                continue
            json_path = os.path.join(root, file)
            with open(json_path, 'r') as f:
                data = json.load(f)

            # NOTE(review): assumes the annotation files expose the image size as
            # 'imgWidth'/'imgHeight' (Cityscapes-style JSON) - confirm on the dataset.
            img_width = data.get('imgWidth')
            img_height = data.get('imgHeight')
            size_suffix = ''
            if img_width is not None and img_height is not None:
                size_suffix = f" {img_width} {img_height}"

            # Swap only the trailing extension, not every '.json' in the name.
            txt_file = os.path.splitext(file)[0] + '.txt'
            target_path = os.path.join(target_dir, txt_file)

            with open(target_path, 'w') as f:
                for obj in data.get('objects', []):
                    label = obj.get('label', 'unknown')
                    x, y, w, h = obj.get('bbox', [0, 0, 0, 0])
                    f.write(f"{label} {x} {y} {w} {h}{size_suffix}\n")

In [None]:
# Flatten the JSON bounding-box annotations of the train and val splits into text label files.
json_conversion_jobs = (
    (
        '/kaggle/input/city-persone/gtBbox_cityPersons_trainval/gtBboxCityPersons/train',
        '/kaggle/working/CityPersonBboxPanoptic/train/labels',
    ),
    (
        '/kaggle/input/city-persone/gtBbox_cityPersons_trainval/gtBboxCityPersons/val',
        '/kaggle/working/CityPersonBboxPanoptic/val/labels',
    ),
)
for source_folder, destination_folder in json_conversion_jobs:
    convert_json_to_txt(source_folder, destination_folder)

In [None]:
# Define label mapping for SSD
class_mapping = {
    "pedestrian": 0,
    "rider": 1,
    "sitting person": 2,
    "person group": 3,
    "person (other)": 4,
}


def convert_txt_to_ssd_format(source_dir, target_dir):
    """
    Rewrite '.txt' label files as normalised corner boxes with numeric class ids.

    Each input line is expected to be

        label x y w h img_width img_height

    where the label may contain spaces (e.g. "sitting person"). Each accepted
    line is written as

        label_id x_min y_min x_max y_max

    with coordinates divided by the image width/height and printed with six
    decimals. Lines that do not end in six numeric fields, and lines whose
    label is not a key of class_mapping, are dropped. An output file is
    created for every input file, even when no line survives.

    Args:
        source_dir (str): Directory that is walked recursively for '.txt' files.
        target_dir (str): Flat output directory; created if it does not exist.

    Raises:
        ValueError: If one of the six trailing fields is not a number.
        ZeroDivisionError: If img_width or img_height is zero.
    """
    os.makedirs(target_dir, exist_ok=True)
    for root, _, files in os.walk(source_dir):
        for file in files:
            if not file.endswith('.txt'):
                continue
            txt_path = os.path.join(root, file)
            target_path = os.path.join(target_dir, file)

            with open(txt_path, 'r') as f:
                lines = f.readlines()

            with open(target_path, 'w') as f:
                for line in lines:
                    # Split from the right so the six numeric fields are peeled off
                    # and whatever remains (possibly several words) is the label.
                    # A plain split() can never match the multi-word class names.
                    parts = line.strip().rsplit(None, 6)
                    if len(parts) != 7:
                        continue
                    label = ' '.join(parts[0].split())  # collapse repeated spaces
                    label_id = class_mapping.get(label, -1)
                    if label_id == -1:
                        continue
                    x, y, w, h, img_width, img_height = map(float, parts[1:])
                    x_min = x / img_width
                    y_min = y / img_height
                    x_max = (x + w) / img_width
                    y_max = (y + h) / img_height
                    f.write(f"{label_id} {x_min:.6f} {y_min:.6f} {x_max:.6f} {y_max:.6f}\n")

In [None]:
# Convert the training labels to the SSD label format
source_folder = '/kaggle/working/CityPersonBboxPanoptic/train/labels'
destination_folder = '/kaggle/working/CityPersonBboxSSDFormat/train/labels'
convert_txt_to_ssd_format(source_folder, destination_folder)

# Convert the validation labels to the SSD label format.
# The source must be the val folder; reading the train folder here would fill
# the validation label directory with training annotations.
source_folder = '/kaggle/working/CityPersonBboxPanoptic/val/labels'
destination_folder = '/kaggle/working/CityPersonBboxSSDFormat/val/labels'
convert_txt_to_ssd_format(source_folder, destination_folder)

In [None]:
def rename_files_in_dir(output_dir):
    """
    Rename all .txt files in output_dir by replacing 'gtBboxCityPersons' with 'gtFinePanopticParts'.

    Names of the form '<id>_gtBboxCityPersons.ssd.txt' additionally lose the
    '.ssd' marker, so both spellings end up as '<id>_gtFinePanopticParts.txt'.
    Files whose name does not contain the marker are left untouched. If the
    target name is already taken the file is not renamed and a message is
    printed.

    Args:
        output_dir (str): Directory whose direct children are renamed
            (sub-directories are not visited).
    """
    for file_name in os.listdir(output_dir):
        if not file_name.endswith('.txt'):
            continue

        # Replace the '.ssd' variant first so that marker is removed as well,
        # then the plain variant described in the docstring.
        new_name = file_name.replace('gtBboxCityPersons.ssd', 'gtFinePanopticParts')
        new_name = new_name.replace('gtBboxCityPersons', 'gtFinePanopticParts')
        if new_name == file_name:
            # Nothing to rename; without this guard the file would be reported
            # as a collision with itself.
            continue

        old_file_path = os.path.join(output_dir, file_name)
        new_file_path = os.path.join(output_dir, new_name)

        if os.path.exists(new_file_path):
            print(f"Skipping rename: {new_file_path} already exists")
        else:
            os.rename(old_file_path, new_file_path)

# Label directories (train and val) whose .txt files are renamed in place
output_dir = '/kaggle/working/CityPersonBboxSSDFormat/train/labels'
output_dir1 = '/kaggle/working/CityPersonBboxSSDFormat/val/labels'

# Apply the rename to the train directory first, then to the val directory
for labels_dir in (output_dir, output_dir1):
    rename_files_in_dir(labels_dir)

In [None]:
def remove_empty_files(txt_dir, img_dir):
    """
    Delete zero-byte '.txt' files under txt_dir together with their images.

    txt_dir is walked recursively. For every deleted label file, the file with
    the same name but a '.jpg' ending is looked up directly inside img_dir and
    deleted when present. Both deletion counts are printed at the end.

    Args:
        txt_dir (str): Root directory of the label files.
        img_dir (str): Directory holding the matching '.jpg' images.
    """
    removed_labels = 0
    removed_images = 0
    for folder, _subdirs, names in os.walk(txt_dir):
        for name in names:
            if not name.endswith('.txt'):
                continue
            label_path = os.path.join(folder, name)
            if os.path.getsize(label_path) != 0:
                continue  # the label file has content, keep it and its image

            os.remove(label_path)
            removed_labels += 1

            image_path = os.path.join(img_dir, name.replace('.txt', '.jpg'))
            if not os.path.exists(image_path):
                continue
            os.remove(image_path)
            removed_images += 1

    print(f"txt file romeved count: {removed_labels}")
    print(f"jpg file romeved count: {removed_images}")

In [None]:
# Drop empty label files (and their images) from the train split, then from the val split.
# Each entry: (headline to print, label directory, image directory).
cleanup_jobs = (
    (
        'Empty removed files from traning set (images and labels):',
        '/kaggle/working/CityPersonBboxSSDFormat/train/labels',
        '/kaggle/working/CityPersonBboxPanoptic/train/images',
    ),
    (
        'Empty removed files from val set (images and labels):',
        '/kaggle/working/CityPersonBboxSSDFormat/val/labels',
        '/kaggle/working/CityPersonBboxPanoptic/val/images',
    ),
)
for job_index, (headline, source_txt_files, source_jpg_files) in enumerate(cleanup_jobs):
    if job_index:
        # Visual separator between the two reports
        print('\n')
    print(headline)
    remove_empty_files(source_txt_files, source_jpg_files)

In [None]:
def resize_images(source_dir, target_dir, size=(512, 512)):
    """
    Write a resized grayscale copy of every '.jpg' under source_dir into target_dir.

    source_dir is walked recursively; the copies are stored directly in
    target_dir under their original file name. Files that OpenCV cannot read
    are skipped without any message.

    Args:
        source_dir (str): Directory containing the original images.
        target_dir (str): Directory to save the resized images (created if missing).
        size (tuple): Target size (width, height) for resizing.
    """
    os.makedirs(target_dir, exist_ok=True)
    for folder, _subdirs, names in os.walk(source_dir):
        for name in (n for n in names if n.endswith('.jpg')):
            gray = cv2.imread(os.path.join(folder, name), cv2.IMREAD_GRAYSCALE)
            if gray is None:
                # imread signals an unreadable file by returning None
                continue
            cv2.imwrite(os.path.join(target_dir, name), cv2.resize(gray, size))


# Resize the train and val images to 512x512 and store them next to the SSD-format labels.
train_images_dir = '/kaggle/working/CityPersonBboxPanoptic/train/images'
resized_train_dir = '/kaggle/working/CityPersonBboxSSDFormat/train/images'
val_images_dir = '/kaggle/working/CityPersonBboxPanoptic/val/images'
resized_val_dir = '/kaggle/working/CityPersonBboxSSDFormat/val/images'

for images_in, images_out in (
    (train_images_dir, resized_train_dir),
    (val_images_dir, resized_val_dir),
):
    resize_images(images_in, images_out, size=(512, 512))

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models.detection import ssd300_vgg16

class PedestrianDataset(Dataset):
    """
    Detection dataset pairing '.jpg' images with same-named '.txt' label files.

    Each label line is expected to be

        label_id x_min y_min x_max y_max

    with corner coordinates normalised to [0, 1], i.e. the layout written by
    convert_txt_to_ssd_format in this file.

    Args:
        images_dir (str): Directory containing the '.jpg' images.
        labels_dir (str): Directory containing the '.txt' label files.
        transform (callable, optional): Applied to the RGB image array only;
            the boxes are not changed by it.
        label_offset (int): Added to every class id read from disk. The
            default of 1 shifts the 0-based ids so that 0 stays free for the
            background class that torchvision detection models reserve.
            Pass 0 to keep the ids exactly as stored.
    """

    def __init__(self, images_dir, labels_dir, transform=None, label_offset=1):
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.transform = transform
        self.label_offset = label_offset
        # Sorted so that an index always refers to the same image between runs
        # (os.listdir returns entries in arbitrary order).
        self.image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.jpg'))

    def __len__(self):
        """Return the number of '.jpg' images found in images_dir."""
        return len(self.image_files)

    def _load_target(self, label_path, width, height):
        """
        Read one label file and return the target dict for an image of the given size.

        Returns:
            dict: 'boxes' is a float32 tensor of shape (N, 4) holding
            [x_min, y_min, x_max, y_max] in pixels, 'labels' an int64 tensor
            of shape (N,). N is 0 when the file is missing or empty.
        """
        boxes = []
        labels = []

        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    parts = line.split()
                    if not parts:
                        continue  # tolerate blank lines
                    x_min, y_min, x_max, y_max = map(float, parts[1:])
                    # The stored values are normalised corners, so scaling by the
                    # image size is all that is needed to get pixel coordinates.
                    boxes.append([x_min * width, y_min * height, x_max * width, y_max * height])
                    labels.append(int(parts[0]) + self.label_offset)

        # reshape keeps the (N, 4) layout even when there are no boxes at all
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return {'boxes': boxes, 'labels': labels}

    def __getitem__(self, idx):
        """
        Return (image, target) for the idx-th image.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        file_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, file_name)
        label_path = os.path.join(self.labels_dir, os.path.splitext(file_name)[0] + '.txt')

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # Convert grayscale to 3 channels

        h, w = img.shape[:2]
        # NOTE(review): boxes are scaled to the size of the image as read from
        # disk; a transform that resizes the image must keep that size or the
        # boxes will no longer line up.
        target = self._load_target(label_path, w, h)

        if self.transform:
            img = self.transform(img)

        return img, target

# Image preprocessing: array -> PIL image -> 512x512 resize -> tensor
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Build the train and val datasets from the SSD-format images and labels
train_dataset = PedestrianDataset(
    images_dir='/kaggle/working/CityPersonBboxSSDFormat/train/images',
    labels_dir='/kaggle/working/CityPersonBboxSSDFormat/train/labels',
    transform=transform,
)

val_dataset = PedestrianDataset(
    images_dir='/kaggle/working/CityPersonBboxSSDFormat/val/images',
    labels_dir='/kaggle/working/CityPersonBboxSSDFormat/val/labels',
    transform=transform,
)

In [None]:
def _collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into an (images, targets) pair of tuples."""
    return tuple(zip(*batch))


# Batches of 8; only the training data is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=_collate_detection_batch,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    collate_fn=_collate_detection_batch,
)

In [None]:
# SSD Model
from torchvision.models.detection import _utils as det_utils
from torchvision.models.detection.ssd import SSDClassificationHead

model = ssd300_vgg16(pretrained=True)
num_classes = 5 + 1  # 5 classes + background

# Setting a `num_classes` attribute on the existing head does not rebuild its
# convolution layers, so the head has to be replaced by a freshly built one
# sized for `num_classes`. The box-regression head does not depend on the
# number of classes and keeps its pretrained weights.
in_channels = det_utils.retrieve_out_channels(model.backbone, (300, 300))
num_anchors = model.anchor_generator.num_anchors_per_location()
model.head.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)

# Training setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

In [None]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss terms; their sum is optimised.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    if num_batches == 0:
        # Without this guard the report below would fail on an empty loader.
        print(f"Epoch {epoch + 1}: train_loader produced no batches")
        continue

    # Report the mean over all batches; the loss of the last batch alone is a
    # noisy indicator of how the epoch went.
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / num_batches:.4f}")

print("Training complete!")

In [None]:
# Sanity check: both label sequences should have the same length.
# NOTE(review): true_labels / pred_labels are not assigned in the visible part
# of this notebook - presumably an evaluation cell fills them; confirm.
true_label_count = len(true_labels)
print(f"Length of true_labels: {true_label_count}")
pred_label_count = len(pred_labels)
print(f"Length of pred_labels: {pred_label_count}")

In [None]:
# Class names used as tick labels on both axes
class_names = ["pedestrian", "rider", "sitting person", "person group", "person (other)"]

# Confusion matrix of the true labels against the predicted labels
cm = confusion_matrix(true_labels, pred_labels)

# Draw the matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()