# CCTV Weapon Detection using Transfer Learning

**Binary classifier** – predicts whether a surveillance image contains a weapon (gun/knife).

Dataset: [CCTV Weapon Detection](https://www.kaggle.com/datasets/simuletic/cctv-weapon-dataset) (Simuletic)

Approach: Transfer learning with MobileNetV2.

In [None]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Silence every Python warning for the rest of the session so the notebook
# output stays readable. Note that this also hides deprecation notices
# emitted by the libraries imported above.
import warnings
warnings.filterwarnings('ignore')

# Record the TensorFlow version in the output so a run can be reproduced.
print("TensorFlow version:", tf.__version__)

## 1. Set paths

Make sure the dataset is downloaded and placed in `data/cctv-weapon-dataset/`.

Folder structure:
```
data/
└── cctv-weapon-dataset/
    ├── images/          # all .jpg/.png files
    └── labels/          # corresponding .txt files (YOLO format)
```

In [None]:
# Root of the downloaded dataset, relative to the notebook's working directory.
BASE_DIR = "data/cctv-weapon-dataset"
IMAGE_DIR = os.path.join(BASE_DIR, "images")
LABEL_DIR = os.path.join(BASE_DIR, "labels")

# Verify both folders exist before doing any work. An explicit raise is used
# instead of `assert` because assertions are stripped when Python runs with
# -O, and `isdir` (rather than `exists`) also rejects a plain file that
# happens to sit at the expected path.
if not os.path.isdir(IMAGE_DIR):
    raise FileNotFoundError(f"Image directory not found: {IMAGE_DIR}")
if not os.path.isdir(LABEL_DIR):
    raise FileNotFoundError(f"Label directory not found: {LABEL_DIR}")

## 2. Parse YOLO labels to create binary classes

YOLO format: `class_id x_center y_center width height` (normalized).

Class mapping: 0 = `person`, 1 = `weapon`.

For each image, we check its corresponding `.txt` label file. If any annotation has `class_id == 1`, the image contains a weapon. Otherwise, it does not.

In [None]:
def get_image_paths_and_labels(img_dir, lbl_dir):
    """Collect image paths and derive a binary weapon label for each image.

    Every file in ``img_dir`` whose name ends in .jpg/.jpeg/.png
    (case-insensitive) is paired with the ``.txt`` file of the same stem in
    ``lbl_dir``. An image is labelled 1 when any non-blank line of its label
    file starts with class id 1, and 0 otherwise. Images without a label file
    are kept, labelled 0, and reported with a printed warning.

    Args:
        img_dir: Directory containing the image files.
        lbl_dir: Directory containing the YOLO-format ``.txt`` label files.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists: full
        image paths sorted by file name, and the matching 0/1 labels.

    Raises:
        ValueError: If the first token of a non-blank label line that gets
            inspected is not an integer.
    """
    img_extensions = ('.jpg', '.jpeg', '.png')
    # os.listdir returns entries in arbitrary, platform-dependent order.
    # Sorting makes the result deterministic, which is required for any
    # seeded split performed on these lists to be reproducible.
    img_files = sorted(
        f for f in os.listdir(img_dir) if f.lower().endswith(img_extensions)
    )

    image_paths = []
    labels = []
    for img_file in img_files:
        # Corresponding label file: same stem, .txt extension.
        label_file = os.path.splitext(img_file)[0] + '.txt'
        label_path = os.path.join(lbl_dir, label_file)

        try:
            with open(label_path, 'r') as f:
                # Stream the file and stop at the first weapon annotation;
                # blank lines produce an empty field list and are skipped.
                weapon_present = int(any(
                    int(fields[0]) == 1  # class id 1 = weapon
                    for fields in map(str.split, f)
                    if fields
                ))
        except FileNotFoundError:
            print(f"Warning: label file not found for {img_file}")
            weapon_present = 0  # default: no weapon

        image_paths.append(os.path.join(img_dir, img_file))
        labels.append(weapon_present)

    return image_paths, labels

img_paths, labels = get_image_paths_and_labels(IMAGE_DIR, LABEL_DIR)

# Summarise dataset size and class balance. Labels are 0/1, so their sum is
# the number of weapon images.
n_total = len(img_paths)
n_weapon = sum(labels)
print(f"Total images: {n_total}")
print(f"Class distribution: weapon={n_weapon}, no weapon={len(labels) - n_weapon}")

## 3. Train / validation / test split

We'll use 70% train, 15% validation, 15% test.

In [None]:
# Both splits share one seed and are stratified so each subset keeps the
# weapon / no-weapon ratio of the data it was drawn from.
SPLIT_SEED = 42

# Stage 1: hold out 30% of the data, to be shared by validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(
    img_paths,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=SPLIT_SEED,
)

# Stage 2: cut the held-out part in half -> validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=SPLIT_SEED,
)

for split_name, split_paths in (
    ("Train", X_train),
    ("Validation", X_val),
    ("Test", X_test),
):
    print(f"{split_name}: {len(split_paths)} images")

## 4. Data generators with augmentation

We'll use `ImageDataGenerator` to load images on‑the‑fly and apply augmentation only to the training set.

In [None]:
IMG_SIZE = (224, 224)  # MobileNetV2 expected input size
BATCH_SIZE = 32


def create_generator(paths, labels, batch_size, augment=False, shuffle=True):
    """Build a Keras batch generator from parallel lists of paths and labels.

    Images are loaded from disk on the fly, resized to ``IMG_SIZE`` and passed
    through MobileNetV2's ``preprocess_input``.

    Args:
        paths: Image file paths.
        labels: 0/1 labels aligned with ``paths``.
        batch_size: Number of images per batch.
        augment: When true, apply random rotation (up to 20 degrees),
            horizontal/vertical shifts (up to 20%) and horizontal flips.
        shuffle: Whether batches are drawn in shuffled order. Keep the default
            for training; pass ``False`` for evaluation generators so that
            batch order matches the order of ``paths`` and of the generator's
            ``labels`` attribute.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_dataframe``,
        yielding ``(images, labels)`` batches.
    """
    datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=20 if augment else 0,
        width_shift_range=0.2 if augment else 0,
        height_shift_range=0.2 if augment else 0,
        horizontal_flip=True if augment else False,
        fill_mode='nearest'
    )

    # Create a DataFrame for flow_from_dataframe
    df = pd.DataFrame({'filename': paths, 'class': labels})
    generator = datagen.flow_from_dataframe(
        dataframe=df,
        x_col='filename',
        y_col='class',
        target_size=IMG_SIZE,
        batch_size=batch_size,
        class_mode='raw',          # raw because labels are already 0/1
        shuffle=shuffle,
        seed=42
    )
    return generator


# Only the training generator is shuffled. Validation and test generators keep
# the dataframe order, so predictions made from them line up with the
# generator's `labels` attribute (which is always in dataframe order).
train_gen = create_generator(X_train, y_train, BATCH_SIZE, augment=True)
val_gen   = create_generator(X_val,   y_val,   BATCH_SIZE, augment=False, shuffle=False)
test_gen  = create_generator(X_test,  y_test,  BATCH_SIZE, augment=False, shuffle=False)

## 5. Build the model (Transfer Learning with MobileNetV2)

- Load MobileNetV2 without the top classification layer.
- Freeze the base model (use it as feature extractor).
- Add a global average pooling layer, a dense layer, and a final sigmoid output.

In [None]:
# ImageNet-pretrained MobileNetV2 backbone without its classification head.
# The input shape is derived from IMG_SIZE (plus 3 colour channels) so the
# model always matches the size the data generators resize images to.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMG_SIZE, 3),
)
base_model.trainable = False  # freeze: the backbone is a fixed feature extractor

# Small trainable head: pooled backbone features -> 128-unit hidden layer ->
# dropout -> a single sigmoid unit giving the weapon probability.
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

# Binary cross-entropy pairs with the single sigmoid output.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

## 6. Train the model

We'll use early stopping to avoid overfitting.

In [None]:
# Stop training once validation loss has gone 5 epochs without improving, and
# roll the model back to the weights from its best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

# Train for at most 20 epochs, validating after each one.
fit_options = dict(
    validation_data=val_gen,
    epochs=20,
    callbacks=[early_stop],
)
history = model.fit(train_gen, **fit_options)

## 7. Evaluate on test set

In [None]:
# Overall loss / accuracy on the held-out test set.
test_gen.reset()
test_loss, test_acc = model.evaluate(test_gen)
print(f"Test accuracy: {test_acc:.4f}")

# Predictions and true labels, collected from the SAME batches.
# `test_gen.labels` is always in dataframe order, whereas a shuffling
# generator yields batches in a different order (and `reset()` does not undo
# that), so pairing `model.predict(test_gen)` with `test_gen.labels` can
# misalign predictions and labels. Taking the labels from each batch we
# predict on is correct whether or not the generator shuffles.
test_gen.reset()
prob_batches = []
label_batches = []
for batch_idx in range(len(test_gen)):  # len() = number of batches per epoch
    batch_images, batch_labels = test_gen[batch_idx]
    prob_batches.append(model.predict_on_batch(batch_images))
    label_batches.append(batch_labels)

y_pred_prob = np.concatenate(prob_batches)
y_pred = (y_pred_prob > 0.5).astype(int).flatten()
y_true = np.concatenate(label_batches).astype(int)

# Pin the label set so the report and the matrix always have one row/column
# per class name, even if a class is absent from the test predictions.
class_ids = [0, 1]
class_names = ['No weapon', 'Weapon']

print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

## 8. Plot training history

In [None]:
# Side-by-side curves: loss on the left, accuracy on the right, each showing
# the training and validation series recorded by `model.fit`.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4))

panels = (
    (ax1, 'Loss', 'loss', 'val_loss'),
    (ax2, 'Accuracy', 'accuracy', 'val_accuracy'),
)
for axis, panel_title, train_key, val_key in panels:
    axis.plot(history.history[train_key], label='train')
    axis.plot(history.history[val_key], label='validation')
    axis.set_title(panel_title)
    axis.set_xlabel('Epoch')
    axis.legend()

plt.tight_layout()
plt.show()

## 9. Show sample predictions

Visualise a few test images with true and predicted labels.

In [None]:
def show_predictions(model, paths, true_labels, num_samples=8):
    """Display randomly chosen images with their true and predicted labels.

    Each title is green when the prediction matches the true label and red
    otherwise. A prediction is "Weapon" when the model output exceeds 0.5.

    Args:
        model: Trained classifier; ``model.predict`` is called on one
            preprocessed image at a time and its first output value is used
            as the weapon probability.
        paths: Sequence of image file paths.
        true_labels: Sequence of 0/1 labels aligned with ``paths``.
        num_samples: Maximum number of images to show; capped at
            ``len(paths)``.
    """
    # random.sample raises ValueError when asked for more items than exist,
    # so never request more samples than there are images.
    num_samples = min(num_samples, len(paths))
    if num_samples <= 0:
        print("No images to display.")
        return

    indices = random.sample(range(len(paths)), num_samples)

    # Size the grid from the sample count (at most 4 columns) instead of a
    # fixed 2x4 layout, which cannot hold more than 8 images. For 8 samples
    # this yields the same 2x4 grid and 15x8 figure as before.
    n_cols = min(4, num_samples)
    n_rows = -(-num_samples // n_cols)  # ceiling division
    plt.figure(figsize=(15, 4 * n_rows))

    for i, idx in enumerate(indices):
        img_path = paths[idx]
        true_label = true_labels[idx]

        # Load and preprocess image for prediction
        img = image.load_img(img_path, target_size=IMG_SIZE)
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        pred_prob = model.predict(img_array, verbose=0)[0][0]
        pred_label = 1 if pred_prob > 0.5 else 0

        # Show the full-resolution original when OpenCV can read it.
        # cv2.imread returns None (it does not raise) for files it cannot
        # decode; in that case fall back to the resized image loaded above.
        bgr_img = cv2.imread(img_path)
        if bgr_img is None:
            display_img = img
        else:
            display_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

        plt.subplot(n_rows, n_cols, i+1)
        plt.imshow(display_img)
        color = 'green' if pred_label == true_label else 'red'
        title = f"True: {'Weapon' if true_label else 'No weapon'}\nPred: {'Weapon' if pred_label else 'No weapon'}"
        plt.title(title, color=color)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

show_predictions(model, X_test, y_test, num_samples=8)

## 10. Save the model (optional)

You can save the trained model for later use.

In [None]:
# model.save("cctv_weapon_classifier.h5")

---
**Notebook complete.** Thank you for following along!