# 1. PROBLEM STATEMENT
To create a multi-class classification model that assigns each incoming video, whether a crime or a normal incident, to one of the 14 classes.

# 2. DATA
The data is the "UCF Crime Dataset" from Kaggle. It is an extensive dataset with a large number of videos in each of its 14 classes: 13 classes of anomaly videos and 1 class of normal videos. The dataset folder also contains several .docx files describing the structure of the dataset and a readme.txt file.

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the paths used below live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 3. EVALUATION
The model will be evaluated on a separate test dataset using accuracy, precision, recall, and F1-score to measure its effectiveness in detecting and classifying violent activities.

# 4. FEATURES
Each video will be decomposed into frames that will serve as the model’s primary features.
The extracted features should include spatial information (visual appearance, object positions) and temporal information (motion patterns across frames).
Together, these features enable the CNN–LSTM model to learn both what is happening in a frame and how it evolves over time.

In [None]:
# Importing Libraries
import cv2
import os
import random
from pathlib import Path
from tqdm import tqdm
import shutil

In [None]:
# Set Configurations
# Root folder of the unzipped dataset on the mounted Google Drive.
BASE_DIR= Path("/content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)")

# Extracted frames are written under FRAMES_DIR/<split>/<class>/<clip>/.
FRAMES_DIR= BASE_DIR/"Frames"
SPLIT_RATIO= 0.8 # fraction of each class's videos used for training; the remaining 20% is used for testing
# Size passed to cv2.resize for every saved frame.
FRAME_SIZE= (224, 224)
# Sampling stride: one frame is kept out of every FRAME_SKIP frames read.
FRAME_SKIP= 5

## Source Folders
# Each list is scanned by its own extraction cell; every entry is expected to
# contain one sub-directory per class holding that class's .mp4 files.
SOURCE_FOLDER1 = [
    BASE_DIR / "Anomaly-Videos-Part-1"/ "Anomaly-Videos-Part-1",
    BASE_DIR / "Anomaly-Videos-Part-2"/ "Anomaly-Videos-Part-2"]
SOURCE_FOLDER2 = [
    BASE_DIR / "Anomaly-Videos-Part-3"/ "Anomaly-Videos-Part-3",
    BASE_DIR / "Anomaly-Videos-Part-4"/ "Anomaly-Videos-Part-4"]
SOURCE_FOLDER3 = [
    BASE_DIR / "Normal_Videos_for_Event_Recognition",
    BASE_DIR / "Testing_Normal_Videos_Anomaly"/ "Testing_Normal_Videos_Anomaly"]
# NOTE(review): this entry uses "Part_5" (underscore) while the other parts use
# "Part-N" (hyphen) — confirm it matches the folder name on Drive.
SOURCE_FOLDER4 = [
    BASE_DIR / "Anomaly-Videos-Part_5"
]
SOURCE_FOLDER5= [
    BASE_DIR / "Explosion",
]

In [None]:
# helper functions
def extract_frames(video_path, output_folder, frame_skip=FRAME_SKIP):
  """
  Extract every `frame_skip`-th frame of a single video into `output_folder`.

  Kept frames are resized to FRAME_SIZE and written as ``frame_00000.jpg``,
  ``frame_00001.jpg``, ... numbered by save order (not by source frame index).

  Args:
    video_path: Path (or string) of the video file to read.
    output_folder: Directory that receives the JPEG frames (str or Path).
      It is created if missing, but only once the video has been opened.
    frame_skip: Sampling stride; frame i is kept when ``i % frame_skip == 0``.

  Returns:
    None. If the video cannot be opened, a message is printed and nothing
    is written.
  """
  # The `/` join below needs a Path, so accept plain strings as well.
  output_folder= Path(output_folder)

  cap= cv2.VideoCapture(str(video_path))
  if not cap.isOpened():
    print(f"Could not open {video_path}")
    cap.release()
    return

  try:
    os.makedirs(output_folder, exist_ok=True)
    frame_idx= 0
    saved= 0

    while True:
      ret, frame= cap.read()
      if not ret:
        # End of stream (or a read failure): stop extracting.
        break
      if frame_idx % frame_skip == 0:
        frame= cv2.resize(frame, FRAME_SIZE)
        frame_file= output_folder/f"frame_{saved:05d}.jpg"
        cv2.imwrite(str(frame_file), frame)
        saved += 1
      frame_idx += 1
  finally:
    # Release the capture handle even if resizing or writing raises.
    cap.release()

In [None]:
# extraction pipeline
def main():
  """
  Extract train/test frames for every class found under SOURCE_FOLDER1.

  Each existing entry of SOURCE_FOLDER1 is scanned for class sub-directories.
  The ``*.mp4`` files of each class are shuffled, split by SPLIT_RATIO, and
  passed to extract_frames, which writes into
  ``FRAMES_DIR/<train|test>/<class>/clip_NNNN``.

  NOTE(review): the shuffle uses the unseeded global `random` state, so the
  split differs between runs. Clip folders are numbered from 1 on every call,
  so a later call that yields an already-extracted class label writes into
  the same ``clip_NNNN`` folders.
  """
  # Collect all .mp4 videos, grouped by class label
  class_to_videos= {}
  for folder in SOURCE_FOLDER1:
    if not folder.exists():
      continue
    for class_dir in folder.iterdir():
      if not class_dir.is_dir():
        continue

      # Normalize class name: every folder containing "Normal" shares one label
      label= "Normal" if "Normal" in class_dir.name else class_dir.name.strip()

      for video in class_dir.glob("*.mp4"):
        class_to_videos.setdefault(label, []).append(video)

  print(f"\n Found {len(class_to_videos)} classes:")
  for cls, vids in class_to_videos.items():
    print(f"{cls}: {len(vids)} videos")

  # Extract frames into frames/train/ and frames/test/
  for cls, videos in class_to_videos.items():
    random.shuffle(videos)
    split_idx= int(len(videos) * SPLIT_RATIO)
    train_videos= videos[:split_idx]
    test_videos= videos[split_idx:]

    print(f"\n {cls}: {len(train_videos)} train, {len(test_videos)} test")

    for split, split_videos in (("train", train_videos), ("test", test_videos)):
      progress= tqdm(split_videos, desc=f"Extracting {cls} ({split})")
      for idx, video_path in enumerate(progress, start=1):
        clip_folder= FRAMES_DIR / split / cls / f"clip_{idx:04d}"
        extract_frames(video_path, clip_folder)

  # Report completion once, after every class has been processed
  # (previously this was printed inside the per-class loop).
  print("\n Frame extraction completed successfully!")
  print(f"Frames saved under: {FRAMES_DIR}")

# RUN
if __name__ == "__main__":
  main()


 Found 4 classes:
Abuse: 51 videos
Arrest: 50 videos
Arson: 50 videos
Assault: 50 videos

 Abuse: 40 train, 11 test


Extracting Abuse (train): 100%|██████████| 40/40 [08:13<00:00, 12.33s/it]
Extracting Abuse (test): 100%|██████████| 11/11 [05:30<00:00, 30.05s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Arrest: 40 train, 10 test


Extracting Arrest (train): 100%|██████████| 40/40 [18:47<00:00, 28.19s/it]
Extracting Arrest (test): 100%|██████████| 10/10 [02:15<00:00, 13.53s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Arson: 40 train, 10 test


Extracting Arson (train): 100%|██████████| 40/40 [18:10<00:00, 27.27s/it]
Extracting Arson (test): 100%|██████████| 10/10 [01:41<00:00, 10.13s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Assault: 40 train, 10 test


Extracting Assault (train): 100%|██████████| 40/40 [07:40<00:00, 11.50s/it]
Extracting Assault (test): 100%|██████████| 10/10 [01:54<00:00, 11.48s/it]


 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames





In [None]:
# extraction pipeline
def main():
  """
  Extract train/test frames for every class found under SOURCE_FOLDER2.

  Each existing entry of SOURCE_FOLDER2 is scanned for class sub-directories.
  The ``*.mp4`` files of each class are shuffled, split by SPLIT_RATIO, and
  passed to extract_frames, which writes into
  ``FRAMES_DIR/<train|test>/<class>/clip_NNNN``.

  NOTE(review): the shuffle uses the unseeded global `random` state, so the
  split differs between runs. Clip folders are numbered from 1 on every call,
  so a later call that yields an already-extracted class label writes into
  the same ``clip_NNNN`` folders.
  """
  # Collect all .mp4 videos, grouped by class label
  class_to_videos= {}
  for folder in SOURCE_FOLDER2:
    if not folder.exists():
      continue
    for class_dir in folder.iterdir():
      if not class_dir.is_dir():
        continue

      # Normalize class name: every folder containing "Normal" shares one label
      label= "Normal" if "Normal" in class_dir.name else class_dir.name.strip()

      for video in class_dir.glob("*.mp4"):
        class_to_videos.setdefault(label, []).append(video)

  print(f"\n Found {len(class_to_videos)} classes:")
  for cls, vids in class_to_videos.items():
    print(f"{cls}: {len(vids)} videos")

  # Extract frames into frames/train/ and frames/test/
  for cls, videos in class_to_videos.items():
    random.shuffle(videos)
    split_idx= int(len(videos) * SPLIT_RATIO)
    train_videos= videos[:split_idx]
    test_videos= videos[split_idx:]

    print(f"\n {cls}: {len(train_videos)} train, {len(test_videos)} test")

    for split, split_videos in (("train", train_videos), ("test", test_videos)):
      progress= tqdm(split_videos, desc=f"Extracting {cls} ({split})")
      for idx, video_path in enumerate(progress, start=1):
        clip_folder= FRAMES_DIR / split / cls / f"clip_{idx:04d}"
        extract_frames(video_path, clip_folder)

  # Report completion once, after every class has been processed
  # (previously this was printed inside the per-class loop).
  print("\n Frame extraction completed successfully!")
  print(f"Frames saved under: {FRAMES_DIR}")

# RUN
if __name__ == "__main__":
  main()


 Found 6 classes:
RoadAccidents: 150 videos
Robbery: 150 videos
Shooting: 50 videos
Shoplifting: 50 videos
Stealing: 100 videos
Vandalism: 50 videos

 RoadAccidents: 120 train, 30 test


Extracting RoadAccidents (train): 100%|██████████| 120/120 [10:48<00:00,  5.40s/it]
Extracting RoadAccidents (test): 100%|██████████| 30/30 [03:25<00:00,  6.86s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Robbery: 120 train, 30 test


Extracting Robbery (train): 100%|██████████| 120/120 [21:02<00:00, 10.52s/it]
Extracting Robbery (test): 100%|██████████| 30/30 [06:00<00:00, 12.01s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Shooting: 40 train, 10 test


Extracting Shooting (train): 100%|██████████| 40/40 [07:26<00:00, 11.16s/it]
Extracting Shooting (test): 100%|██████████| 10/10 [01:39<00:00,  9.94s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Shoplifting: 40 train, 10 test


Extracting Shoplifting (train): 100%|██████████| 40/40 [16:29<00:00, 24.74s/it]
Extracting Shoplifting (test): 100%|██████████| 10/10 [04:08<00:00, 24.85s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Stealing: 80 train, 20 test


Extracting Stealing (train): 100%|██████████| 80/80 [21:50<00:00, 16.38s/it]
Extracting Stealing (test): 100%|██████████| 20/20 [06:50<00:00, 20.53s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Vandalism: 40 train, 10 test


Extracting Vandalism (train): 100%|██████████| 40/40 [07:15<00:00, 10.89s/it]
Extracting Vandalism (test): 100%|██████████| 10/10 [02:21<00:00, 14.10s/it]


 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames





In [None]:
# extraction pipeline
def main():
  """
  Extract train/test frames for every class found under SOURCE_FOLDER3.

  Each existing entry of SOURCE_FOLDER3 is scanned for class sub-directories.
  The ``*.mp4`` files of each class are shuffled, split by SPLIT_RATIO, and
  passed to extract_frames, which writes into
  ``FRAMES_DIR/<train|test>/<class>/clip_NNNN``.

  NOTE(review): the shuffle uses the unseeded global `random` state, so the
  split differs between runs. Clip folders are numbered from 1 on every call,
  so a later call that yields an already-extracted class label writes into
  the same ``clip_NNNN`` folders.
  """
  # Collect all .mp4 videos, grouped by class label
  class_to_videos= {}
  for folder in SOURCE_FOLDER3:
    if not folder.exists():
      continue
    for class_dir in folder.iterdir():
      if not class_dir.is_dir():
        continue

      # Normalize class name: every folder containing "Normal" shares one label
      label= "Normal" if "Normal" in class_dir.name else class_dir.name.strip()

      for video in class_dir.glob("*.mp4"):
        class_to_videos.setdefault(label, []).append(video)

  print(f"\n Found {len(class_to_videos)} classes:")
  for cls, vids in class_to_videos.items():
    print(f"{cls}: {len(vids)} videos")

  # Extract frames into frames/train/ and frames/test/
  for cls, videos in class_to_videos.items():
    random.shuffle(videos)
    split_idx= int(len(videos) * SPLIT_RATIO)
    train_videos= videos[:split_idx]
    test_videos= videos[split_idx:]

    print(f"\n {cls}: {len(train_videos)} train, {len(test_videos)} test")

    for split, split_videos in (("train", train_videos), ("test", test_videos)):
      progress= tqdm(split_videos, desc=f"Extracting {cls} ({split})")
      for idx, video_path in enumerate(progress, start=1):
        clip_folder= FRAMES_DIR / split / cls / f"clip_{idx:04d}"
        extract_frames(video_path, clip_folder)

  # Report completion once, after every class has been processed
  # (previously this was printed inside the per-class loop).
  print("\n Frame extraction completed successfully!")
  print(f"Frames saved under: {FRAMES_DIR}")

# RUN
if __name__ == "__main__":
  main()


 Found 1 classes:
Normal: 50 videos

 Normal: 40 train, 10 test


Extracting Normal (train): 100%|██████████| 40/40 [10:24<00:00, 15.61s/it]
Extracting Normal (test): 100%|██████████| 10/10 [00:55<00:00,  5.56s/it]


 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames





In [None]:
# extraction pipeline
def main():
  """
  Extract train/test frames for every class found under SOURCE_FOLDER4.

  Each existing entry of SOURCE_FOLDER4 is scanned for class sub-directories.
  The ``*.mp4`` files of each class are shuffled, split by SPLIT_RATIO, and
  passed to extract_frames, which writes into
  ``FRAMES_DIR/<train|test>/<class>/clip_NNNN``.

  NOTE(review): the shuffle uses the unseeded global `random` state, so the
  split differs between runs. Clip folders are numbered from 1 on every call,
  so a later call that yields an already-extracted class label writes into
  the same ``clip_NNNN`` folders.
  """
  # Collect all .mp4 videos, grouped by class label
  class_to_videos= {}
  for folder in SOURCE_FOLDER4:
    if not folder.exists():
      continue
    for class_dir in folder.iterdir():
      if not class_dir.is_dir():
        continue

      # Normalize class name: folders containing "FightingA" are labelled "Fight"
      label= "Fight" if "FightingA" in class_dir.name else class_dir.name.strip()

      for video in class_dir.glob("*.mp4"):
        class_to_videos.setdefault(label, []).append(video)

  print(f"\n Found {len(class_to_videos)} classes:")
  for cls, vids in class_to_videos.items():
    print(f"{cls}: {len(vids)} videos")

  # Extract frames into frames/train/ and frames/test/
  for cls, videos in class_to_videos.items():
    random.shuffle(videos)
    split_idx= int(len(videos) * SPLIT_RATIO)
    train_videos= videos[:split_idx]
    test_videos= videos[split_idx:]

    print(f"\n {cls}: {len(train_videos)} train, {len(test_videos)} test")

    for split, split_videos in (("train", train_videos), ("test", test_videos)):
      progress= tqdm(split_videos, desc=f"Extracting {cls} ({split})")
      for idx, video_path in enumerate(progress, start=1):
        clip_folder= FRAMES_DIR / split / cls / f"clip_{idx:04d}"
        extract_frames(video_path, clip_folder)

  # Report completion once, after every class has been processed
  # (previously this was printed inside the per-class loop).
  print("\n Frame extraction completed successfully!")
  print(f"Frames saved under: {FRAMES_DIR}")

# RUN
if __name__ == "__main__":
  main()


 Found 2 classes:
Burglary: 50 videos
Fight: 50 videos

 Burglary: 40 train, 10 test


Extracting Burglary (train): 100%|██████████| 40/40 [11:58<00:00, 17.96s/it]
Extracting Burglary (test): 100%|██████████| 10/10 [02:00<00:00, 12.06s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Fight: 40 train, 10 test


Extracting Fight (train): 100%|██████████| 40/40 [15:27<00:00, 23.20s/it]
Extracting Fight (test): 100%|██████████| 10/10 [05:26<00:00, 32.62s/it]


 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames





In [None]:
# extraction pipeline
def main():
  """
  Extract train/test frames for every class found under SOURCE_FOLDER5.

  Each existing entry of SOURCE_FOLDER5 is scanned for class sub-directories.
  The ``*.mp4`` files of each class are shuffled, split by SPLIT_RATIO, and
  passed to extract_frames, which writes into
  ``FRAMES_DIR/<train|test>/<class>/clip_NNNN``.

  NOTE(review): the shuffle uses the unseeded global `random` state, so the
  split differs between runs. Clip folders are numbered from 1 on every call,
  so a later call that yields an already-extracted class label writes into
  the same ``clip_NNNN`` folders.
  """
  # Collect all .mp4 videos, grouped by class label
  class_to_videos= {}
  for folder in SOURCE_FOLDER5:
    if not folder.exists():
      continue
    for class_dir in folder.iterdir():
      if not class_dir.is_dir():
        continue

      # Normalize class name: every folder containing "Normal" shares one label
      label= "Normal" if "Normal" in class_dir.name else class_dir.name.strip()

      for video in class_dir.glob("*.mp4"):
        class_to_videos.setdefault(label, []).append(video)

  print(f"\n Found {len(class_to_videos)} classes:")
  for cls, vids in class_to_videos.items():
    print(f"{cls}: {len(vids)} videos")

  # Extract frames into frames/train/ and frames/test/
  for cls, videos in class_to_videos.items():
    random.shuffle(videos)
    split_idx= int(len(videos) * SPLIT_RATIO)
    train_videos= videos[:split_idx]
    test_videos= videos[split_idx:]

    print(f"\n {cls}: {len(train_videos)} train, {len(test_videos)} test")

    for split, split_videos in (("train", train_videos), ("test", test_videos)):
      progress= tqdm(split_videos, desc=f"Extracting {cls} ({split})")
      for idx, video_path in enumerate(progress, start=1):
        clip_folder= FRAMES_DIR / split / cls / f"clip_{idx:04d}"
        extract_frames(video_path, clip_folder)

  # Report completion once, after every class has been processed
  # (previously this was printed inside the per-class loop).
  print("\n Frame extraction completed successfully!")
  print(f"Frames saved under: {FRAMES_DIR}")

# RUN
if __name__ == "__main__":
  main()


 Found 2 classes:
Explosion: 49 videos
Shooting: 50 videos

 Explosion: 39 train, 10 test


Extracting Explosion (train): 100%|██████████| 39/39 [07:35<00:00, 11.69s/it]
Extracting Explosion (test): 100%|██████████| 10/10 [01:07<00:00,  6.79s/it]



 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames

 Shooting: 40 train, 10 test


Extracting Shooting (train): 100%|██████████| 40/40 [1:06:21<00:00, 99.53s/it] 
Extracting Shooting (test): 100%|██████████| 10/10 [23:20<00:00, 140.04s/it]


 Frame extraction completed successfully!
Frames saved under: /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames





In [None]:
import shutil, os
# Delete the extracted training frames of the "Normal" class; any error
# (including a missing folder) is ignored.
# NOTE(review): only the train split is removed and the reason is not recorded
# here — confirm this is intended before re-running this cell.
shutil.rmtree(FRAMES_DIR / "train"/ "Normal", ignore_errors=True)

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Frame size used when building model inputs.
IMG_SIZE= (224, 224)
# NOTE(review): the later cells visible here use SEQ_LEN rather than
# SEQUENCE_LENGTH — this constant may be unused.
SEQUENCE_LENGTH= 30 #frames per clip

In [None]:
import tensorflow as tf

In [None]:
FRAMES_DIR = Path("/content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames")   # where the extracted frames live: FRAMES_DIR/train and FRAMES_DIR/test
OUT_DIR    = Path("/content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames/ARGUS_TFRECORDS")  # where the TFRecords are saved (a sub-folder of FRAMES_DIR on Drive)
SEQ_LEN    = 30         # frames stored per clip (longer clips are truncated, shorter ones padded)
IMG_SIZE   = (224,224)  # size every frame is resized to before serialization
# NOTE(review): the reader (_parse_example) decodes frames with decode_jpeg, so
# records written with compress_jpeg = False would not be readable by it.
compress_jpeg = True    # store frames as jpeg bytes inside TFRecord (reduces disk & TFRecord size)

In [None]:
# Create the TFRecord output directory (and any missing parents); no-op if it already exists.
OUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
def _bytes_feature(b):
    """Wrap one bytes value `b` in a tf.train.Feature holding a single-element BytesList."""
    payload = tf.train.BytesList(value=[b])
    return tf.train.Feature(bytes_list=payload)
def _int64_feature(i):
    """Wrap one integer `i` in a tf.train.Feature holding a single-element Int64List."""
    payload = tf.train.Int64List(value=[i])
    return tf.train.Feature(int64_list=payload)

In [None]:
# Collect class names from the sub-directories of the train folder
# (sorted, so the class order does not depend on directory listing order).
train_dir = FRAMES_DIR / "train"
classes = sorted([d.name for d in train_dir.iterdir() if d.is_dir()])
print("Detected classes:", classes)

# Save the class list next to the TFRecords so it can be reloaded later.
np.save(str(OUT_DIR / "classes.npy"), np.array(classes))

# Encoder used to turn a class name into its integer label.
label_encoder = LabelEncoder().fit(classes)

Detected classes: ['Abuse', 'Arrest', 'Arson', 'Assault', 'Burglary', 'Explosion', 'Fight', 'Normal', 'RoadAccidents', 'Robbery', 'Shooting', 'Shoplifting', 'Stealing', 'Vandalism']


In [None]:
def clip_to_tfrecord(clip_path, label_int):
    """Serialize one clip folder to a tf.train.Example.

    Image files (.jpg/.jpeg/.png) in `clip_path` are read in filename order
    until SEQ_LEN frames have been collected. Unreadable or un-encodable
    frames are skipped and the following files are tried instead, so padding
    only happens when the clip really has fewer than SEQ_LEN usable frames.
    Short clips are padded by repeating the last collected frame.

    Args:
        clip_path: Folder holding the frames of one clip (str or Path).
        label_int: Integer class label stored in the example.

    Returns:
        A tf.train.Example with the features "label", "num_frames", "height",
        "width", "channels", "clip_name" and "frames", or None when no frame
        could be read. "num_frames" is the stored (padded) frame count.
    """
    clip_path = Path(clip_path)  # the `/` join and `.name` below need a Path
    files = sorted(f for f in os.listdir(clip_path) if f.lower().endswith(('.jpg','.jpeg','.png')))

    # IMG_SIZE[0] is stored as "height" and IMG_SIZE[1] as "width" below, while
    # cv2.resize expects its size argument as (width, height).
    height, width = IMG_SIZE[0], IMG_SIZE[1]
    frames_bytes = []

    for fname in files:
        if len(frames_bytes) >= SEQ_LEN:
            break
        img = cv2.imread(str(clip_path / fname))
        if img is None:
            continue
        img = cv2.resize(img, (width, height))
        if compress_jpeg:
            ok, enc = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
            if not ok:
                continue
            frames_bytes.append(enc.tobytes())
        else:
            # raw uint8 pixel bytes (uncompressed)
            frames_bytes.append(img.tobytes())

    # nothing usable in this clip
    if not frames_bytes:
        return None
    # pad by repeating last frame if needed
    while len(frames_bytes) < SEQ_LEN:
        frames_bytes.append(frames_bytes[-1])

    feature = {
        "label": _int64_feature(int(label_int)),
        "num_frames": _int64_feature(len(frames_bytes)),
        "height": _int64_feature(height),
        "width": _int64_feature(width),
        "channels": _int64_feature(3),
        "clip_name": _bytes_feature(str(clip_path.name).encode("utf-8")),
        # one bytes entry per frame
        "frames": tf.train.Feature(bytes_list=tf.train.BytesList(value=frames_bytes)),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

In [None]:
# function to write TFRecord for one split and one class
def write_class_tfrecord(split, cls):
    """Write all clips of one class in one split to ``OUT_DIR/<split>_<cls>.tfrecord``.

    Nothing is written when the class folder is missing or the output file
    already exists. Records are first written to a ``.tmp`` file that is
    renamed to the final name only after every clip has been written, so an
    interrupted run never leaves a partial file that later runs would skip
    as "exists".

    Args:
        split: Split folder name under FRAMES_DIR (e.g. "train" or "test").
        cls: Class folder name; it must be known to `label_encoder`.
    """
    class_dir = FRAMES_DIR / split / cls
    if not class_dir.exists():
        print("Missing:", class_dir)
        return
    out_file = OUT_DIR / f"{split}_{cls}.tfrecord"
    if out_file.exists():
        print(f"Skipping (exists): {out_file}")
        return

    label_int = int(label_encoder.transform([cls])[0])
    clips = sorted(p for p in class_dir.iterdir() if p.is_dir())
    print(f"Writing {out_file}  ({len(clips)} clips)")

    tmp_file = out_file.with_name(out_file.name + ".tmp")
    # The context manager closes the writer even if serializing a clip raises.
    with tf.io.TFRecordWriter(str(tmp_file)) as writer:
        for clip in tqdm(clips):
            ex = clip_to_tfrecord(clip, label_int)
            if ex is None:
                # clip had no readable frames
                continue
            writer.write(ex.SerializeToString())
    # Publish the finished file under its final name.
    tmp_file.replace(out_file)
    print("Saved:", out_file)

In [None]:
# Write records class-by-class for both splits: one TFRecord file per
# (split, class) pair. Files that already exist are skipped by
# write_class_tfrecord, so this cell can be re-run.
for split in ("train","test"):
    for cls in classes:
        write_class_tfrecord(split, cls)

print("All TFRecords created in:", OUT_DIR)
print("Saved class list to:", OUT_DIR / "classes.npy")

Skipping (exists): /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames/ARGUS_TFRECORDS/train_Abuse.tfrecord
Skipping (exists): /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames/ARGUS_TFRECORDS/train_Arrest.tfrecord
Skipping (exists): /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames/ARGUS_TFRECORDS/train_Arson.tfrecord
Skipping (exists): /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames/ARGUS_TFRECORDS/train_Assault.tfrecord
Skipping (exists): /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Frames/ARGUS_TFRECORDS/train_Burglary.tfrecord
Skipping (exists): /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Dron

## High-level flow:

1. Read TFRecords with tf.data (parse, decode JPEG bytes → frames tensor).

2. Batch & shuffle with tf.data so training reads only small chunks into RAM.

3. Apply light augmentation (optional, frame-wise or temporal).

4. Model: per-frame CNN (feature extractor) wrapped with TimeDistributed → temporal model (LSTM or GRU) → attention → dense classifier.

5. Train with model.fit() on the tf.data datasets, use callbacks (checkpoint, reduce LR).

6. Save final weights and small artifacts (classes.npy) to Drive.

In [None]:
# Shape of one decoded clip: SEQ_LEN frames of IMG_H x IMG_W pixels.
# These must match the values used when the TFRecords were written.
SEQ_LEN = 30
IMG_H, IMG_W = 224, 224
# Let tf.data choose the parallelism / prefetch buffer sizes.
AUTOTUNE= tf.data.AUTOTUNE

In [None]:
# Parsing schema for one serialized clip; the keys mirror the features
# written by clip_to_tfrecord.
feature_desc= {
    "label": tf.io.FixedLenFeature([], tf.int64),
    "num_frames": tf.io.FixedLenFeature([], tf.int64),
    "height": tf.io.FixedLenFeature([], tf.int64),
    "width": tf.io.FixedLenFeature([], tf.int64),
    "channels": tf.io.FixedLenFeature([], tf.int64),
    "clip_name": tf.io.FixedLenFeature([], tf.string),
    "frames": tf.io.VarLenFeature(tf.string)  # list of jpeg bytes
}

In [None]:
def _parse_example(serialized):
  """
  Parse one serialized clip into a (frames, label) pair.

  Args:
    serialized: Scalar string tensor holding one tf.train.Example that
      follows `feature_desc`.

  Returns:
    frames: float32 tensor of shape (SEQ_LEN, IMG_H, IMG_W, 3), scaled to [0, 1].
    label: int32 scalar class index.

  The final reshape requires the record to hold exactly SEQ_LEN frames.
  """
  ex= tf.io.parse_single_example(serialized, feature_desc)
  # "frames" is parsed as a SparseTensor of JPEG byte strings
  frames= tf.sparse.to_dense(ex["frames"], default_value=b'')

  # decode each jpeg
  def decode_fn(b):
    img= tf.image.decode_jpeg(b, channels=3)
    img= tf.image.resize(img, [IMG_H, IMG_W])
    img= tf.cast(img, tf.float32) / 255.0
    return img

  # fn_output_signature replaces the deprecated `dtype` argument of tf.map_fn
  frames= tf.map_fn(decode_fn, frames, fn_output_signature=tf.float32)
  frames= tf.reshape(frames, (SEQ_LEN, IMG_H, IMG_W, 3))
  label= tf.cast(ex["label"], tf.int32)
  return frames, label

In [None]:
# 3. Build train/val datasets with batching and prefetching
def make_dataset(tfrecord_paths, num_classes, batch_size=4, shuffle_buffer=256, training=True):
    """Build a batched tf.data pipeline of (frames, one-hot label) pairs.

    Args:
        tfrecord_paths: TFRecord file paths to read.
        num_classes: Depth of the one-hot label vector.
        batch_size: Number of clips per batch.
        shuffle_buffer: Size of the shuffle buffer (used only when training).
        training: When True, serialized records are shuffled before parsing.

    Returns:
        A tf.data.Dataset of batches, with prefetching enabled.
    """
    def _to_one_hot(frames, label):
        # convert int label -> one-hot
        return frames, tf.one_hot(label, depth=num_classes)

    records = tf.data.TFRecordDataset(tfrecord_paths, num_parallel_reads=AUTOTUNE)
    if training:
        records = records.shuffle(shuffle_buffer)
    parsed = records.map(_parse_example, num_parallel_calls=AUTOTUNE)
    encoded = parsed.map(_to_one_hot, num_parallel_calls=AUTOTUNE)
    batched = encoded.batch(batch_size)
    return batched.prefetch(AUTOTUNE)

In [None]:
# Build the training and validation datasets from the TFRecords in OUT_DIR.
classes= np.load(OUT_DIR/ "classes.npy")
num_classes= len(classes)
# Both patterns match "<split>_<class>.tfrecord"; the paths are sorted so the
# file order passed to tf.data is the same on every run.
train_paths= sorted(str(p) for p in OUT_DIR.glob("train_*.tfrecord"))
val_paths= sorted(str(p) for p in OUT_DIR.glob("test_*.tfrecord"))
batch_size = 4
train_ds = make_dataset(train_paths,num_classes, batch_size=batch_size, training=True)
# NOTE(review): val_ds is built from the test_* TFRecords. If it is also used
# to select checkpoints, it is no longer an untouched test set — confirm.
val_ds   = make_dataset(val_paths, num_classes, batch_size=batch_size, training=False)

print("Train TFRecords:", len(train_paths))
print("Val TFRecords:", len(val_paths))

Instructions for updating:
Use fn_output_signature instead


Train TFRecords: 14
Val TFRecords: 14


## ARGUS Architecture
Pre-filter (Motion Thresholding & Person Count)  --> CNN (mobilenetV2) --> LSTM (BiLSTM) --> Spatial + Temporal Attention --> Dense Layer (final output of prediction)

In [None]:
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam

def build_argus_model(seq_len=SEQ_LEN, img_size=(IMG_H, IMG_W), num_classes=num_classes, feature_dim= 512):
  """
  Build the ARGUS clip classifier: per-frame MobileNetV2 -> BiLSTM ->
  temporal attention pooling -> dense softmax head.

  Args:
    seq_len: Number of frames per clip.
    img_size: (height, width) of each frame.
    num_classes: Size of the softmax output. The default is the value the
      global `num_classes` had when this function was defined.
    feature_dim: Width of the per-frame projection applied after the CNN.

  Returns:
    An uncompiled Keras Model mapping (B, seq_len, H, W, 3) frames with pixel
    values in [0, 1] to (B, num_classes) class probabilities.
  """
  # 1) Base CNN: MobileNetV2 (frame-level feature extractor)
  base_cnn= MobileNetV2(
      include_top= False,
      pooling= "avg",
      input_shape= (img_size[0], img_size[1], 3),
      weights= "imagenet"
  )
  base_cnn.trainable= False  # start frozen; fine-tune later

  # 2) Input: sequence of frames, pixel values in [0, 1]
  frames_in= layers.Input(shape=(seq_len, img_size[0], img_size[1], 3), name="frames")

  # The ImageNet MobileNetV2 weights expect inputs scaled to [-1, 1]; the data
  # pipeline yields [0, 1], so map x -> 2x - 1 before the frozen backbone.
  x = layers.Rescaling(2.0, offset=-1.0, name="to_mobilenet_range")(frames_in)

  # 3) TimeDistributed CNN: apply MobileNetV2 to each frame
  x = layers.TimeDistributed(base_cnn, name="frame_cnn")(x)  # (B, T, cnn_feat_dim)

  # project per-frame CNN features to feature_dim
  x = layers.TimeDistributed(layers.Dense(feature_dim, activation='relu'), name="frame_fc")(x)

  # 4) Temporal Modelling using BiLSTM
  x= layers.Bidirectional(
      layers.LSTM(256, return_sequences=True),
      name="bilstm"
  )(x)                   # (B, T, 512)

  # 5) Attention over time (Temporal Attention)
  # one score per timestep, normalized with softmax over T
  attn_scores= layers.Dense(1, activation="tanh")(x)              # (B, T, 1)
  attn_scores= layers.Flatten()(attn_scores)                      # (B, T)
  attn_weights= layers.Activation("softmax", name="attn_weights")(attn_scores)

  # repeat each timestep's weight across the feature axis so it can be
  # multiplied element-wise with the LSTM outputs
  attn_weights = layers.RepeatVector(x.shape[-1])(attn_weights)  # (B, feat, T)
  attn_weights = layers.Permute([2, 1])(attn_weights)            # (B, T, feat)

  # weighted sum of LSTM outputs
  x = layers.Multiply()([x, attn_weights])                       # (B, T, feat)
  x = layers.Lambda(lambda t: tf.reduce_sum(t, axis=1), name="attn_pool")(x)  # (B, feat)

  # 6) Classification head
  x = layers.Dense(256, activation='relu')(x)
  x = layers.Dropout(0.5)(x)
  out = layers.Dense(num_classes, activation='softmax', name="predictions")(x)

  model = Model(frames_in, out, name="ARGUS_ViolenceDetector")
  return model

# Build the model with the default settings and print its layer summary.
model = build_argus_model()
model.summary()

# categorical_crossentropy matches the one-hot labels produced by make_dataset.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


## Training Cell
(PHASE 1: Backbone Frozen)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import os

In [None]:
# Folder (on the mounted Drive) that receives the trained model.
SAVE_DIR = "/content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained"

# Full path of the checkpoint file written by the ModelCheckpoint callback.
checkpoint_path = os.path.join(SAVE_DIR, "argus_best_model.h5")

# Create the folder if it is missing; an existing folder is left untouched.
os.makedirs(SAVE_DIR, exist_ok=True)

In [None]:
# Callbacks -- all three monitor the validation loss.

# Write the full model (not only its weights) to `checkpoint_path` each time
# val_loss reaches a new best value.
best_model_saver = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop training after 5 epochs without val_loss improvement and roll the
# model back to the best weights seen.
plateau_stopper = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.3 after 3 epochs without val_loss improvement.
lr_reducer = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=3,
    verbose=1,
)

callbacks = [best_model_saver, plateau_stopper, lr_reducer]

In [None]:
# Training run: fit on `train_ds`, validating on `val_ds` after every epoch.

EPOCHS = 10  # upper bound; the EarlyStopping callback may end the run sooner

fit_options = {
    "validation_data": val_ds,
    "epochs": EPOCHS,
    "callbacks": callbacks,
}
history = model.fit(train_ds, **fit_options)

Epoch 1/10
    190/Unknown [1m183s[0m 319ms/step - accuracy: 0.1960 - loss: 2.4924




Epoch 1: val_loss improved from inf to 2.55680, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 605ms/step - accuracy: 0.1963 - loss: 2.4921 - val_accuracy: 0.1623 - val_loss: 2.5568 - learning_rate: 1.0000e-04
Epoch 2/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 320ms/step - accuracy: 0.1947 - loss: 2.5788
Epoch 2: val_loss improved from 2.55680 to 2.40550, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 408ms/step - accuracy: 0.1952 - loss: 2.5774 - val_accuracy: 0.2618 - val_loss: 2.4055 - learning_rate: 1.0000e-04
Epoch 3/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step - accuracy: 0.2596 - loss: 2.3728
Epoch 3: val_loss improved from 2.40550 to 2.37513, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 410ms/step - accuracy: 0.2601 - loss: 2.3714 - val_accuracy: 0.2827 - val_loss: 2.3751 - learning_rate: 1.0000e-04
Epoch 4/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 320ms/step - accuracy: 0.3136 - loss: 2.1054
Epoch 4: val_loss improved from 2.37513 to 2.35819, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 409ms/step - accuracy: 0.3141 - loss: 2.1042 - val_accuracy: 0.2827 - val_loss: 2.3582 - learning_rate: 1.0000e-04
Epoch 5/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 320ms/step - accuracy: 0.3799 - loss: 1.9445
Epoch 5: val_loss improved from 2.35819 to 2.33433, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 415ms/step - accuracy: 0.3804 - loss: 1.9432 - val_accuracy: 0.2513 - val_loss: 2.3343 - learning_rate: 1.0000e-04
Epoch 6/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321ms/step - accuracy: 0.4646 - loss: 1.6522
Epoch 6: val_loss did not improve from 2.33433
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 399ms/step - accuracy: 0.4650 - loss: 1.6513 - val_accuracy: 0.2723 - val_loss: 2.3618 - learning_rate: 1.0000e-04
Epoch 7/10
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317ms/step - accuracy: 0.5100 - loss: 1.4530
Epoch 7: val_loss did not improve from 2.33433
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 398ms/step - accuracy: 0.5105 - loss: 1.4521 - val_accuracy: 0.2670 - val_loss: 2.4415 - learning_rate: 1.0000e-04
Epoch 8/10
[1m190/190[0m [32m━━━━━━━

(Phase 2: Unfreeze backbone + fine-tune)

In [None]:
# Phase 2: unfreeze the per-frame CNN backbone and fine-tune the whole model
# with a learning rate 10x smaller than the 1e-4 used for the first compile.
from tensorflow.keras.layers import BatchNormalization


def _freeze_batchnorm(layer):
    """Mark every BatchNormalization layer at or below `layer` as non-trainable.

    Recurses through nested models via their `layers` attribute so that
    BatchNormalization layers inside a wrapped sub-model are reached as well.
    """
    if isinstance(layer, BatchNormalization):
        layer.trainable = False
    for sub_layer in getattr(layer, "layers", []):
        _freeze_batchnorm(sub_layer)


# "frame_cnn" is the TimeDistributed wrapper; `.layer` is the CNN it applies
# to each frame.
base_cnn = model.get_layer("frame_cnn").layer
base_cnn.trainable = True

# Setting `trainable = True` on the backbone also switches its
# BatchNormalization layers to training mode. Keras' fine-tuning guidance is
# to keep them in inference mode so the pretrained moving statistics are not
# overwritten; a BatchNormalization layer with trainable=False runs in
# inference mode, so re-freeze just those layers.
_freeze_batchnorm(base_cnn)

# Recompile so the changed `trainable` flags take effect for training.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Reuses the same `callbacks` list as the first training run.
history_fine = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=callbacks
)

Epoch 1/20
    190/Unknown [1m621s[0m 1s/step - accuracy: 0.1666 - loss: 2.6666
Epoch 1: val_loss improved from 2.33433 to 2.32694, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m678s[0m 2s/step - accuracy: 0.1672 - loss: 2.6649 - val_accuracy: 0.2723 - val_loss: 2.3269 - learning_rate: 1.0000e-05
Epoch 2/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2089 - loss: 2.4830
Epoch 2: val_loss improved from 2.32694 to 2.29769, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 1s/step - accuracy: 0.2093 - loss: 2.4819 - val_accuracy: 0.2827 - val_loss: 2.2977 - learning_rate: 1.0000e-05
Epoch 3/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2214 - loss: 2.3651
Epoch 3: val_loss improved from 2.29769 to 2.28731, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 1s/step - accuracy: 0.2220 - loss: 2.3640 - val_accuracy: 0.2723 - val_loss: 2.2873 - learning_rate: 1.0000e-05
Epoch 4/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2390 - loss: 2.3189
Epoch 4: val_loss improved from 2.28731 to 2.28347, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 1s/step - accuracy: 0.2394 - loss: 2.3179 - val_accuracy: 0.2775 - val_loss: 2.2835 - learning_rate: 1.0000e-05
Epoch 5/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2528 - loss: 2.2734
Epoch 5: val_loss improved from 2.28347 to 2.27323, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 1s/step - accuracy: 0.2533 - loss: 2.2722 - val_accuracy: 0.2565 - val_loss: 2.2732 - learning_rate: 1.0000e-05
Epoch 6/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2489 - loss: 2.2491
Epoch 6: val_loss improved from 2.27323 to 2.25952, saving model to /content/drive/MyDrive/DetectionWithDroneModel(ARGUS)/VideoDetectionDataset/Drone_Detection_Dataset(Unzipped Files)/Model_Trained/argus_best_model.h5




[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 1s/step - accuracy: 0.2495 - loss: 2.2479 - val_accuracy: 0.2670 - val_loss: 2.2595 - learning_rate: 1.0000e-05
Epoch 7/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.3481 - loss: 2.0329
Epoch 7: val_loss did not improve from 2.25952
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 1s/step - accuracy: 0.3485 - loss: 2.0318 - val_accuracy: 0.2618 - val_loss: 2.2817 - learning_rate: 1.0000e-05
Epoch 8/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.3281 - loss: 2.0511
Epoch 8: val_loss did not improve from 2.25952
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 1s/step - accuracy: 0.3285 - loss: 2.0500 - val_accuracy: 0.2565 - val_loss: 2.2922 - learning_rate: 1.0000e-05
Epoch 9/20
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[