<a href="https://colab.research.google.com/github/net39/ML-anomaly-detection/blob/main/P2_04_Preprocess_vid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Libraries and Google Access

In [4]:
# Authenticate the Colab session's Google user.
from google.colab import auth
auth.authenticate_user()

# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Prompt for a file upload (used here to bring in the service-account key file).
# NOTE(review): the recorded output shows a re-upload being saved as
# "p2-anomaly-c4545180e308 (1).json", while the path set below still points at
# the un-suffixed file name -- confirm the intended key file is the one in use.
from google.colab import files
files.upload()

from google.cloud import storage
import os
# Point GOOGLE_APPLICATION_CREDENTIALS at the key file before creating the client.
# NOTE(review): the key file name is hard-coded; anyone re-running this notebook
# needs a file with exactly this name under /content.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/content/p2-anomaly-c4545180e308.json'

# GCS client and bucket handle used by the later cells.
storage_client = storage.Client()
bucket_name = 'p2-anomaly'
bucket = storage_client.bucket(bucket_name)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Saving p2-anomaly-c4545180e308.json to p2-anomaly-c4545180e308 (1).json


In [13]:
# Install GCS and other dependencies
!pip install google-cloud-storage --quiet
!pip install opencv-python-headless --quiet
!pip install joblib --quiet
!pip install tensorflow --quiet
!pip install scikit-learn

# Imports
import os
import cv2
import numpy as np
import pandas as pd
import json
import time
import shutil
from datetime import datetime
from tqdm import tqdm
import joblib

from collections import defaultdict
from tensorflow.keras.applications import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model




# Define Project Folders

In [2]:
# Local working directory
local_work_dir = "/content/preprocessing"

# Ensure fresh local directory
# NOTE: re-running this cell deletes everything under local_work_dir,
# including the log file configured below and any frames that later cells
# have written beneath this directory.
if os.path.exists(local_work_dir):
    shutil.rmtree(local_work_dir)
os.makedirs(local_work_dir, exist_ok=True)

# Log file setup
# log_message() appends to this path.
log_file = os.path.join(local_work_dir, "preprocessing_log.txt")

def log_message(message):
    """Append a timestamped line to the log file and echo it to stdout.

    Args:
        message: Text to record; it is formatted into the line with an f-string.

    The line ``"<YYYY-MM-DD HH:MM:SS> - <message>"`` is appended (with a
    trailing newline) to the module-level ``log_file`` path and the same text
    is printed.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"{timestamp} - {message}"
    # Explicit UTF-8 so non-ASCII messages (this notebook logs a "✅" status
    # line) are written correctly regardless of the platform default encoding.
    with open(log_file, "a", encoding="utf-8") as log:
        log.write(f"{line}\n")
    print(line)




# Start with Video EDA

In [7]:
# List every .avi object stored under the raw videos prefix (all subfolders).
video_folder = 'raw/videos/'
# The trailing comma makes this a real tuple; ('.avi') is just the string
# '.avi', which only works by accident and breaks the pattern as soon as a
# second extension is added.
video_extensions = ('.avi',)

# list_blobs(prefix=...) returns every object whose name starts with the prefix;
# keep only the names that end with one of the video extensions (case-insensitive).
blobs = list(bucket.list_blobs(prefix=video_folder))
video_files = [blob.name for blob in blobs if blob.name.lower().endswith(video_extensions)]

log_message(f"Found {len(video_files)} video files in {video_folder} and its subfolders.")

2025-06-16 07:50:03 - Found 192 video files in raw/videos/ and its subfolders.


In [10]:
# Group videos by chute
# Object names are split on '/', and the chute folder is taken from index 2
# (e.g. raw/videos/<chute>/<file>.avi).  A name needs at least four parts for
# index 2 to be a folder rather than the file itself, so shorter names are
# reported and skipped instead of being registered as a bogus "chute".
chute_videos = defaultdict(list)

for video_path in video_files:
    parts = video_path.split('/')
    if len(parts) >= 4:
        chute_videos[parts[2]].append(video_path)
    else:
        log_message(f"Skipping {video_path}: not inside a chute folder")

# `chute_files` (rather than `files`) keeps the google.colab `files` module
# imported earlier in the notebook from being shadowed.
for chute, chute_files in chute_videos.items():
    log_message(f"{len(chute_files)} videos found in {chute}")

2025-06-16 08:04:07 - 8 videos found in chute01
2025-06-16 08:04:07 - 8 videos found in chute02
2025-06-16 08:04:07 - 8 videos found in chute03
2025-06-16 08:04:07 - 8 videos found in chute04
2025-06-16 08:04:07 - 8 videos found in chute05
2025-06-16 08:04:07 - 8 videos found in chute06
2025-06-16 08:04:07 - 8 videos found in chute07
2025-06-16 08:04:07 - 8 videos found in chute08
2025-06-16 08:04:07 - 8 videos found in chute09
2025-06-16 08:04:07 - 8 videos found in chute10
2025-06-16 08:04:07 - 8 videos found in chute11
2025-06-16 08:04:07 - 8 videos found in chute12
2025-06-16 08:04:07 - 8 videos found in chute13
2025-06-16 08:04:07 - 8 videos found in chute14
2025-06-16 08:04:07 - 8 videos found in chute15
2025-06-16 08:04:07 - 8 videos found in chute16
2025-06-16 08:04:07 - 8 videos found in chute17
2025-06-16 08:04:07 - 8 videos found in chute18
2025-06-16 08:04:07 - 8 videos found in chute19
2025-06-16 08:04:07 - 8 videos found in chute20
2025-06-16 08:04:07 - 8 videos found in 

# Frame Size and Sampling Interval

In [17]:
# Frame geometry and sampling settings shared by the cells below.
IMG_HEIGHT = 224  # target frame height in pixels (used for resizing and the VGG19 input shape)
IMG_WIDTH = 224  # target frame width in pixels (used for resizing and the VGG19 input shape)
IMG_CHANNELS = 3  # channel count passed in the VGG19 input shape
FRAME_INTERVAL = 30  # extract one frame every 30 frames


In [18]:
# VGG19 with ImageNet weights and include_top=False, sized to the frame constants.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
# Model that maps the VGG19 input straight to the VGG19 output.
# NOTE(review): `model` is not referenced by any other cell visible in this
# notebook -- confirm whether feature extraction is meant to happen here.
model = Model(inputs=base_model.input, outputs=base_model.output)

In [19]:
def preprocess_frame(frame):
    """Smooth a frame and scale it to the configured size.

    Args:
        frame: Image passed to ``cv2.GaussianBlur``.

    Returns:
        The result of blurring with a 5x5 Gaussian kernel (sigma argument 0)
        and then resizing to ``(IMG_WIDTH, IMG_HEIGHT)``.
    """
    target_size = (IMG_WIDTH, IMG_HEIGHT)
    smoothed = cv2.GaussianBlur(frame, (5, 5), 0)
    return cv2.resize(smoothed, target_size)

# Object Detection

In [26]:
# Module-level HOG descriptor configured with OpenCV's default people detector;
# detect_humans() below uses it for every frame.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

def detect_humans(frame):
    """Detect people in ``frame`` with the module-level HOG detector and outline them.

    Args:
        frame: Image array passed to ``hog.detectMultiScale``.  It is left
            unmodified.

    Returns:
        A tuple ``(annotated, count)``: ``annotated`` is a copy of ``frame``
        with a rectangle of colour ``(0, 255, 0)`` and thickness 2 drawn
        around every detected region, and ``count`` is the number of regions.
    """
    regions, _ = hog.detectMultiScale(frame,
                                      winStride=(4, 4),
                                      padding=(8, 8),
                                      scale=1.05)

    # Draw on a copy.  cv2.rectangle writes into the array it is given, so
    # drawing on `frame` itself would also put the boxes on the caller's
    # image (which this notebook saves separately as the "preprocessed" frame).
    annotated = frame.copy()
    for (x, y, w, h) in regions:
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return annotated, len(regions)

# Edge Detection

In [27]:
def apply_edge_detection(frame):
    """Return the Canny edge map of ``frame`` using thresholds 100 and 200."""
    lower_threshold, upper_threshold = 100, 200
    return cv2.Canny(frame, lower_threshold, upper_threshold)

In [28]:
# One MOG2 background subtractor instance, shared by every call to
# apply_background_subtraction() below.
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

def apply_background_subtraction(frame):
    """Pass ``frame`` to the module-level ``bg_subtractor`` and return the mask from ``apply``."""
    return bg_subtractor.apply(frame)

# Full Pipeline: Frame Extraction, Preprocessing, and Human Detection

In [29]:
def _extract_frames(video_path, video_name, preprocessed_dir, detected_dir):
    """Sample every FRAME_INTERVAL-th frame of a local video and save two JPEGs per sample.

    Args:
        video_path: Local path of the video to read.
        video_name: Name used as the prefix of the written file names.
        preprocessed_dir: Directory for ``<name>_frame<i>_preprocessed.jpg``.
        detected_dir: Directory for ``<name>_frame<i>_detected.jpg``.

    Returns:
        The number of sampled frames that were processed.
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    saved_frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % FRAME_INTERVAL == 0:
                preprocessed_frame = preprocess_frame(frame)

                # Object Detection
                # Run detection on a copy so the saved preprocessed frame can
                # never contain the drawn bounding boxes, whatever
                # detect_humans does to the array it receives.
                detected_frame, _ = detect_humans(preprocessed_frame.copy())

                # Save preprocessed frame
                preprocessed_frame_filename = f"{video_name}_frame{frame_count}_preprocessed.jpg"
                cv2.imwrite(os.path.join(preprocessed_dir, preprocessed_frame_filename),
                            preprocessed_frame)

                # Save detected frame
                detected_frame_filename = f"{video_name}_frame{frame_count}_detected.jpg"
                cv2.imwrite(os.path.join(detected_dir, detected_frame_filename),
                            detected_frame)

                saved_frame_count += 1

            frame_count += 1
    finally:
        # Always release the capture handle, even if processing a frame fails.
        cap.release()
    return saved_frame_count


# For every chute: download each video, extract/annotate sampled frames into
# <local_work_dir>/<chute>/{preprocessed,detected}, then delete the local video.
# `chute_files` (rather than `files`) avoids shadowing the google.colab `files` module.
for chute, chute_files in chute_videos.items():
    chute_dir = os.path.join(local_work_dir, chute)
    preprocessed_dir = os.path.join(chute_dir, 'preprocessed')
    detected_dir = os.path.join(chute_dir, 'detected')
    os.makedirs(preprocessed_dir, exist_ok=True)
    os.makedirs(detected_dir, exist_ok=True)

    for file_path in chute_files:
        video_name = os.path.basename(file_path).split('.')[0]
        video_local_path = os.path.join(local_work_dir, f"{video_name}.avi")

        try:
            # Download video
            blob = bucket.blob(file_path)
            blob.download_to_filename(video_local_path)
            log_message(f"Downloaded {video_name}")

            saved_frame_count = _extract_frames(
                video_local_path, video_name, preprocessed_dir, detected_dir
            )
        finally:
            # Remove the temporary video even when download or extraction fails,
            # so a failed run does not leave large files behind.
            if os.path.exists(video_local_path):
                os.remove(video_local_path)

        log_message(f"Extracted {saved_frame_count} frames from {video_name}")

2025-06-16 08:29:25 - Downloaded cam1
2025-06-16 08:29:27 - Extracted 53 frames from cam1
2025-06-16 08:29:30 - Downloaded cam2
2025-06-16 08:29:31 - Extracted 53 frames from cam2
2025-06-16 08:29:35 - Downloaded cam3
2025-06-16 08:29:36 - Extracted 53 frames from cam3
2025-06-16 08:29:40 - Downloaded cam4
2025-06-16 08:29:42 - Extracted 53 frames from cam4
2025-06-16 08:29:45 - Downloaded cam5
2025-06-16 08:29:47 - Extracted 53 frames from cam5
2025-06-16 08:29:51 - Downloaded cam6
2025-06-16 08:29:52 - Extracted 53 frames from cam6
2025-06-16 08:29:55 - Downloaded cam7
2025-06-16 08:29:57 - Extracted 53 frames from cam7
2025-06-16 08:30:00 - Downloaded cam8
2025-06-16 08:30:02 - Extracted 52 frames from cam8
2025-06-16 08:30:05 - Downloaded cam1
2025-06-16 08:30:06 - Extracted 28 frames from cam1
2025-06-16 08:30:08 - Downloaded cam2
2025-06-16 08:30:09 - Extracted 29 frames from cam2
2025-06-16 08:30:11 - Downloaded cam3
2025-06-16 08:30:12 - Extracted 27 frames from cam3
2025-06-16

In [30]:
# GCS object names always use '/', so build them with posixpath rather than
# the platform-dependent os.path.
import posixpath

preprocessed_folder = 'preprocessed-mcfd/'
detected_folder = 'detected-mcfd/'

# (local sub-folder inside each chute directory, destination prefix in the bucket)
upload_targets = (
    ('preprocessed', preprocessed_folder),
    ('detected', detected_folder),
)

for chute in os.listdir(local_work_dir):
    chute_path = os.path.join(local_work_dir, chute)
    if not os.path.isdir(chute_path):
        # Plain files in the working directory (such as the log file) are not chutes.
        continue

    # Same order as before: preprocessed frames first, then detected frames.
    for subdir, gcs_prefix in upload_targets:
        local_dir = os.path.join(chute_path, subdir)
        if not os.path.exists(local_dir):
            continue

        for frame_file in os.listdir(local_dir):
            local_frame_path = os.path.join(local_dir, frame_file)
            gcs_frame_path = posixpath.join(gcs_prefix, chute, frame_file)
            blob = bucket.blob(gcs_frame_path)
            blob.upload_from_filename(local_frame_path)
        log_message(f"Uploaded {subdir} frames for {chute}")

log_message("✅ All videos processed, frames preprocessed, detected and uploaded.")

2025-06-16 08:44:35 - Uploaded preprocessed frames for chute16
2025-06-16 08:46:23 - Uploaded detected frames for chute16
2025-06-16 08:47:44 - Uploaded preprocessed frames for chute12
2025-06-16 08:49:04 - Uploaded detected frames for chute12
2025-06-16 08:50:27 - Uploaded preprocessed frames for chute10
2025-06-16 08:51:51 - Uploaded detected frames for chute10
2025-06-16 08:57:13 - Uploaded preprocessed frames for chute24
2025-06-16 09:02:36 - Uploaded detected frames for chute24
2025-06-16 09:04:22 - Uploaded preprocessed frames for chute21
2025-06-16 09:06:08 - Uploaded detected frames for chute21
2025-06-16 09:14:01 - Uploaded preprocessed frames for chute23
2025-06-16 09:21:54 - Uploaded detected frames for chute23
2025-06-16 09:24:02 - Uploaded preprocessed frames for chute14
2025-06-16 09:26:11 - Uploaded detected frames for chute14
2025-06-16 09:27:40 - Uploaded preprocessed frames for chute19
2025-06-16 09:29:09 - Uploaded detected frames for chute19
2025-06-16 09:31:09 - Up

# Upload Notebook to GCS

In [31]:
# Copy the notebook from the mounted Drive into the Colab working directory.
# NOTE(review): the Colab badge at the top of this notebook links to
# "P2_04_Preprocess_vid.ipynb" (underscores), while the paths here use
# "P2-04_Preprocess-vid.ipynb" -- confirm which file name is current.
!cp "/content/drive/MyDrive/Colab Notebooks/P2-04_Preprocess-vid.ipynb" "/content/P2-04_Preprocess-vid.ipynb"

# Upload the copied notebook to the bucket under notebooks/.
# Note: this creates a second client with an explicit project and rebinds the
# module-level name `bucket`.
client = storage.Client(project='p2-anomaly')
bucket = client.bucket('p2-anomaly')
notebook_blob = bucket.blob('notebooks/P2-04_Preprocess-vid.ipynb')
notebook_blob.upload_from_filename('/content/P2-04_Preprocess-vid.ipynb')
print("Pushed to GCS.")

Pushed to GCS.
