In [1]:
import os

# Must be assigned before TensorFlow is imported: the native (C++) logger picks
# this variable up while the library initialises, so setting it after the import
# is too late to reliably silence INFO/WARNING messages. '2' hides INFO and WARNING.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

from object_detection.utils import label_map_util

In [2]:
# Root folder that holds the input videos, the motion masks and the detection output.
# Set the ANOMALY_DATA_ROOT environment variable to run the notebook on another
# machine without editing this cell; the default is the original location.
DATA_ROOT = Path(os.environ.get('ANOMALY_DATA_ROOT', 'D:/Data anomaly Youtube'))

TEST_DIR = DATA_ROOT / 'data'            # input videos, one file per video
MASK_DIR = DATA_ROOT / 'Motion mask 16'  # per-video mask images named '<video id>.jpg'
DETECTION_DIR = DATA_ROOT / 'output'     # per-video detection CSV files are written here

In [3]:
# Location of the exported detector (a SavedModel directory) and of the label-map
# file used to build `category_index`.
PATH_TO_SAVED_MODEL = 'C:/model_zoo/my_faster_rcnn_v1/saved_model'
PATH_TO_LABEL_MAP = 'C:/tf_models/research/object_detection/data/mscoco_label_map.pbtxt'

# Index built from the label map with display names enabled.
# NOTE(review): presumably keyed by numeric class id with a 'name' field — confirm
# against the object_detection label_map_util documentation before relying on it.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABEL_MAP,
    use_display_name=True,
)

In [4]:
# Restore the SavedModel once and report how long the restore took.
print('Loading model...')
start_time = time.time()

# The restored object is called directly as the detection function later on.
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)

end_time = time.time()
elapsed_time = end_time - start_time
print(f'Done! Took {elapsed_time} seconds')

Loading model...
Done! Took 12.898751735687256 seconds


### Compute running-average frames and run the object detection model on them

In [5]:
%%time

# For every video in TEST_DIR:
#   1. sample roughly SAMPLES_PER_SECOND frames per second,
#   2. drop sampled frames that are (almost) identical to the previously kept one,
#   3. fold each kept frame into an exponential running average,
#   4. black out the pixels where the video's mask is 0 and run the detector on the
#      averaged image,
#   5. write every detection (pixel-coordinate box + score) to DETECTION_DIR/<video id>.csv.
DETECTION_DIR.mkdir(parents=True, exist_ok=True)

# Weight of the newest kept frame in the running average.
alpha = 0.02
# Target number of sampled frames per second of video.
SAMPLES_PER_SECOND = 5
# A sampled frame is skipped when its largest per-pixel, per-channel change versus
# the previously kept frame is below this value.
STATIC_DIFF_THRESHOLD = 5

# Only regular files are candidates; sorting by name makes the processing order
# deterministic. The context manager closes the directory iterator promptly.
with os.scandir(TEST_DIR) as dir_entries:
    video_entries = sorted((entry for entry in dir_entries if entry.is_file()),
                           key=lambda entry: entry.name)

for filename in video_entries:
    entries = []

    # Take the id from the file name itself (name without its final extension)
    # rather than from the DirEntry repr, which breaks on names that contain
    # spaces, extra dots or quotes.
    vid_id = Path(filename.name).stem

    cap = cv2.VideoCapture(str(filename.path))
    try:
        if not cap.isOpened():
            tqdm.write(f'Video {vid_id}: could not be opened, skipping')
            continue

        raw_fps = cap.get(cv2.CAP_PROP_FPS)
        fps = int(raw_fps + 0.5)
        # Clamp to 1: for videos slower than SAMPLES_PER_SECOND the integer division
        # yields 0 and `idx % gap` would raise ZeroDivisionError.
        gap = max(1, fps // SAMPLES_PER_SECOND)
        tqdm.write(f'Video {vid_id}, FPS = {raw_fps}')

        total_frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        mask_path = MASK_DIR / f'{vid_id}.jpg'
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread returns None instead of raising; without this check the
            # masking step below would silently do nothing.
            tqdm.write(f'Video {vid_id}: mask {mask_path} could not be read, running without a mask')
            masked_out = None
        else:
            # Loop-invariant: pixels that are zeroed before detection.
            masked_out = mask == 0

        avg_frame = None
        last_frame = None
        cnt_skipped = 0
        for idx in tqdm(range(total_frame_count), mininterval=1):
            ret, frame = cap.read()

            if not ret:
                print(f'Total frame read: {idx}')
                break

            # Only every `gap`-th frame is considered at all.
            if idx % gap != 0:
                continue

            if last_frame is not None:
                # Exact absolute difference of the two frames
                # (assumes 8-bit frames from cap.read() — TODO confirm).
                if cv2.absdiff(frame, last_frame).max() < STATIC_DIFF_THRESHOLD:
                    cnt_skipped += 1
                    continue

            last_frame = frame

            if avg_frame is None:
                # Start the average as float64 so it has the same dtype on every
                # iteration (the update below produces float64 as well).
                avg_frame = frame.astype(np.float64)
            else:
                avg_frame = (1 - alpha) * avg_frame + alpha * frame

            img = cv2.cvtColor(avg_frame.astype(np.float32), cv2.COLOR_BGR2RGB)
            if masked_out is not None:
                img[masked_out] = 0

            # Cast explicitly (truncating) instead of relying on convert_to_tensor to
            # coerce a float array to uint8, which is not guaranteed across TF versions.
            input_tensor = tf.convert_to_tensor(img.astype(np.uint8), dtype=tf.uint8)

            height, width, _ = input_tensor.shape

            # Add the batch dimension; all outputs are batched tensors.
            detections = detect_fn(input_tensor[tf.newaxis, ...])

            # Keep only the first num_detections results of batch element 0.
            num_detections = int(detections.pop('num_detections'))
            boxes = detections['detection_boxes'][0, :num_detections].numpy()
            scores = detections['detection_scores'][0, :num_detections].numpy()

            # Boxes are scaled by the image size and reordered from
            # (y_min, x_min, y_max, x_max) to (x_min, y_min, x_max, y_max).
            # No score or class filtering is applied here: every detection is recorded.
            for (y_min, x_min, y_max, x_max), score in zip(boxes, scores):
                entries.append((vid_id, idx,
                                x_min * width, y_min * height, x_max * width, y_max * height,
                                score))
    finally:
        # Always free the decoder, including when the run is interrupted.
        cap.release()

    df = pd.DataFrame(entries, columns=['video_id', 'frame_id', 'x_min', 'y_min', 'x_max', 'y_max', 'score'])
    df.to_csv(DETECTION_DIR / f'{vid_id}.csv', index=False)

    print(f'Video {vid_id}, skipped {cnt_skipped} frames')

  0%|          | 0/1318 [00:00<?, ?it/s]

Video 10_1, FPS = 29.969939105085793


100%|██████████| 1318/1318 [28:00<00:00,  1.28s/it]
  0%|          | 0/8280 [00:00<?, ?it/s]

Video 10_1, skipped 0 frames
Video 11_1, FPS = 29.969939571480538


100%|██████████| 8280/8280 [2:52:46<00:00,  1.25s/it]  
  0%|          | 0/16064 [00:00<?, ?it/s]

Video 11_1, skipped 0 frames
Video 12_1, FPS = 29.96994050787706


 49%|████▉     | 7836/16064 [11:24:21<11:58:35,  5.24s/it]   


KeyboardInterrupt: 