In [1]:
%matplotlib inline

import time
from pathlib import Path

from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(gpus[0], 'GPU')

MODEL_NAME = "my_faster_rcnn_v1"
PATH_TO_SAVED_MODEL = str(Path("exported-models") / MODEL_NAME / "saved_model")
PATH_TO_LABEL_MAP = "data/label_map.pbtxt"
DO_VISUALIZATION = True

In [2]:
print("Loading model...", end='')
start_time = time.time()

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)

end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

Loading model...Done! Took 12.83111834526062 seconds


In [3]:
if DO_VISUALIZATION:
  from object_detection.utils import label_map_util
  from object_detection.utils import visualization_utils as viz_utils

  # COCO_LABEL_MAP = Path(r"H:\Workspace\VinBigData\TensorFlow\models\research\object_detection\data\mscoco_label_map.pbtxt")
  MY_LABEL_MAP = "data/label_map.pbtxt"

  category_index = label_map_util.create_category_index_from_labelmap(
      str(MY_LABEL_MAP), use_display_name=True)

In [4]:
TEST_FOLDER = "/home/ubuntu/Track4/AIC21-Track4-Anomaly-Detection/aic21-track4-test-data"
MASK_DIR = "output/motion/test"

alpha = 0.02

for vid_id in range(1, 41):
    entries = []

    video_path = Path(TEST_FOLDER) / f"{vid_id}.mp4"
    cap = cv2.VideoCapture(str(video_path))
    
    fps = int(cap.get(cv2.CAP_PROP_FPS) + 0.5)
    gap = fps // 5
    tqdm.write(f"FPS = {cap.get(cv2.CAP_PROP_FPS)}")

    total_frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    mask_path = Path(MASK_DIR) / f"{vid_id:03d}.jpg"
    mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)

    avg_frame = None
    last_frame = None
    cnt_skipped = 0
    for idx in tqdm(range(total_frame_count), mininterval=1):
        ret, frame = cap.read()

        if not ret:
            print(f"Total frame read: {idx}")
            break

        if idx % gap == 0:
            if last_frame is not None:
                i1 = np.float32(frame)
                i2 = np.float32(last_frame)
                diff_frame = np.abs(i1 - i2)

                if np.max(diff_frame) < 5:
                    cnt_skipped += 1
                    continue
            
            last_frame = frame

            if avg_frame is None:
                avg_frame = frame
            else:
                avg_frame = (1 - alpha) * avg_frame + alpha * frame

            img = cv2.cvtColor(avg_frame.astype(np.float32), cv2.COLOR_BGR2RGB)
            img[mask == 0] = 0

            input_tensor = tf.convert_to_tensor(img, dtype=tf.uint8)

            height, width, _ = input_tensor.shape

            # Add batch dimension
            detections = detect_fn(input_tensor[tf.newaxis, ...])

            # All outputs are batches tensors.
            # Convert to numpy arrays, and take index [0] to remove the batch dimension.
            # We're only interested in the first num_detections.
            num_detections = int(detections.pop('num_detections'))
            detections = {key: value[0, :num_detections].numpy()
                        for key, value in detections.items()}
            detections['num_detections'] = num_detections

            # detection_classes should be ints.
            detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

            for box, class_id, score in zip(detections['detection_boxes'], detections['detection_classes'], detections['detection_scores']):
                if score > 0.2:
                    org_box = [box[1] * width, box[0] * height, box[3] * width, box[2] * height]
                    entries.append((vid_id, idx, *org_box, score))

            if False:
                image_np_with_detections = input_tensor.numpy()

                viz_utils.visualize_boxes_and_labels_on_image_array(
                    image_np_with_detections,
                    detections['detection_boxes'],
                    detections['detection_classes'],
                    detections['detection_scores'],
                    category_index,
                    use_normalized_coordinates=True,
                    max_boxes_to_draw=200,
                    min_score_thresh=.40,
                    agnostic_mode=False)

                plt.figure()
                plt.imshow(image_np_with_detections)
                plt.show()
                break
    df = pd.DataFrame(entries, columns=['video_id', 'frame_id', 'x_min', 'y_min', 'x_max', 'y_max', 'score'])
    df.to_csv(f"output/raw/{vid_id}.csv", index=False)

    print(f"Video {vid_id}, skipped {cnt_skipped} frames")

FPS = 30.0


  0%|          | 0/26760 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [8]:
print(detections)

{'detection_multiclass_scores': array([[0.00279889],
       [0.00278807],
       [0.00277618],
       [0.00276221],
       [0.00275496],
       [0.00274804],
       [0.002739  ],
       [0.00267872],
       [0.00267343],
       [0.00264334],
       [0.00262787],
       [0.00262463],
       [0.00260912],
       [0.00257282],
       [0.00255543],
       [0.00254337],
       [0.00253686],
       [0.00251244],
       [0.00250181],
       [0.00249929],
       [0.00248025],
       [0.00247505],
       [0.00243898],
       [0.00243517],
       [0.00241982],
       [0.00240759],
       [0.00239909],
       [0.00238773],
       [0.00237018],
       [0.00236152],
       [0.00235433],
       [0.0023266 ],
       [0.00231309],
       [0.00231053],
       [0.00230679],
       [0.0022984 ],
       [0.00228339],
       [0.00226884],
       [0.00226477],
       [0.00225989],
       [0.00222413],
       [0.0021717 ],
       [0.00215882],
       [0.00215809],
       [0.00214191],
       [0.00213525],
  