In [1]:
from dataset.svw import SVW
from tqdm import tqdm
# Instantiate the project's SVW dataset helper (imported from dataset.svw).
svw = SVW()

# Collection of video paths that the batch feature-extraction loop iterates over;
# each item is passed to Video(...) and has .as_posix() called on it there.
# NOTE(review): _get_filtered_videos is underscore-prefixed, i.e. presumably a
# private method of SVW — confirm whether a public accessor should be used.
videos = svw._get_filtered_videos()

In [2]:
from preprocessing.video import Video
import cv2
import numpy as np

def get_video_as_array(v: Video) -> list:
    """Decode every frame of a video into an in-memory list of images.

    Args:
        v: Video whose ``video_reshaped_path`` points at the file to decode.

    Returns:
        A list whose element 0 is an all-zero 1080x1920x3 placeholder, followed
        by the decoded frames in read order (first decoded frame at index 1).
        If the file cannot be opened only the placeholder is returned.
    """
    cap = cv2.VideoCapture(v.video_reshaped_path.as_posix())
    # Placeholder shifts decoded frames to start at index 1.
    # NOTE(review): presumably this lets 1-based frame ids index the list
    # directly — confirm against how frame_id is produced.
    # uint8 keeps the placeholder ~6 MB instead of ~50 MB (float64) and avoids
    # mixing dtypes with the decoded frames (OpenCV normally yields uint8).
    video = [np.zeros((1080, 1920, 3), dtype=np.uint8)]
    try:
        while cap.isOpened():
            ret, img = cap.read()
            if not ret:
                # End of stream (or a decode failure): stop reading.
                break
            video.append(img)
    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()
    return video


In [3]:
import tensorflow as tf

# ImageNet-pretrained MobileNetV3-Large, used in this notebook only through
# .predict() to turn person crops into feature vectors. With include_top=False
# and pooling='avg' the recorded run produced one 960-value vector per ROI.
mobilenet = tf.keras.applications.MobileNetV3Large(
    input_shape=(224, 224, 3),  # same size get_roi resizes every crop to
    alpha=1.0,
    minimalistic=False,
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    # NOTE(review): classes and dropout_rate look like classification-head
    # settings, which include_top=False omits — confirm against the Keras docs.
    classes=1000,
    pooling='avg',
    dropout_rate=0.2,
    # NOTE(review): per the Keras docs this should embed input rescaling in the
    # model, which is presumably why crops are fed in without preprocess_input
    # — confirm for the installed TensorFlow version.
    include_preprocessing=True,
)


def get_roi(frame, bbox, video=None):
    """Crop a bounding box out of one video frame and resize it to 224x224.

    Args:
        frame: Index of the frame inside the frame list.
        bbox: Box as ``[x, y, width, height]``; x/width address columns and
            y/height address rows of the frame.
        video: Sequence of frame images to crop from. When omitted, the
            notebook-level ``vid_as_arr`` is used, which keeps existing
            two-argument calls working.

    Returns:
        A ``(224, 224, 3)`` array holding the resized crop, or an all-zero
        array of that shape when the box does not overlap the frame.

    Raises:
        IndexError: If ``frame`` is outside the frame list.
    """
    if video is None:
        # Backward-compatible fallback to the global set by the calling cell.
        video = vid_as_arr
    image = video[frame]

    x, y, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
    # Clamp to 0: a negative bound would be read by NumPy as "count from the
    # far edge", which turns a box that slightly overhangs the top/left border
    # into an empty or wrong crop. Bounds past the bottom/right edge need no
    # handling because slicing already truncates them.
    top, bottom = max(int(y), 0), max(int(y + height), 0)
    left, right = max(int(x), 0), max(int(x + width), 0)

    roi = image[top:bottom, left:right]
    if roi.size == 0:
        # Same dtype as the frame so one blank crop does not promote a whole
        # batch of crops to float64 when they are stacked.
        return np.zeros((224, 224, 3), dtype=image.dtype)

    # No explicit preprocess_input call: the model in this notebook is built
    # with include_preprocessing=True.
    roi = cv2.resize(roi, (224, 224))
    return np.array(roi)






In [4]:
from pathlib import Path
from time import time

# Start of the single-video timing; the elapsed time is printed by the next cell.
start_time = time()
# One sample golf clip, referenced by an absolute local path.
# NOTE(review): this path only exists on the author's machine — consider a
# configurable data directory.
vid = Video(Path('/home/rafa/SVW/Videos/golf/10___b918ec5abe94452795b4f0f65637bd84.mp4'))
# get_roi reads frames from this notebook-level name, so it must be set before
# any ROI is extracted.
vid_as_arr = get_video_as_array(vid)
# Detections table for the clip (columns are listed by df.info() below).
df = vid.build_detections()
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 124 entries, 0 to 123
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   video         124 non-null    object
 1   frame_id      124 non-null    int64 
 2   bbox          124 non-null    object
 3   skeleton      124 non-null    object
 4   score         124 non-null    object
 5   person_id     124 non-null    int64 
 6   unique_color  124 non-null    object
dtypes: int64(2), object(5)
memory usage: 6.9+ KB


In [5]:
# Frame index and bounding box of every detection row, as plain lists.
frames = df['frame_id'].to_list()
bboxes = df['bbox'].to_list()

# Crop one ROI per detection and stack the crops into a single batch.
rois = np.array(list(map(get_roi, frames, bboxes)))
print(rois.shape)

# One forward pass over the whole batch; each row of `preds` belongs to the
# detection at the same position.
preds = mobilenet.predict(rois)
print(preds.shape)

# Store one feature vector per detection row.
preds_list = list(preds)
df['visual_features'] = preds_list

# Seconds elapsed since the timer was started in the previous cell.
print(time() - start_time)
df.head()


(124, 224, 224, 3)
(124, 960)
5.936913251876831


Unnamed: 0,video,frame_id,bbox,skeleton,score,person_id,unique_color,visual_features
0,/home/rafa/SVW/Videos/golf/10___b918ec5abe9445...,2,"[864, 275, 351, 394]","[957.0634, 388.14453, 0.14886431, 960.7271, 37...",0.7550662,1,"[82.0, 84.0, 163.0]","[12.5666, 1.7153093, 1.4526964, -0.0, 13.61994..."
1,/home/rafa/SVW/Videos/golf/10___b918ec5abe9445...,3,"[861, 271, 355, 397]","[970.1158, 347.79108, 0.05066243, 967.2939, 33...",0.79370624,1,"[82.0, 84.0, 163.0]","[14.457539, -0.0, -0.0, -0.0, 13.638592, 3.539..."
2,/home/rafa/SVW/Videos/golf/10___b918ec5abe9445...,4,"[857, 275, 354, 392]","[1000.3657, 379.90176, 0.14006604, 1005.20215,...",0.6644652,1,"[82.0, 84.0, 163.0]","[18.578241, 0.062375497, 0.022426415, -0.0, 13..."
3,/home/rafa/SVW/Videos/golf/10___b918ec5abe9445...,5,"[845, 277, 361, 391]","[1026.4132, 379.29807, 0.35073182, 1027.7922, ...",0.5797992,1,"[82.0, 84.0, 163.0]","[24.100742, 1.1887591, 0.44061238, -0.0, 10.73..."
4,/home/rafa/SVW/Videos/golf/10___b918ec5abe9445...,6,"[837, 267, 361, 401]","[1017.76044, 302.52054, 0.11567607, 1019.4441,...",0.49690774,1,"[82.0, 84.0, 163.0]","[22.167377, -0.0, 1.0997992, -0.0, 8.886408, 6..."


In [6]:
# Videos with more detections than this are skipped and logged as "long".
MAX_DETECTIONS = 2300
# Append-only log listing every video that was not processed, with the reason.
SKIPPED_LOG = '/home/rafa/SVW/skipped.txt'


def _log_skipped(vid_path, reason):
    """Append one line for ``vid_path`` and ``reason`` to the skipped-videos log."""
    with open(SKIPPED_LOG, 'a') as file:
        file.write(f'{vid_path.as_posix()} \t - {reason} \n')


progress = tqdm(videos)
skipped = 0
for vid_path in progress:
    vid = Video(vid_path)
    try:
        df = vid.build_detections()
    except Exception as exc:
        # Best effort: one failing video must not stop the batch, but it is
        # counted and logged instead of vanishing silently. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt (kernel
        # interrupt) actually stop this hour-long loop.
        skipped += 1
        _log_skipped(vid_path, f'error ({type(exc).__name__})')
        continue

    progress.set_postfix({'path': vid.video_path.as_posix(), 'detections_num': df.shape, 'skipped': skipped})
    save_path = Video._add_suffix_to_path(vid_path, '_detections_avg_big_ext.pkl')
    if save_path.exists():
        # Features were already written by an earlier run; makes the loop resumable.
        # NOTE(review): this check runs after build_detections(); if that call
        # has no side effects worth keeping, checking first would save time.
        continue

    if df.empty:
        skipped += 1
        _log_skipped(vid_path, 'empty')
        continue

    if df.shape[0] > MAX_DETECTIONS:
        skipped += 1
        _log_skipped(vid_path, 'long')
        continue

    start_time = time()
    # get_roi reads frames from the notebook-level `vid_as_arr`, so it has to
    # be rebound for every video before any crop is taken.
    vid_as_arr = get_video_as_array(vid)
    # Build the ROI batch directly (same approach as the single-video cell)
    # instead of staging full images in a DataFrame column.
    rois = np.array([
        get_roi(frame, bbox)
        for frame, bbox in zip(df['frame_id'].to_list(), df['bbox'].to_list())
    ])
    progress.set_postfix({'path': vid_path.as_posix(), 'detections_num': df.shape, 'load_time': time() - start_time})

    preds = mobilenet.predict(rois, verbose=False)
    # One feature vector per detection row, in row order.
    df['visual_features'] = list(preds)
    df.to_pickle(save_path)




100%|██████████| 3933/3933 [1:08:08<00:00,  1.04s/it, path=/home/rafa/SVW/Videos/baseball/2423___9845e29ab87745b082bfc50800996641.mp4, detections_num=(173, 11), load_time=0.396]       
