In [2]:
import av
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import Video
import time
from mvextractor.videocap import VideoCap as MVCap

In [3]:
SAMPLE_VIDEO = "/mnt/c/Skripsi/dataset-h264/R002A120/S018C001P008R002A120_rgb.mp4"

In [21]:
display_video = Video()

display_video.from_file(SAMPLE_VIDEO)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00#3ymdat\x00\x00\x00…

In [22]:
# https://ffmpeg.org/doxygen/3.2/structAVMotionVector.html
# (-1,  8, 16,    4,    8,    4,    8, 0,   0, 0, 4)
# data composition:
# (source, width, height, src_x, src_y, dest_x, dest_y, flags, motion_x, motion_y, motion_scale)
def get_mvect_tuple_to_rect(mvect_tuple):
    s, w, h, s_x, s_y, d_x, d_y, _, m_x, m_y, m_s = mvect_tuple
    t_d_x = s_x - (m_x/m_s) # true dest x
    t_d_y = s_y - (m_y/m_s) # true dest x

    return s, s_x, s_y, int(t_d_x), int(t_d_y), w, h

In [23]:
pyav_container = av.open(SAMPLE_VIDEO, "r")

VIDEO_CODEC_NAME = (pyav_container.streams.video[0].codec.name)
VIDEO_WIDTH = int(pyav_container.streams.video[0].codec_context.width)
VIDEO_HEIGHT = int(pyav_container.streams.video[0].codec_context.height)
VIDEO_FPS = int(pyav_container.streams.video[0].base_rate)

print([VIDEO_CODEC_NAME,VIDEO_WIDTH,VIDEO_HEIGHT,VIDEO_FPS],sep = "\n")

pyav_container.streams.video[0].codec_context.export_mvs = True
pyav_container.streams.video[0].codec_context.fast = True
pyav_container.streams.video[0].codec_context.skip_manual = True

['h264', 1920, 1080, 30]


In [24]:
vid_frames = pyav_container.decode(video=0)
counter = 0
for _ in vid_frames:
    counter += 1
print(counter)


KeyboardInterrupt: 

In [None]:
start_timer = time.perf_counter_ns()
vid_frame = next(vid_frames)
motion_vectors_raw = vid_frame.side_data.get('MOTION_VECTORS').to_ndarray()
frame_type = vid_frame.pict_type
end_timer = time.perf_counter_ns()
print(frame_type, *motion_vectors_raw,sep="\n")
print(1/((end_timer-start_timer) / 1000000))

StopIteration: 

In [27]:
# using extract_mvs
mv_capturer = MVCap()
mv_capturer.open(SAMPLE_VIDEO)

True

In [28]:
start_timer = time.perf_counter_ns()
mv_data = mv_capturer.read()
end_timer = time.perf_counter_ns()
print(*mv_data[2],sep="\n")
print(1/((end_timer-start_timer) / 1000000))


0.011484370111067755


In [29]:
start_timer = time.perf_counter_ns()
while True:
    mv_data = mv_capturer.read()
    if not mv_data[0]:
        break
    if len(mv_data[2]) == 0:
        continue
    print(mv_data[2][0],sep="\n")
end_timer = time.perf_counter_ns()
print(((end_timer-start_timer) / 1000000))

[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[ 1 16 16  8  8  8  8  0  0  4]
[ 1 16 16  8  8  8  8  0  2  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8 10  8  8  0  8  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[ 1 16 16  8  8  8  8  0  0  4]
[ 1 16 16  8  8  8  8  0  0  4]
[-1 16 16  8  8  8  8  0  0  4]
[-1 16 1

In [23]:
#conversion logic:
# flows_x = np.zeros((video_height, video_width), dtype=float)
# flows_y = np.zeros((video_height, video_width), dtype=float)

def mvect_round(val, low, high):
    if val < low:
        return 0
    
    if val > high:
        return 255

    return round(255 * ((val-low)/(high-low)))

def gen_mvect_flows(flows_x, flows_y, motion_vector):

    block_size_x = motion_vector[1]
    block_size_y = motion_vector[2]
    dst_x = motion_vector[5]
    dst_y = motion_vector[6]
    motion_x = motion_vector[7]
    motion_y = motion_vector[8]

    str_y = int(dst_y-block_size_y/2)
    str_x = int(dst_x-block_size_x/2)
    end_y = int(dst_y-block_size_y/2+block_size_y)
    end_x = int(dst_x-block_size_x/2+block_size_x)

    flows_x[str_y:end_y, str_x:end_x] = motion_x 
    flows_y[str_y:end_y, str_x:end_x] = motion_y 

    return flows_x,flows_y 


In [22]:
# using extract_mvs -> numpy
mv_capturer = MVCap()
mv_capturer.open(SAMPLE_VIDEO)

mv_data = mv_capturer.read()
video_size_hw = mv_data[1].shape[:-1]

last_flows_x = np.zeros(video_size_hw, dtype=np.uint8)
last_flows_y = np.zeros(video_size_hw, dtype=np.uint8)

flow_list_x = []
flow_list_y = []

bound = 15
twice_bound = 30

start_timer = time.perf_counter_ns()
while True:

    if not mv_data[0]:
        break
    if len(mv_data[2]) == 0:
        flow_list_x.append(last_flows_x)
        flow_list_y.append(last_flows_y)
        mv_data = mv_capturer.read()
        continue

    flows_x = np.zeros(video_size_hw, dtype=np.int16)
    flows_y = np.zeros(video_size_hw, dtype=np.int16)

    for mv in mv_data[2]:

        if mv[0] > 0:
            continue

        flows_x, flows_y = gen_mvect_flows(flows_x, flows_y, mv)
        
        
    # bound is rephrased
    # translate to 255 first from 2*bound
    # then if < 0 which is lower than bound and > 1 which is higher than bound  
    
    flows_x = 255 * ((flows_x + bound) / twice_bound)
    flows_y = 255 * ((flows_y + bound) / twice_bound)

    flows_x[flows_x < 0] = 0
    flows_y[flows_y < 0] = 0

    flows_x[flows_x > 255] = 255
    flows_y[flows_y > 255] = 255

    flows_x = flows_x.astype(np.uint8)
    flows_y = flows_y.astype(np.uint8)

    last_flows_x = flows_x
    last_flows_y = flows_y

    flow_list_x.append(flows_x)
    flow_list_y.append(flows_y)

    mv_data = mv_capturer.read()
    
end_timer = time.perf_counter_ns()
print(((end_timer-start_timer) / 1000000))

5891.523853


In [29]:
# using extract_mvs -> numpy
mv_capturer = MVCap()
mv_capturer.open(SAMPLE_VIDEO)

mv_data = mv_capturer.read()
video_size_hw = mv_data[1].shape[:-1]

last_flows_x = np.zeros(video_size_hw, dtype=np.uint8)
last_flows_y = np.zeros(video_size_hw, dtype=np.uint8)

flow_list_x = []
flow_list_y = []

bound = 15
twice_bound = 30
inverse_twice_bound_with_max = 255/30

start_timer = time.perf_counter_ns()
while True:

    if not mv_data[0]:
        break
    if len(mv_data[2]) == 0:
        flow_list_x.append(last_flows_x)
        flow_list_y.append(last_flows_y)
        mv_data = mv_capturer.read()
        continue

    flows_x = np.zeros(video_size_hw, dtype=np.int16)
    flows_y = np.zeros(video_size_hw, dtype=np.int16)

    for mv in mv_data[2]:

        if mv[0] > 0:
            continue

        flows_x, flows_y = gen_mvect_flows(flows_x, flows_y, mv)
        
        
    # bound is rephrased
    # translate to 255 first from 2*bound
    # then if < 0 which is lower than bound and > 1 which is higher than bound  
    
    flows_x = inverse_twice_bound_with_max * (flows_x + bound)
    flows_y = inverse_twice_bound_with_max * (flows_y + bound)

    flows_x[flows_x < 0] = 0
    flows_y[flows_y < 0] = 0

    flows_x[flows_x > 255] = 255
    flows_y[flows_y > 255] = 255

    flows_x = flows_x.astype(np.uint8)
    flows_y = flows_y.astype(np.uint8)

    last_flows_x = flows_x
    last_flows_y = flows_y

    flow_list_x.append(flows_x)
    flow_list_y.append(flows_y)

    mv_data = mv_capturer.read()
    
end_timer = time.perf_counter_ns()
print(((end_timer-start_timer) / 1000000))

TypeError: only integer scalar arrays can be converted to a scalar index

In [31]:
mv_data[2][0][0]

-1

In [17]:
print(*flow_list_x[20])

[127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [127 127 127 ...   0   0   0] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [187 187 187 ... 136 136 136] [119 119 119 ... 127 127 127] [119 119 1

In [66]:
fps = int(30)
height = int(1080)
width = int(1920)
fourcc = cv2.VideoWriter_fourcc('M','J','P','G')

writer = cv2.VideoWriter("test_flow_y.avi", fourcc, fps, (width,height))

for frame in flow_list_y:
    print(np.array([frame,frame,frame]).shape)
    writer.write(np.dstack((frame,frame,frame)))

writer.release()

(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080, 1920)
(3, 1080

In [None]:
%pwd

'/home/nicholassv/Jupyter'