---
# Demo Use Case - Queries and Analytics on Video (Part 3)

The data scientist now wants to focus on video frames in which people were detected.

The metadata for the video stream produced by the object detection job is loaded into a Pandas dataframe.
This dataframe is used to allow the data scientist to view any image stored in SDP.
They can also filter, sort, and aggregate the dataframe using methods that data scientists are familiar with.

---

### Install dependencies

See [install_dependencies.ipynb](install_dependencies.ipynb).

### How to use this Notebook
1. Click *Kernel* -> *Restart Kernel and Run All Cells*.

### Import dependencies

In [None]:
%load_ext autoreload
%autoreload 2

from matplotlib import pyplot as plt
import IPython
import cv2
import itertools
import numpy as np
import pandas as pd
import json
import base64
import datetime
import time
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display
import ipywidgets as widgets
from pathlib import Path
import grpc
import imp
import pravega.grpc_gateway as pravega
import pravega.video as video
from pravega.video import UnindexedStream, OutputStream, IndexedStream, opencv_image_to_mpl
from matplotlib import pyplot as plt
from copy import copy
import os

# Reload pravega.video so that changes made to the module since it was first
# imported are picked up by this kernel.
# NOTE(review): the `imp` module is deprecated and was removed in Python 3.12;
# importlib.reload is the documented replacement.
imp.reload(video);

### Define Pravega stream parameters

In [None]:
# Scope and stream that are passed to IndexedStream later in this notebook.
# The alternative stream name is kept commented out for quick switching.
scope = 'examples'
stream = 'object-detector-input-video'
#stream = 'object-detector-output-video'

# Address (host:port) of the Pravega gRPC gateway.
# Alternative settings are kept commented out for quick switching.
#gateway = os.environ['PRAVEGA_GRPC_GATEWAY_ADDRESS']
#gateway = '10.246.27.131:54672'
gateway = 'pravega-grpc-gateway.examples.frightful-four.eaglemonk.intranet.nautilus-platform-dev.com:80'

### Initialize connection to Pravega GRPC Gateway

In [None]:
# Open an insecure gRPC channel to the gateway. The maximum receive message
# size is raised to 9 MiB (presumably so that large video frames fit in a
# single message - confirm against actual frame sizes).
pravega_channel = grpc.insecure_channel(
    gateway,
    options=[('grpc.max_receive_message_length', 9 * 1024 * 1024)],
)

# Gateway stub bound to the channel; it is handed to IndexedStream below.
pravega_client = pravega.grpc.PravegaGatewayStub(pravega_channel)

### Build timestamp index
This index maps each timestamp to a begin stream cut, an end stream cut, and an event pointer.

This indexed video player uses the event pointer to fetch frames.

In [None]:
# Reload pravega.video first, then re-import the module and its names so that
# the names bound in this notebook come from the freshly reloaded module.
# The order matters: importing before the reload would bind the old objects.
# NOTE(review): the `imp` module is deprecated and was removed in Python 3.12;
# importlib.reload is the documented replacement.
imp.reload(video);
import pravega.video as video
from pravega.video import UnindexedStream, OutputStream, IndexedStream, opencv_image_to_mpl

In [None]:
# Create the IndexedStream for the configured scope and stream, using the
# gateway client stub created above.
indexed_stream = IndexedStream(pravega_client, scope, stream)

In [None]:
%%time
# Update the timestamp index (the cell is timed by the %%time magic above).
# NOTE(review): force_full=False presumably requests an incremental update
# rather than a full rebuild - confirm in pravega.video.
indexed_stream.update_index(force_full=False)

In [None]:
# Number of rows currently held in the index dataframe.
len(indexed_stream.index_df)

In [None]:
def clean_recognitions(recognitions):
    """Summarise a list of recognitions as a comma-separated string of titles.

    Args:
        recognitions: iterable of mappings, each with a 'title' entry.

    Returns:
        The distinct titles, sorted (as returned by ``np.unique``) and joined
        with commas. An empty input yields an empty string.
    """
    titles = [recognition['title'] for recognition in recognitions]
    # np.unique both de-duplicates and sorts the titles.
    distinct_titles = np.unique(titles)
    return ','.join(distinct_titles)

In [None]:
#indexed_stream.index_df['recog'] = indexed_stream.index_df.recognitions.apply(clean_recognitions)

In [None]:
# First and last index record, transposed so that each record is shown as a
# column and each index field as a row.
indexed_stream.index_df.iloc[[0,-1]].T

### Video Player

In [None]:
# Reload pravega.video and re-import its names (now including VideoPlayer) so
# this cell uses the reloaded module.
imp.reload(video);
import pravega.video as video
from pravega.video import UnindexedStream, OutputStream, IndexedStream, opencv_image_to_mpl, VideoPlayer
# NOTE(review): this second reload runs after the from-import above, so the
# names bound in this notebook (e.g. VideoPlayer) still refer to the objects
# from the first reload, not to the ones created by this reload.
imp.reload(video);

# Build a player over the full index and show its interactive controls.
player = VideoPlayer(indexed_stream)

player.interact()

### Filtered and Sorted Player

In [None]:
# Number of index records for each distinct value of the camera column.
indexed_stream.index_df.camera.value_counts()

In [None]:
#indexed_stream.index_df.recog.value_counts()

In [None]:
#pd.DataFrame(indexed_stream.index_df.groupby(['camera','recog']).size()).unstack().fillna('-')

In [None]:
# Work on a copy so the stream's own index dataframe is left untouched.
df = indexed_stream.index_df.copy()

# Keep only the records from camera 0.
df = df[df['camera'] == 0]

# Optional filters on recognised object titles; uncomment one to apply it.
# They need the 'recog' column, whose creation is commented out earlier in
# this notebook.
#df = df[df.recog!='']
#df = df[df.recog.str.contains('person')]
#df = df[df.recog.str.contains('boat')]
#df = df[df.recog.str.contains('bus,person')]
#df = df[df.recog.str.contains('motorbike')]
#df = df[df.recog.str.contains('train')]
#df = df[df.recog.str.contains('chair')]
#df = df[df.recog.str.contains('dog')]
#df = df[df.recog.str.contains('sofa')]

# Number of records that survived the filters.
len(df)

In [None]:
# Shallow-copy the indexed stream and swap in the filtered dataframe, so the
# player only sees the selected records while the original stream keeps its
# full index.
filtered_stream = copy(indexed_stream)
filtered_stream.index_df = df

# Use VideoPlayer, the player class imported from pravega.video above;
# the name PravegaVideoPlayer is never imported or defined in this notebook.
player = VideoPlayer(filtered_stream)
player.interact()

# Playground

In [None]:
import gzip
import base64
def decode_stream_cut_text(text):
    """Decode the Base64 text form of a stream cut.

    Based on StreamCutImpl.java. The text is Base64-encoded, gzip-compressed
    UTF-8 consisting of five colon-separated fields. The first field is not
    used here (presumably a format version - confirm against
    StreamCutImpl.java); the remaining fields are the stream name and three
    comma-separated integer lists: segment numbers, epochs, and offsets.

    Args:
        text: Base64 representation of the stream cut.

    Returns:
        A dict with:
            'plaintext': the decompressed text,
            'stream': the stream name field,
            'positions': map from (segment_number, epoch) to offset.

    Raises:
        ValueError: if the text does not have exactly five fields, a list
            entry is not an integer, or the three lists differ in length.
    """
    plaintext = gzip.decompress(base64.b64decode(text)).decode('utf-8')

    # A Java-style split limit of 5 means "at most five parts", which is
    # maxsplit=4 in Python. Any surplus colon therefore stays in the last
    # field and is rejected by int() instead of being silently dropped.
    fields = plaintext.split(':', 4)
    if len(fields) != 5:
        raise ValueError(
            'Invalid stream cut text: expected 5 colon-separated fields, got %d'
            % len(fields))
    _, stream, segments_field, epochs_field, offsets_field = fields

    segment_numbers = [int(s) for s in segments_field.split(',')]
    epochs = [int(s) for s in epochs_field.split(',')]
    offsets = [int(s) for s in offsets_field.split(',')]

    # zip() would silently truncate to the shortest list, hiding corrupt input.
    if not (len(segment_numbers) == len(epochs) == len(offsets)):
        raise ValueError(
            'Invalid stream cut text: segment, epoch and offset lists differ in length')

    positions = dict(zip(zip(segment_numbers, epochs), offsets))
    return {
        'plaintext': plaintext,
        'stream': stream,
        'positions': positions,   # map from (segment_number, epoch) to offset
    }

In [None]:
# Decode a sample stream cut; the notebook displays the returned dict.
decode_stream_cut_text('H4sIAAAAAAAAADOwSq1IzC3ISS3Wz0/KSk0u0U1JLQFS+UW6+aUlBaUlumWZKan5VgZAaGhhbmBkaQAAX4Y7zDMAAAA=')

In [None]:
# Decode a second sample stream cut; the notebook displays the returned dict.
decode_stream_cut_text('H4sIAAAAAAAAADOwSq1IzC3ISS3Wz0/KSk0u0U1JLQFS+UW6+aUlBaUlumWZKan5VgZAaGhpbmliYgAAjxhMyzMAAAA=')