## 1.1 Imports and setup

Import the different dependencies from installed standard modules

In [None]:
import sys
import glob, os
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline
from plotly import tools

from scipy.spatial import distance
from scipy import linalg
from scipy import signal

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Initialise plotly's offline notebook mode before any offline.iplot call below.
offline.init_notebook_mode()
# pandas display options: six decimals for floats, and up to 999 rows/columns shown.
pd.options.display.float_format = '{:.6f}'.format
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

# Show the current working directory; the relative '../../' paths used below depend on it.
os.getcwd()

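Now we import the modules created ad hoc for this project. We use the Jupyter magics `%load_ext autoreload` and `%autoreload 2` so that edits to those modules are picked up without restarting the kernel.
The imported classes are located in the `../../scripts/asset_processor/` folder of our volume.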

In [None]:
# Put the project's asset_processor scripts folder first on the import path.
sys.path.insert(0, '../../scripts/asset_processor/')
# Load the autoreload extension
%load_ext autoreload
# Set extension to reload modules every time before executing code
%autoreload 2

# Project-local modules (presumably resolved through the sys.path entry added above).
from video_asset_processor import VideoAssetProcessor
from video_metrics import video_metrics

## 1.2 Process one asset and a number of renditions for analysis

In this notebook we evaluate a single asset and compare the behavior of different renditions of it, made at different resolutions and bitrates (500kbps and 250kbps) and with a watermark (@500kbps).
The `VideoAssetProcessor.process` function returns the time history of the requested metrics for each rendition (in this case `temporal_gaussian`, `temporal_difference`, `temporal_gaussian_difference_threshold` and `temporal_dct`); we store the result in `metrics_df`.

In [None]:
# Source video. Every rendition keeps its file name and differs only in the folder.
# (A previous configuration used bbb-cut.mp4 under '../../stream/single-asset/'.)
original_asset = '../../stream/1080p/Ems8epLlmuo.mp4'
original_asset_name = original_asset.split('/')[-1]

# Path templates of the renditions, in processing order; '{}' receives the asset file name.
rendition_templates = [
    '../../stream/1080p_watermark/{}',
    '../../stream/144p/{}',
    '../../stream/720p/{}',
    '../../stream/480p/{}',
    '../../stream/240p/{}',
    '../../stream/144p_watermark/{}',
    '../../stream/240p_watermark/{}',
    '../../stream/720p_watermark/{}',
    '../../stream/480p_watermark/{}',
    '../../stream/144p_low_bitrate_4/{}',
    '../../stream/240p_low_bitrate_4/{}',
    '../../stream/720p_low_bitrate_4/{}',
    '../../stream/480p_low_bitrate_4/{}',
]
# The original itself is the first entry so it is processed alongside its renditions.
renditions_list = [original_asset] + [
    template.format(original_asset_name) for template in rendition_templates
]

# Sampling parameters handed to the processor.
seconds = 1
max_samples = 30

# Metrics requested from the processor.
metrics_list = [
    'temporal_gaussian',
    'temporal_difference',
    'temporal_gaussian_difference_threshold',
    'temporal_dct',
]

# NOTE(review): the meaning of the trailing positional False is not visible here — confirm
# against VideoAssetProcessor's signature.
asset_processor = VideoAssetProcessor(
    original_asset, renditions_list, metrics_list, seconds, max_samples, False
)
print('Processing')
metrics_df = asset_processor.process()
print('Completed')

Display the obtained metrics (`metrics_df`) for inspection.

In [None]:
# Show the metrics returned by asset_processor.process() for inspection.
display(metrics_df)

In [None]:
# Name of the metrics_df column whose series is plotted and compared in the cells below.
displayed_metric = 'temporal_gaussian_difference_threshold-series'

### 1.2.1 Plot the results: single original asset

We will use plotly's excellent library to plot the measured instantaneous difference between each frame and the one that follows it.

In [None]:
# Rows of metrics_df selected by their 'attack' label: the 1080p original and the 144p rendition.
data_df = metrics_df[metrics_df['attack']=='1080p']
rendition_df = metrics_df[metrics_df['attack']=='144p']

# Shared frame axis, sized from the 144p rendition's series.
# NOTE(review): assumes the 1080p and 144p series have the same length — confirm.
X = list(range(0, len(rendition_df['temporal_difference-series'].values[0])))

# Raw frame-difference series of the 1080p asset.
yn = data_df['temporal_difference-series'].values[0]

# 3rd-order Butterworth low-pass filter (normalised cutoff 0.05). filtfilt runs it forwards
# and backwards, so the smoothed curve is not shifted in time relative to the raw one.
# filtfilt needs more samples than its default padding length, so very short series will raise.
b, a = signal.butter(3, 0.05)
y = signal.filtfilt(b, a, yn)

# The raw series is labelled 'Difference' and its smoothed version 'Difference-filter'
# (the two data arrays were previously attached to the opposite labels).
trace0 = go.Scatter(
        x = X,
        y = yn,
        name = 'Difference',
        mode = 'lines'
    )

trace1 = go.Scatter(
        x = X,
        y = y,
        name = 'Difference-filter',
        mode = 'lines'
    )

# DCT-based series of the 144p rendition, drawn on the same axes for comparison.
trace2 = go.Scatter(
        x = X,
        y = rendition_df['temporal_dct-series'].values[0],
        name = 'DCT',
        mode = 'lines'
    )

data = [trace0, trace1, trace2]

layout = {"title": 'temporal_difference',
      "legend":{"x": .6, "y":.95},
      "xaxis": {"title": "Frame", },
      "yaxis": {"title": 'temporal_difference'},
      "hovermode":"closest"
      }
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)


In [None]:
import matplotlib.pyplot as plt

# Frame-difference series taken from rendition_df (selected in the previous plotting cell).
df = pd.DataFrame()
df['u'] = np.array(rendition_df['temporal_difference-series'].values[0])

figsize = (7, 2.75)
kw = dict(marker='o', linestyle='none', color='r', alpha=0.3)

# A sample is an outlier when it deviates from the rolling median by more than half the mean.
threshold = df['u'].mean()/2
# Rolling median over 5 samples. The first window-1 values are NaN, so back-fill them
# (and forward-fill any remainder). .bfill()/.ffill() replace the deprecated fillna(method=...).
df['pandas'] = df['u'].rolling(window=5, center=False).median().bfill().ffill()

difference = np.abs(df['u'] - df['pandas'])
outlier_idx = difference > threshold
outliers = df.loc[outlier_idx, 'u']

# Draw explicitly on the axes created here so both plots land on the same figure.
fig, ax = plt.subplots(figsize=figsize)
df['u'].plot(ax=ax)
print(outliers.index)
if not outliers.empty:
    outliers.plot(ax=ax, **kw)

### 1.2.2 Plot the results: compare against the created renditions

In [None]:
# One line trace per rendition for the metric selected in `displayed_metric`.
data = []
for rendition_path in renditions_list:
    # '../../stream/<folder>/<file>' -> legend label '<file, asset name replaced>-<folder>'
    folder, file_name = rendition_path.replace('../../stream/', '').split('/')[:2]
    legend_label = '{}-{}'.format(file_name.replace(original_asset_name, 'Video-1'), folder)

    # The whole time series sits in a single cell of the matching row.
    series = metrics_df.loc[metrics_df['path'] == rendition_path, displayed_metric].values[0]

    data.append(
        go.Scatter(
            x=list(range(len(series))),
            y=series,
            name=legend_label,
            mode='lines',
        )
    )

layout = dict(
    title=displayed_metric,
    legend=dict(x=.9, y=.95),
    xaxis=dict(title="Frame"),
    yaxis=dict(title=displayed_metric),
    hovermode="closest",
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

## 1.3 Compare renditions by measuring different distances between their time series
We will treat the resulting time series of instantaneous pixel differences as a vector, which enables us to compare different renditions.
In order to compare their behaviors individually we need a single scalar. This will be the Euclidean distance. Other methods for analyzing time series similarity exist, but it so happens that the Euclidean distance does a good job in our case. More sophisticated methods like Dynamic Time Warping (DTW) are in order when the time series present delayed patterns, as they allow for many-to-one point comparisons. In our analysis the series are aligned frame by frame, so point-to-point distances are sufficient and more efficient.

### 1.3.1 Rearrange the time series dataframe

In [None]:
# Build a frame with one column per rendition holding its `displayed_metric` time series.
# A separate local name is used for the extracted series so that `rendition_df` (the
# DataFrame the earlier plotting cells read from) is not overwritten; this keeps those
# cells re-runnable after this one.
frames = []
for rendition in renditions_list:
    # The matching row stores the whole series in a single cell; take that cell's content.
    series = metrics_df.loc[metrics_df['path'] == rendition, displayed_metric].values[0]
    frames.append(pd.DataFrame(series))

# Columns are aligned on the sample index; shorter series would be padded with NaN.
renditions_df = pd.concat(frames, axis=1)
renditions_df.columns = renditions_list
renditions_df = renditions_df.astype(float)
display(renditions_df.head())

### 1.3.2 Compute distance measures of the raw time series (Euclidean, mean and standard deviation of the absolute difference, and DTW)

In [None]:
# Reference series: the column of the original asset.
x_original = np.array(renditions_df[original_asset].values)

# Compare every rendition (the original itself excluded) against the reference.
distances = {}
renditions = [x for x in renditions_list if x != original_asset]
for rendition in renditions:

    rendition_split = rendition.replace('../../stream/','').split('/')
    rendition_name = '{}-{}'.format(rendition_split[1].replace(original_asset_name, 'Video-1'),rendition_split[0])
    x = np.array(renditions_df[rendition].values)

    # Point-to-point absolute difference, summarised by its mean and standard deviation.
    abs_diff = np.abs(x_original - x)
    distances[rendition_name] = {
        'Euclidean': distance.euclidean(x_original, x),
        'Mean': np.mean(abs_diff),
        'std': np.std(abs_diff),
        # The key is kept as 'DWT' so the column/legend name is unchanged; the value comes
        # from video_metrics.dtw_distance (presumably dynamic time warping, i.e. DTW).
        'DWT': video_metrics.dtw_distance(x_original, x),
    }

distances_raw_df = pd.DataFrame.from_dict(distances,orient='index')

# One subplot per reported measure.
n_metrics = len(distances_raw_df.columns)
fig = tools.make_subplots(rows=1, cols=n_metrics)

for position, metric in enumerate(distances_raw_df.columns, start=1):
    trace = go.Scatter(
                x = distances_raw_df[metric].index,
                y = distances_raw_df[metric].values,
                name = metric,
                mode='markers',
                text='{}'.format(metric)
            )

    fig.append_trace(trace, 1, position)

# Style the layout once, after all traces exist, with one x-axis entry per subplot
# (xaxis1 .. xaxisN) instead of a fixed set of four.
xaxis_style = {
    'xaxis{}'.format(i): dict(tickangle=60, automargin=True)
    for i in range(1, n_metrics + 1)
}
fig['layout'].update(height = 450,
                     width = 800,
                     title = displayed_metric,
                     yaxis1 = dict(
                            automargin=True,
                            title="Value"
                            ),
                     **xaxis_style)
offline.iplot(fig)


### Compute the Euclidean distance of the Fourier-smoothed time series

In [None]:
def fourierFilter(x, n_harm=50):
    """Low-pass a 1-D series by keeping only its lowest-frequency Fourier components.

    The linear slope of the series is removed, the remainder is transformed with
    an FFT, every coefficient except the DC term and the ``n_harm`` lowest
    positive/negative frequency pairs is zeroed, and the result is transformed
    back and the slope re-added.

    Parameters
    ----------
    x : array_like
        One-dimensional sequence of samples.
    n_harm : int, optional
        Number of harmonics (frequency pairs) kept in addition to the DC term.
        Defaults to 50. When ``1 + 2 * n_harm`` is at least ``len(x)`` every
        component is kept and the input is returned (up to rounding).

    Returns
    -------
    numpy.ndarray
        Float array of the same length as ``x`` with the smoothed series.
        Inputs with fewer than two samples are returned as a float copy,
        because a linear trend cannot be fitted to them.
    """
    x = np.asarray(x, dtype=float)
    n = x.size
    if n < 2:
        return x.copy()

    t = np.arange(n)
    # Only the slope is removed; the offset stays in the DC coefficient.
    slope = np.polyfit(t, x, 1)[0]
    trend = slope * t

    spectrum = np.fft.fft(x - trend)
    freqs = np.fft.fftfreq(n)

    # Bins ordered from lowest to highest |frequency|; the stable sort keeps the
    # positive-frequency bin ahead of its negative twin, so whole pairs are selected.
    order = np.argsort(np.abs(freqs), kind='stable')
    keep = order[:1 + 2 * n_harm]

    # Zero every other coefficient and invert. Taking the real part of the inverse FFT
    # equals summing amplitude * cos(2*pi*f*t + phase) over the kept bins.
    filtered = np.zeros_like(spectrum)
    filtered[keep] = spectrum[keep]
    return np.fft.ifft(filtered).real + trend
    

In [None]:
# Plot the Fourier-smoothed `displayed_metric` series of every rendition.
data = []
for rendition_path in renditions_list:
    smoothed = fourierFilter(np.array(renditions_df[rendition_path].values))

    # '../../stream/<folder>/<file>' -> legend label '<file, asset name replaced>-<folder>'
    folder, file_name = rendition_path.replace('../../stream/', '').split('/')[:2]
    legend_label = '{}-{}'.format(file_name.replace(original_asset_name, 'Video-1'), folder)

    data.append(
        go.Scatter(
            x=np.arange(0, smoothed.size),
            y=smoothed,
            name=legend_label,
            mode='lines',
        )
    )

layout = dict(
    title=displayed_metric,
    legend=dict(x=.9, y=1),
    xaxis=dict(title="Frame"),
    yaxis=dict(title=displayed_metric),
    hovermode="closest",
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)


In [None]:
# Smoothed reference: the Fourier-filtered series of the original asset.
fourier_original = fourierFilter(x_original)
distances = {}

# The original is part of renditions_list, so it appears in the plot as a zero-distance baseline.
for rendition in renditions_list:
    x = np.array(renditions_df[rendition].values)
    extrapolation_rendition = fourierFilter(x)
    rendition_split = rendition.replace('../../stream/','').split('/')
    rendition_name = '{}-{}'.format(rendition_split[1].replace(original_asset_name, 'Video-1'),rendition_split[0])

    distances[rendition_name] = {
        'Euclidean': distance.euclidean(fourier_original, extrapolation_rendition)
    }

distances_filter_df = pd.DataFrame.from_dict(distances,orient='index').astype(float)

# One subplot per reported measure (currently only 'Euclidean').
n_metrics = len(distances_filter_df.columns)
fig = tools.make_subplots(rows=1, cols=n_metrics)

for position, metric in enumerate(distances_filter_df.columns, start=1):
    trace = go.Scatter(
                x = distances_filter_df[metric].index,
                y = distances_filter_df[metric].values,
                name = metric,
                mode='markers'
            )

    fig.append_trace(trace, 1, position)

# Style the layout once, after all traces exist, with one x-axis entry per subplot.
xaxis_style = {
    'xaxis{}'.format(i): dict(tickangle=60, automargin=True)
    for i in range(1, n_metrics + 1)
}
fig['layout'].update(height=450,
                     width=800,
                     title='Fourier filtered {}'.format(displayed_metric),
                     yaxis1=dict(
                            automargin=True,
                            title="Distance"
                            ),
                     **xaxis_style)
offline.iplot(fig)

In [None]:
# First and second discrete differences of every rendition column.
# DataFrame.diff() already works on all columns at once, so no per-rendition loop is needed.
# diff() leaves NaN in the leading row(s); dropna() removes every row containing a NaN.
renditions_delta_df = renditions_df.diff().dropna()
renditions_delta_delta_df = renditions_df.diff().diff().dropna()
display(renditions_delta_df.head())
display(renditions_delta_delta_df.head())

In [None]:
# Plot the Fourier-smoothed SECOND difference (renditions_delta_delta_df) of every rendition.
# The title previously said "First" although the second difference is what is drawn here;
# the plotted data is unchanged and the title now matches it.
# NOTE(review): if the first difference was intended, switch to renditions_delta_df instead.
data=[]
for rendition in renditions_list:

    rendition_split = rendition.replace('../../stream/','').split('/')
    rendition_name = '{}-{}'.format(rendition_split[1].replace(original_asset_name, 'Video-1'),rendition_split[0])

    x = np.array(renditions_delta_delta_df[rendition].values)
    extrapolation = fourierFilter(x)

    trace = go.Scatter(
                x = np.arange(0, extrapolation.size),
                y = extrapolation,
                name = rendition_name,
                mode = 'lines'
            )

    data.append(trace)

layout = {"title":'Second Fourier derivative {}'.format(displayed_metric),
          "legend":{"x": .9, "y":0},
          "xaxis": {"title": "Frame", "automargin": True },
          "yaxis": {"title": displayed_metric},
          "hovermode":"closest"
          }
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

In [None]:

# Fourier-smoothed first difference of the original, binarised around its own mean
# (1 where the smoothed value is above the mean, 0 elsewhere).
x_original_delta = fourierFilter(np.array(renditions_delta_df[original_asset].values))
original = np.where(x_original_delta > np.mean(x_original_delta), 1, 0)

distances = {}

# The original is part of renditions_list, so it appears as a zero-distance baseline.
for rendition in renditions_list:
    rendition_split = rendition.replace('../../stream/','').split('/')
    rendition_name = '{}-{}'.format(rendition_split[1].replace(original_asset_name, 'Video-1'),rendition_split[0])

    x = fourierFilter(np.array(renditions_delta_df[rendition].values))
    fourier_rendition = np.where(x > np.mean(x), 1, 0)

    distances[rendition_name] = {
        # On 0/1 vectors this is the square root of the number of differing positions.
        'Euclidean': distance.euclidean(original, fourier_rendition),
        # Spread of the smoothed (not binarised) first difference.
        'STD': x.std(),
    }

distances_diff_df = pd.DataFrame.from_dict(distances,orient='index').astype(float)

# One subplot per reported measure.
n_metrics = len(distances_diff_df.columns)
fig = tools.make_subplots(rows=1, cols=n_metrics)

for position, metric in enumerate(distances_diff_df.columns, start=1):
    trace = go.Scatter(
                x = distances_diff_df[metric].index,
                y = distances_diff_df[metric].values,
                name = metric,
                mode='markers'
            )

    fig.append_trace(trace, 1, position)

# One x-axis style entry per subplot (xaxis1 .. xaxisN).
xaxis_style = {
    'xaxis{}'.format(i): dict(tickangle=60, automargin=True)
    for i in range(1, n_metrics + 1)
}
fig['layout'].update(height=450,
                     width=800,
                     title='Distances first derivative Fourier {}'.format(displayed_metric),
                     **xaxis_style)
offline.iplot(fig)