## 1.1 Imports and setup

Import the dependencies from the installed standard-library and third-party modules.

In [None]:
import sys
import glob, os
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline

from scipy.spatial import distance
from scipy import linalg

%matplotlib inline
# Initialise plotly's offline notebook mode (needed before offline.iplot is used below)
offline.init_notebook_mode()
# Render floats in pandas output with six decimal places
pd.options.display.float_format = '{:.6f}'.format

Now import the modules created ad hoc for this project. We use the Jupyter magics `%load_ext autoreload` and `%autoreload 2` so that these modules are reloaded every time before code is executed.
Imported classes are located in the ../scripts folder of our volume

In [None]:
# Put the project's ../scripts folder first on the import path so the
# project-specific module imported at the end of this cell can be found
sys.path.insert(0, '../scripts/')
# load the autoreload extension
%load_ext autoreload
# Set extension to reload modules every time before executing code
%autoreload 2

from video_asset_processor import video_asset_processor

## 1.2 Process one asset and a number of renditions for analysis

In this notebook we are evaluating a single asset and comparing the behavior of different renditions made at different resolutions and bitrates (500kbps and 250kbps) and with a watermark (@500kbps).
The video_asset_processor.process function returns a dictionary containing the time history of the defined metrics (in this case only pixel difference ratio).

In [None]:
# Path of the reference (original) asset
original_asset = '../data/7/bexPQO9gkSw.mp4'
# Paths of the renditions to compare against the original
renditions_list = ['../data/6/bexPQO9gkSw.mp4',
                   '../data/7.1/bexPQO9gkSw.mp4',
                   '../data/7.2/bexPQO9gkSw.mp4',
                   '../data/7.3/bexPQO9gkSw.mp4'
                  ]
# Metrics to compute; only the temporal pixel difference is used in this notebook
metrics_list = ['temporal_difference']
asset_processor = video_asset_processor(original_asset, renditions_list, metrics_list)
# NOTE(review): presumably disables on-screen display of frames while processing - confirm in video_asset_processor
asset_processor.display = False
# NOTE(review): presumably requests per-frame values rather than an aggregate - confirm in video_asset_processor
asset_processor.compute_time_history = True
# Run the processing; the result is the dictionary with the time history of the metrics
asset_metrics_dict = asset_processor.process()
# Add the 'original' label AFTER processing so that the cells below, which loop
# over renditions_list, also pick up the rows labelled 'original'
renditions_list.append('original')

Rearrange the obtained data and put it in a pandas DataFrame for easier handling.

In [None]:
# One DataFrame per top-level key of the metrics dictionary
dict_of_df = {
    key: pd.DataFrame(values)
    for key, values in asset_metrics_dict.items()
}

# Place the per-key frames side by side, then flip the result so that every
# (key, inner column) pair becomes one row. reset_index() turns the two index
# levels into the ordinary columns 'level_0' and 'level_1' used further down.
side_by_side_df = pd.concat(dict_of_df, axis=1)
metrics_df = side_by_side_df.T.reset_index()

### 1.2.1 Plot the results: single original asset

We will be using plotly's excellent library to plot the measured instantaneous difference between each frame and the subsequent one.

In [None]:
# Keep only the rows that belong to the original asset
is_original = metrics_df['level_1'] == 'original'
data_df = metrics_df[is_original]

# A single line: metric value (y) against the 'level_0' column (x)
trace = go.Scatter(
    x=data_df['level_0'],
    y=data_df['temporal_difference'],
    name='Original',
    mode='lines',
)
data = [trace]

layout = dict(
    title="Temporal Pixel Difference Ratio:",
    legend=dict(x=.6, y=.95),
    xaxis=dict(title="Frame"),
    yaxis=dict(title="Temporal pixel difference ratio"),
    hovermode="closest",
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

### 1.2.2 Plot the results: compare against the created renditions

In [None]:
def _rendition_trace(rendition):
    """Build the line trace for the rows of metrics_df labelled `rendition`."""
    rows = metrics_df[metrics_df['level_1'] == rendition]
    return go.Scatter(
        x=rows['level_0'],
        y=rows['temporal_difference'],
        name=rendition,
        mode='lines',
    )


# One trace per entry of renditions_list, in list order
data = [_rendition_trace(rendition) for rendition in renditions_list]

layout = dict(
    title="Temporal Pixel Difference Ratio:",
    legend=dict(x=.6, y=.95),
    xaxis=dict(title="Frame"),
    yaxis=dict(title="Temporal pixel difference ratio"),
    hovermode="closest",
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

## 1.3 Compare renditions by measuring different distances between them
We will treat the resulting time series of instant pixel differences as a vector that enables us to compare different renditions.
In order to be able to compare their behaviors individually we need a single scalar per rendition. This will be the Euclidean distance. Other methods for analyzing time series similarity exist, but it so happens that the Euclidean distance does a good job in our case. More sophisticated methods like Dynamic Time Warping (DTW) are in order when the time series present delayed patterns, as they allow for many-to-one point comparisons. In our analysis, point-to-point distances are sufficient and more efficient.

### 1.3.1 Rearrange the time series dataframe

In [None]:
# Pull out the metric series of every rendition. Each series is re-indexed
# from 0 so that the series line up row by row when placed side by side.
frames = [
    metrics_df.loc[metrics_df['level_1'] == rendition, 'temporal_difference']
    .reset_index(drop=True)
    for rendition in renditions_list
]

# One column per rendition, labelled with the entries of renditions_list
renditions_df = pd.concat(frames, axis=1, keys=renditions_list).astype(float)

display(renditions_df.head())
display(renditions_df.describe())

### 1.3.2 Compute the Euclidean, cosine and Mahalanobis distances of the raw time series

In [None]:
# After the transpose, renditions are rows and time samples are columns, so
# cov() yields the sample-by-sample covariance across renditions. Its
# pseudo-inverse is the matrix handed to the Mahalanobis distance.
covmx = renditions_df.T.cov()
invcovmx = linalg.pinv(covmx)

# Reference vector: the series of the original asset
x_original = np.array(renditions_df['original'].values)


def _distances_to_original(x):
    """Distances between the original's raw series and the raw series `x`."""
    return {
        'Euclidean': distance.euclidean(x_original, x),
        'Cosine': distance.cosine(x_original, x),
        'Mahalanobis': distance.mahalanobis(x_original, x, invcovmx),
    }


# renditions_list also contains 'original', so the table includes the
# original compared with itself
distances = {
    rendition: _distances_to_original(np.array(renditions_df[rendition].values))
    for rendition in renditions_list
}

distances_raw_df = pd.DataFrame.from_dict(distances, orient='index')
for sort_column in ('Cosine', 'Euclidean'):
    display(distances_raw_df.sort_values(by=[sort_column]))

### 1.3.3 Compute the Euclidean, cosine and Mahalanobis distances of the smoothed time series

In [None]:
def fourierFilter(x, n_harm=50):
    """Smooth a 1-D series by keeping its linear trend and lowest-frequency harmonics.

    The series is detrended with the slope of a least-squares line, transformed
    with the FFT, and rebuilt from the ``1 + 2 * n_harm`` components of lowest
    absolute frequency (the constant term plus ``n_harm`` positive/negative
    frequency pairs). The linear trend is added back at the end.

    Parameters
    ----------
    x : array_like
        One-dimensional sequence of samples.
    n_harm : int, optional
        Number of harmonics kept in the model. Defaults to 50, the value that
        was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Smoothed series with the same length as ``x``. When the series has no
        more than ``1 + 2 * n_harm`` samples every component is kept and the
        input is reproduced (up to rounding).

    Raises
    ------
    ValueError
        If ``n_harm`` is negative.
    """
    if n_harm < 0:
        raise ValueError("n_harm must be non-negative")

    x = np.asarray(x, dtype=float)
    n = x.size
    if n < 2:
        # A line cannot be fitted to fewer than two samples; nothing to smooth.
        return x.copy()

    t = np.arange(0, n)
    p = np.polyfit(t, x, 1)            # p[0] is the slope of the linear trend
    x_notrend = x - p[0] * t           # remove the slope only; the offset stays in the DC term
    x_freqdom = np.fft.fft(x_notrend)  # detrended x in frequency domain
    f = np.fft.fftfreq(n)              # frequency of every FFT bin

    # Bin indexes ordered from lowest to highest absolute frequency. The stable
    # sort keeps the positive frequency ahead of its negative twin.
    indexes = np.argsort(np.absolute(f), kind='stable')

    restored_sig = np.zeros(n)
    for i in indexes[:1 + n_harm * 2]:
        ampli = np.absolute(x_freqdom[i]) / n   # amplitude
        phase = np.angle(x_freqdom[i])          # phase
        restored_sig += ampli * np.cos(2 * np.pi * f[i] * t + phase)
    return restored_sig + p[0] * t
    

In [None]:
def _smoothed_trace(rendition):
    """Build the line trace of the Fourier-filtered series of one rendition."""
    smoothed = fourierFilter(np.array(renditions_df[rendition].values))
    return go.Scatter(
        x=np.arange(0, smoothed.size),
        y=smoothed,
        name=rendition,
        mode='lines',
    )


# One smoothed trace per entry of renditions_list, in list order
data = [_smoothed_trace(rendition) for rendition in renditions_list]

layout = dict(
    title="Temporal pixel difference ratio:",
    legend=dict(x=.8, y=.95),
    xaxis=dict(title="Frame"),
    yaxis=dict(title="Temporal pixel difference ratio"),
    hovermode="closest",
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)


In [None]:
# Smoothed reference series of the original asset
x_original = np.array(renditions_df['original'].values)
extrapolation_original = fourierFilter(x_original)

distances = {}
for rendition in renditions_list:
    # Smooth the rendition with the same filter before comparing
    extrapolation_rendition = fourierFilter(
        np.array(renditions_df[rendition].values)
    )

    # invcovmx is the pseudo-inverse covariance computed from the raw series
    # in the earlier distances cell
    distances[rendition] = {
        'Euclidean': distance.euclidean(extrapolation_original,
                                        extrapolation_rendition),
        'Cosine': distance.cosine(extrapolation_original,
                                  extrapolation_rendition),
        'Mahalanobis': distance.mahalanobis(extrapolation_original,
                                            extrapolation_rendition,
                                            invcovmx),
    }

distances_df = pd.DataFrame.from_dict(distances, orient='index').astype(float)

for table in (distances_df, distances_df.describe()):
    display(table)

In [None]:
# Standard-score normalisation of the smoothed-series distances:
# subtract each column's mean and divide by its standard deviation
normalized_df = distances_df.sub(distances_df.mean()).div(distances_df.std())
display(normalized_df)

In [None]:
# Min-max normalisation of the smoothed-series distances:
# rescale every column to the [0, 1] interval
column_span = distances_df.max() - distances_df.min()
normalized_df = distances_df.sub(distances_df.min()).div(column_span)
display(normalized_df)

In [None]:
# Standard-score normalisation of the raw-series distances:
# subtract each column's mean and divide by its standard deviation
normalized_df = distances_raw_df.sub(distances_raw_df.mean()).div(distances_raw_df.std())
display(normalized_df)

In [None]:
# Min-max normalisation of the raw-series distances:
# rescale every column to the [0, 1] interval
column_span = distances_raw_df.max() - distances_raw_df.min()
normalized_df = distances_raw_df.sub(distances_raw_df.min()).div(column_span)
display(normalized_df)