## 1.- Imports, setup and configuration
### 1.1.- Imports
Import the dependencies provided by the installed standard-library and third-party modules

In [None]:
import sys
import time
import glob
import os
import numpy as np
import pandas as pd
from scipy.spatial import distance

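Next, import the modules written specifically for this project. The Jupyter magics `%load_ext autoreload` and `%autoreload 2` make the kernel reload modules every time before executing code, so edits to them are picked up without a restart. The imported classes live in the `../../scripts` folder (relative to this notebook), which is added to `sys.path` in the cell below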

In [None]:
import sys
sys.path.insert(0, '../../scripts')
# load the autoreload extension
%load_ext autoreload
# Set extension to reload modules every time before executing code
%autoreload 2

from video_asset_processor import video_asset_processor

### 1.2.- Custom functions
Add the necessary custom functions for the notebook

In [None]:
def read_metric_log(path, metric):
    """Read a single aggregated score for `metric` from the log file at `path`.

    Parameters
    ----------
    path : str
        Location of the log file to read.
    metric : str
        'vmaf' or 'ms-ssim'; selects how the file is parsed.

    Returns
    -------
    float or None
        * 'vmaf': the text after the last '= ' on the first line that
          contains '= ', converted to float. None when no line matches.
        * 'ms-ssim': the mean of the 'ms-ssim' column of the CSV file.
        * any other metric name: None.
    """
    if metric == 'ms-ssim':
        # The log is a CSV file with one 'ms-ssim' value per row.
        return pd.read_csv(path)['ms-ssim'].mean()

    if metric != 'vmaf':
        return None

    with open(path) as log_file:
        # Only the first line holding '= ' is relevant; stop reading there.
        score_line = next((entry for entry in log_file if '= ' in entry), None)

    if score_line is None:
        return None
    return float(score_line.split('= ')[-1])

### 1.3.- Configure the inputs
Set up the parameters that are passed to the functions below

In [None]:
# Metrics requested from the asset processor for every asset.
# The original notes for this cell also enumerate these metric names,
# none of which is requested in metrics_list below:
#   - hash_euclidean
#   - hash_cosine
#   - hash_hamming
# and state that temporal_difference creates two output columns:
#   - temporal_difference_euclidean
#   - temporal_difference_cosine
metrics_list = ['temporal_difference', 'temporal_canny', 'histogram_distance']

# Each rendition folder is a resolution, optionally followed by a suffix
# naming the transformation applied to the video. Resolution varies in the
# outer loop so that the resulting order is 1080p, 1080p_watermark, ...
resolutions = ['1080p', '720p', '480p', '360p', '240p', '144p']
variant_suffixes = ['', '_watermark', '_flip_vertical', '_rotate_90_clockwise']
renditions_folders = [
    resolution + suffix
    for resolution in resolutions
    for suffix in variant_suffixes
]

# Folder template: '{}' is filled in with a folder name such as '1080p'.
originals_path = '../../data/{}/'

## 2.- Iterate all assets in the data set and extract their metrics

In [None]:
# Aggregated results: {original asset path: {rendition path: {metric name: value}}}
metrics_dict = {}
# Deliberately not named `list`: rebinding the builtin would leak into every
# later cell executed in the same kernel.
asset_entries = os.listdir(originals_path.format('1080p'))
# NOTE: os.listdir also counts sub-directories and dot-files, whereas the glob
# below skips dot-files and the isfile() check skips directories, so this total
# can be larger than the number of assets actually processed.
number_assets = len(asset_entries)
print ('Number of assets: {}'.format(number_assets))
count = 0

# With recursive=False the '**' pattern only matches direct children of the folder.
for original_asset in glob.iglob(originals_path.format('1080p') + '**', recursive=False):
    count += 1
    if os.path.isfile(original_asset): # filter dirs
        print('Processing asset {} of {}: {}'.format(count, number_assets, original_asset))
        start_time = time.time()

        # The same file name is looked up inside every rendition folder.
        asset_file_name = os.path.basename(original_asset)
        renditions_list = [
            originals_path.format(folder) + asset_file_name
            for folder in renditions_folders
        ]

        asset_processor = video_asset_processor(original_asset, renditions_list, metrics_list)
        asset_metrics_dict = asset_processor.process()

        # NOTE(review): assumes process() returns {path: {frame_num: {metric: value}}},
        # which is what the rename below implies -- confirm against video_asset_processor.
        dict_of_df = {k: pd.DataFrame(v) for k,v in asset_metrics_dict.items()}

        # After the transpose every row is one (path, frame_num) pair and every
        # requested metric is a column.
        metrics_df = pd.concat(dict_of_df, axis=1).transpose().reset_index(inplace=False)
        metrics_df = metrics_df.rename(index=str, columns={"level_1": "frame_num", "level_0": "path"})

        # Rows of the original asset do not depend on the rendition: select them once.
        original_rows = metrics_df[metrics_df['path']==original_asset]

        renditions_dict = {}
        for rendition in renditions_list:
            rendition_rows = metrics_df[metrics_df['path']==rendition]
            rendition_dict = {}
            for metric in metrics_list:

                original_df = original_rows[metric]
                original_df = original_df.reset_index(drop=True).transpose().dropna().astype(float)

                rendition_df = rendition_rows[metric]
                rendition_df = rendition_df.reset_index(drop=True).transpose().dropna().astype(float)

                if  'temporal' in metric:
                    # Compare only the positions that survived dropna() in the
                    # rendition series, so both vectors have the same length.
                    x_original = np.array(original_df[rendition_df.index].values)
                    x_rendition = np.array(rendition_df.values)
                    rendition_dict['{}-euclidean'.format(metric)] = distance.euclidean(x_original, x_rendition)
                    rendition_dict['{}-cosine'.format(metric)] = distance.cosine(x_original, x_rendition)
                else:
                    # Non-temporal metrics are summarised by their mean over the rendition's rows.
                    rendition_dict[metric] = rendition_df.mean()

            renditions_dict[rendition] = rendition_dict

        metrics_dict[original_asset] = renditions_dict

        elapsed_time = time.time() - start_time
        print('Elapsed time:', elapsed_time)
        print('***************************')


## 3.- Extract aggregated metrics values to a pandas DataFrame

Once we have iterated through every asset of the dataset, it is time to dump the contents of the dictionary into a pandas DataFrame.
Before that, however, additional metrics computed by external scripts (namely VMAF and MS-SSIM) need to be collected. Check out Readme.md to see how to extract those metrics.

In [None]:
# One DataFrame per original asset, built from that asset's entry in metrics_dict.
dict_of_df = {}
for original_path, renditions in metrics_dict.items():
    dict_of_df[original_path] = pd.DataFrame(renditions)

# Concatenate side by side (the dict keys become the outer column level), then
# transpose and reset the index so that both index levels become the ordinary
# columns 'level_0' and 'level_1'.
stacked_by_asset = pd.concat(dict_of_df, axis=1)
metrics_df = stacked_by_asset.T.reset_index()

In [None]:
# Root folder holding the externally computed metric logs.
metrics_path = '/home/jovyan/work/data-analysis/output'
# Resolved form of metrics_path; it is not referenced again within this cell.
real_path = os.path.realpath(metrics_path)
extra_metrics = ['vmaf', 'ms-ssim']

for index, row in metrics_df.iterrows():
    # Everything derived from the two path columns is the same for all metrics
    # of a row, so it is parsed once per row.
    original_path = row['level_0']
    rendition_path = row['level_1']

    # File name of the original, cut at its first dot.
    asset_name = original_path.split('/')[-1].split('.')[0]
    # Fourth '/'-separated component of the rendition path, e.g. '1080p_watermark'.
    attack = rendition_path.split('/')[3]
    # Leading resolution token with every 'p' removed, e.g. '1080'.
    dimension = attack.split('_')[0].replace('p','')
    # Folder name with '<dimension>p' replaced by '<dimension>', e.g. '1080_watermark'.
    attack_name = attack.replace('{}p'.format(dimension), dimension)

    for metric in extra_metrics:
        log_path = '{}/{}/{}/{}/{}_{}.log'.format(metrics_path, metric, attack_name, asset_name, asset_name, dimension)

        print('LEVEL 0', original_path)
        print('LEVEL 1:', rendition_path)
        print('ASSET NAME:', asset_name)
        print('ATTACK:', attack)
        print('DIMENSION', dimension)
        print('ATTACK NAME', attack_name)
        print('PATH:', log_path)

        if not os.path.isfile(log_path):
            print('Path not found')
            continue

        print('ADDING:',log_path)
        print('*****************************')
        # Store the value read from the log in this row, under a column named after the metric.
        metrics_df.at[index, metric] = read_metric_log(log_path, metric)



In [None]:
# Save the aggregated metrics table as CSV. The relative path is resolved
# against the kernel's current working directory.
# NOTE(review): presumably the ../output folder must already exist -- confirm.
metrics_df.to_csv('../output/metrics.csv')

In [None]:
# Display the first rows of the aggregated table as a quick visual check.
metrics_df.head()