In [None]:
import glob, os
import time

import pandas as pd

In [None]:
# Execute the helper script inside this notebook's namespace (-i) so that the
# names it defines become available to the cells below.
# NOTE(review): presumably this is where `video_asset_processor` comes from — confirm.
%run -i '../scripts/video_asset_processor.py' 

In [None]:
def read_metric_log(path, metric):
    """Read one externally computed metric value from a log file.

    Parameters
    ----------
    path : str
        Location of the log file to read.
    metric : str
        Either 'vmaf' or 'ms-ssim'.

    Returns
    -------
    float or None
        * 'vmaf': the file is scanned line by line and the text following the
          last '= ' of the first line containing '= ' is converted to float.
        * 'ms-ssim': the file is read as CSV and the mean of its 'ms-ssim'
          column is returned.
        * None for any other metric name, or when a 'vmaf' log holds no
          line containing '= '.
    """
    if metric == 'ms-ssim':
        scores = pd.read_csv(path)['ms-ssim']
        return scores.mean()

    if metric != 'vmaf':
        return None

    with open(path) as log_file:
        # Only the first line carrying an assignment-like '= ' matters.
        first_match = next((row for row in log_file if '= ' in row), None)

    if first_match is None:
        return None
    return float(first_match.rpartition('= ')[2])

In [None]:
# Aggregated results: {original asset path: {dimensions: {metric: mean value}}}
metrics_dict = {}
metrics_list = ['temporal_difference']

# Folder names under ../data/ that hold the renditions compared to each original.
# NOTE(review): there is no plain '2' (144p) entry, only '2.1' and '2.2' — confirm this is intended.
renditions_folders = [
    '7',   # Original 1080p
    '7.1', # Original 1080p, with watermark
    '7.2', # Original 1080p, vertical flip
    '6',   # 720p
    '6.1', # 720p, with watermark
    '6.2', # 720p, vertical flip
    '5',   # 480p
    '5.1', # 480p, with watermark
    '5.2', # 480p, vertical flip
    '4',   # 360p
    '4.1', # 360p, with watermark
    '4.2', # 360p, vertical flip
    '3',   # 240p
    '3.1', # 240p, with watermark
    '3.2', # 240p, vertical flip
    '2.1', # 144p, with watermark
    '2.2', # 144p, vertical flip
]
originals_path = '../data/{}/'

# Every regular file directly inside ../data/7/ is treated as an original asset.
for original_asset in glob.iglob(originals_path.format(7) + '**', recursive=False):
    if not os.path.isfile(original_asset):  # skip sub-directories
        continue

    # perf_counter is monotonic, which makes it the right clock for intervals.
    start_time = time.perf_counter()

    # Each rendition is expected under its own folder with the original's file name.
    renditions_list = [
        originals_path.format(folder) + os.path.basename(original_asset)
        for folder in renditions_folders
    ]

    asset_processor = video_asset_processor(original_asset, renditions_list, metrics_list)
    asset_processor.display = False
    asset_processor.compute_time_history = True
    # NOTE(review): assumes process() returns a dict of DataFrame-convertible records
    # exposing a 'dimensions' field plus one field per metric — confirm against
    # video_asset_processor.py.
    asset_metrics_dict = asset_processor.process()

    dict_of_df = {k: pd.DataFrame(v) for k, v in asset_metrics_dict.items()}
    df = pd.concat(dict_of_df, axis=1).transpose()

    # NOTE(review): grouping on the metric columns as well collapses identical
    # (dimensions, value) rows, so the mean below is taken over the distinct
    # values per dimension rather than over every row — confirm this is intended.
    grouped_df = df.groupby(['dimensions'] + metrics_list, as_index=False).count()

    # Results are keyed by the 'dimensions' value, so renditions reporting the
    # same dimensions end up in a single entry.
    renditions_dict = {}
    for rendition in df['dimensions'].unique():
        rendition_rows = grouped_df[grouped_df['dimensions'] == rendition]
        renditions_dict[rendition] = {
            metric: rendition_rows[metric].mean() for metric in metrics_list
        }
    metrics_dict[original_asset] = renditions_dict

    elapsed_time = time.perf_counter() - start_time
    print('Elapsed time:', elapsed_time)

In [None]:
# Debug view: at this point dict_of_df holds the per-rendition frames built
# for the last asset handled by the processing loop above.
dict_of_df

## Extract aggregated metrics values to a pandas DataFrame

Once we have iterated through every asset of the dataset, it is time to dump the contents of the dictionary into a pandas DataFrame.
Before that, however, other metrics computed by means of external scripts (namely VMAF and MS-SSIM) need to be collected. Check out Readme.md to see how to extract those metrics.

In [None]:
# One frame per asset: columns are the rendition dimensions, rows the metrics.
dict_of_df = {
    asset_path: pd.DataFrame(per_dimension)
    for asset_path, per_dimension in metrics_dict.items()
}

# Stack the assets side by side, flip so that every (asset, dimensions) pair is
# a row, then move that pair out of the index into the 'level_0' / 'level_1'
# columns.
metrics_df = (
    pd.concat(dict_of_df, axis=1)
    .T
    .reset_index()
)

In [None]:
# Root folder holding the logs written by the external VMAF / MS-SSIM scripts.
metrics_path = '../output'
real_path = os.path.realpath(metrics_path)
extra_metrics = ['vmaf', 'ms-ssim']

for index, row in metrics_df.iterrows():
    # 'level_0' holds the original asset path and 'level_1' the rendition
    # dimensions; both are independent of the metric, so compute them once per row.
    # basename() copes with either path separator; the name is cut at the first dot.
    asset_name = os.path.basename(row['level_0']).split('.')[0]
    # NOTE(review): presumably 'dimensions' looks like '<height>:<width>' — confirm.
    dimension = row['level_1'].split(':')[0]

    for metric in extra_metrics:
        # Expected layout: <output>/<metric>/<dimension>/<asset>/<asset>_<dimension>.log
        log_path = '{}/{}/{}/{}/{}_{}.log'.format(real_path, metric, dimension, asset_name, asset_name, dimension)
        if os.path.isfile(log_path):  # leave the cell unset when no log exists
            metric_value = read_metric_log(log_path, metric)
            metrics_df.at[index, metric] = metric_value
     


In [None]:
# Persist the combined metrics table; with to_csv's defaults the row index is
# written as the first (unnamed) column.
metrics_df.to_csv('../output/metrics.csv')