In [1]:
import glob
import os
import time

import pandas as pd

In [2]:
# Execute the processor script inside this notebook's namespace (-i) so the
# names it defines stay available to later cells. video_asset_processor, used
# further down, is presumably defined by this script -- TODO confirm.
%run -i '../scripts/video_asset_processor.py' 

In [24]:
def read_metric_log(path, metric):
    """Read a single aggregated value for ``metric`` from the file at ``path``.

    Parameters
    ----------
    path : str
        Location of the log (``'vmaf'``) or CSV file (``'ms-ssim'``).
    metric : str
        Either ``'vmaf'`` or ``'ms-ssim'``.

    Returns
    -------
    float or None
        * ``'vmaf'``: the number following the last ``'= '`` on the first
          line of the file that contains ``'= '``.
        * ``'ms-ssim'``: the mean of the ``'ms-ssim'`` column of the CSV.
        * ``None`` when the metric is not one of the two above, or when a
          VMAF log has no line containing ``'= '``.
    """
    if metric == 'ms-ssim':
        return pd.read_csv(path)['ms-ssim'].mean()

    if metric != 'vmaf':
        return None

    with open(path) as log_file:
        # Lazy generator: only the first matching line is ever parsed.
        scores = (
            float(row.split('= ')[-1])
            for row in log_file
            if '= ' in row
        )
        return next(scores, None)

In [3]:
# Aggregated results, shaped as
# {original asset path: {rendition 'dimensions' value: {metric name: mean value}}}
metrics_dict = {}
# Folder template: folder 7 is scanned for the originals, folders 3-6 are
# where the matching renditions are looked up.
originals_path = '../data/{}/'
# Columns of the processor output that get averaged for every rendition.
metrics = ['cosine', 'euclidean', 'hamming', 'psnr', 'ssim']

for original_asset in glob.iglob(originals_path.format(7) + '**', recursive=False):
    if not os.path.isfile(original_asset): # filter dirs
        continue
    try:
        start_time = time.time()

        # Each rendition is expected under the same file name as the original,
        # one per rendition folder.
        renditions_list = [
            originals_path.format(i) + os.path.basename(original_asset)
            for i in range(3, 7)
        ]
        asset_processor = video_asset_processor(original_asset, renditions_list)
        asset_processor.display = False
        asset_metrics_dict = asset_processor.process()

        dict_of_df = {k: pd.DataFrame(v) for k, v in asset_metrics_dict.items()}
        df = pd.concat(dict_of_df, axis=1).transpose()
        # NOTE(review): grouping by 'dimensions' plus every metric collapses
        # rows with identical values before the mean is taken below, so the
        # mean is over distinct rows -- confirm this is intended.
        grouped_df = df.groupby(['dimensions'] + metrics, as_index=False).count()
        renditions_dict = {}
        for rendition in df['dimensions'].unique():
            rendition_rows = grouped_df[grouped_df['dimensions'] == rendition]
            renditions_dict[rendition] = {
                metric: rendition_rows[metric].mean() for metric in metrics
            }
        metrics_dict[original_asset] = renditions_dict

        elapsed_time = time.time() - start_time
        print('Elapsed time:', elapsed_time)
    except Exception as error:
        # Best effort: one broken asset must not abort the whole run, but
        # KeyboardInterrupt/SystemExit are no longer swallowed and the reason
        # for the failure is reported instead of being silently discarded.
        print('Failed to process asset:', original_asset)
        print('Reason:', repr(error))

Processing asset: ../data/7/7VbrPlIuxqs.mp4
Elapsed time: 63.949406147003174
Processing asset: ../data/7/VZpNs8Ob5XU.mp4
Cleaning up
Elapsed time: 75.74939942359924
Processing asset: ../data/7/ckO1MEL4caU.mp4
Cleaning up
Elapsed time: 91.08229351043701
Processing asset: ../data/7/kFy8E_NiLY0.mp4
Cleaning up
Elapsed time: 31.504600286483765
Processing asset: ../data/7/CY78yHv7LaU.mp4
Cleaning up
Elapsed time: 55.598493814468384
Processing asset: ../data/7/RQQv6iSeLdE.mp4
Cleaning up
Failed to process asset: ../data/7/RQQv6iSeLdE.mp4
Processing asset: ../data/7/R_GiYv8zFvs.mp4
Cleaning up
Elapsed time: 64.74546718597412
Processing asset: ../data/7/U8d1gDaRtuQ.mp4
Cleaning up
Elapsed time: 41.542884349823
Processing asset: ../data/7/Co_BCnun9O8.mp4
Cleaning up
Elapsed time: 41.504942178726196
Processing asset: ../data/7/pWtxt4YlI_A.mp4
Cleaning up
Elapsed time: 60.771886348724365
Processing asset: ../data/7/DqaVOuDkh6E.mp4
Cleaning up
Elapsed time: 32.128148555755615
Processing asset: ../

  dist = 1.0 - uv / np.sqrt(uu * vv)


Elapsed time: 30.49546790122986
Processing asset: ../data/7/4sCVouieJUc.mp4
Cleaning up
Elapsed time: 35.758458375930786
Processing asset: ../data/7/bexPQO9gkSw.mp4
Cleaning up
Elapsed time: 55.89747476577759
Processing asset: ../data/7/YRXsFtqOeAI.mp4
Cleaning up
Elapsed time: 51.93795204162598
Processing asset: ../data/7/H6Yizv1gwho.mp4
Cleaning up
Elapsed time: 96.33862519264221
Processing asset: ../data/7/NF58uVhAEYk.mp4
Cleaning up
Elapsed time: 69.22758674621582
Processing asset: ../data/7/jfagDfeNWhU.mp4
Cleaning up
Elapsed time: 42.28008699417114
Processing asset: ../data/7/XgcuOXhDKWs.mp4
Cleaning up
Elapsed time: 62.771058320999146
Processing asset: ../data/7/WborWpqHw8E.mp4
Cleaning up
Elapsed time: 86.76258945465088
Processing asset: ../data/7/zsOUBiGssko.mp4
Cleaning up
Elapsed time: 58.286794900894165
Processing asset: ../data/7/YdsFj5VZxso.mp4
Cleaning up
Elapsed time: 63.3607439994812
Processing asset: ../data/7/AKNXfemrTh4.mp4
Cleaning up
Elapsed time: 95.2504661083221

## Extract aggregated metrics values to a pandas DataFrame

Once every asset in the dataset has been processed, the contents of the dictionary can be loaded into a pandas DataFrame.
Before doing so, the metrics computed by external scripts (namely VMAF and MS-SSIM) need to be available so that they can be collected as well. Check out Readme.md to see how to extract those metrics.

In [8]:
# One DataFrame per original asset, built from its per-rendition metric dicts.
dict_of_df = {}
for asset_path, renditions in metrics_dict.items():
    dict_of_df[asset_path] = pd.DataFrame(renditions)

# Concatenate side by side, flip so each row is one (asset, rendition) pair,
# then move the index into regular columns.
metrics_df = pd.concat(dict_of_df, axis=1).T.reset_index()

In [55]:
# Root folder that is searched for the per-metric log files.
metrics_path = '../output'
real_path = os.path.realpath(metrics_path)
extra_metrics = ['vmaf', 'ms-ssim']

for index, row in metrics_df.iterrows():
    # File name without directory or anything after its first dot.
    asset_name = row['level_0'].rpartition('/')[2].partition('.')[0]
    # Part of the rendition label that precedes the first ':'.
    dimension = row['level_1'].partition(':')[0]

    for metric in extra_metrics:
        log_path = '{}/{}/{}/{}/{}_{}.log'.format(real_path, metric, dimension, asset_name, asset_name, dimension)
        if not os.path.isfile(log_path): # filter dirs
            continue
        # Rows without a matching log file are left untouched.
        metrics_df.at[index, metric] = read_metric_log(log_path, metric)
     


In [57]:
# Persist the combined metrics table as CSV under ../output.
metrics_df.to_csv('../output/metrics.csv')