# Dataset Open Part Results Reproduction for Paper
## "Video compression dataset and benchmark of learning-based video-quality metrics"

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
from statsmodels.stats.weightstats import DescrStatsW
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.legend import Legend
import json
import scipy.stats as stats

In [None]:
# Dataframe with the metrics data.
df = pd.read_csv('metric_scores_open.csv')

# Dict with the "content category" -> "list of corresponding videos" mapping.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of being left to the platform default.
with open("content_categories.json", encoding="utf-8") as f:
    cat_dict = json.load(f)

### Building the table of correlation coefficients for each sample group

### (!) Note that single codecs and bitrate ranges are available only for the full dataset

In [None]:
# Compression standards to evaluate. Only 'all' is available for the open part;
# the full (open + hidden parts) dataset also supports 'h265', 'av1' and 'vvc'.
codec_list = ['all']

# Bitrate categories to evaluate (a list, despite the name). Only 'all' is
# available for the open part; the full dataset also supports 'high' and 'low'.
bit_cat_dict = ['all']


def _kendall_tau(x, y):
    """Return the Kendall tau statistic computed by SciPy for two samples."""
    # SciPy is used because the pandas KROCC implementation isn't stable
    # in the presence of duplicates.
    return stats.kendalltau(x, y)[0]


# Column layout of the result: 'corr' and 'sample_size' first, then the metric
# columns, then every remaining key of the row dicts. Code that slices
# `corrs.columns` by position relies on this order.
leading_columns = ['corr', 'sample_size'] + list(df.columns[8:-1])

# Rows are gathered in a list and converted to a frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
rows = []
for comp in ['2021', '2019', '2020', 'ugc']:
    print(comp, 'Comparison')

    _df = df[df.comparison == comp]
    for seq in tqdm.tqdm(_df.sequence.unique()):
        for preset in _df.preset.unique():
            for codec in codec_list:
                for cat in bit_cat_dict:
                    flt = (
                        (df.comparison == comp)
                        & (df.sequence == seq)
                        & (df.preset == preset)
                    )
                    if cat != 'all':
                        flt = flt & (df['bitrate'] == cat)
                    if codec != 'all':
                        flt = flt & (df['standard'] == codec)

                    subj = df[flt]

                    # Groups with fewer than three samples are skipped.
                    if subj.shape[0] < 3:
                        continue

                    # Correlate numeric columns only: since pandas 2.0
                    # DataFrame.corr no longer drops string columns itself.
                    numeric = subj.select_dtypes(include=['number', 'bool'])

                    for corr in ['spearman', 'kendall']:
                        method = _kendall_tau if corr == 'kendall' else corr
                        rows.append({
                            'comparison': str(comp),
                            'sequence': seq,
                            'preset': preset,
                            'corr': corr,
                            'standard': codec,
                            'bitrate': cat,
                            'sample_size': subj.shape[0],
                            # Correlation of every numeric column with the
                            # subjective score, keyed by column name.
                            **numeric.corr(method=method)['Subjective score'],
                        })

corrs = pd.DataFrame(rows)
trailing_columns = [name for name in corrs.columns if name not in leading_columns]
corrs = corrs.reindex(columns=leading_columns + trailing_columns)

### Computing means and confidence intervals

In [None]:
def weigh_func(col, weights, mode='mean'):
    """Return the weighted mean of ``col`` or one end of its clipped interval.

    The interval is ``mean +/- 1.96 * std_mean`` as reported by
    ``DescrStatsW(col, weights=weights)``. Both ends are clipped so that they
    stay on the same side of zero as the mean and never exceed
    ``arctanh(0.99999)`` in magnitude. For a non-positive mean the two ends
    are swapped, so ``'-se'`` yields the end that started out nearer to zero.

    Args:
        col: Values to summarise.
        weights: Per-value weights, forwarded to ``DescrStatsW``.
        mode: ``'mean'`` for the weighted mean, ``'-se'`` for the lower end,
            ``'+se'`` for the upper end.

    Returns:
        The statistic selected by ``mode``.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.
    """
    summary = DescrStatsW(col, weights=weights)
    center = summary.mean
    below = center - 1.96 * summary.std_mean
    above = center + 1.96 * summary.std_mean

    cap = np.arctanh(0.99999)
    if center > 0:
        lower = np.clip(below, 0, cap)
        upper = np.clip(above, 0, cap)
    else:
        # Swap the ends: 'lower' becomes the end that was above the mean.
        lower = np.clip(above, -cap, 0)
        upper = np.clip(below, -cap, 0)

    if mode == 'mean':
        return center
    if mode == '-se':
        return lower
    if mode == '+se':
        return upper
    raise ValueError('Unknown mode')

### Dataset subsets presented in the paper

### (!) Note that only "FULL DATASET" is available for the open part

In [None]:
# Each pool is a (comparison, bitrate category, codec standard, sequences)
# tuple; 'all' leaves that dimension unrestricted.
pools = {
    "FULL DATASET": ('all', 'all', 'all', cat_dict["FULL DATASET"]),
    "LOW BITRATE": ('all', 'low', 'all', cat_dict["FULL DATASET"]),
    "HIGH BITRATE": ('all', 'high', 'all', cat_dict["FULL DATASET"]),
    "H.265": ('all', 'all', 'h265', cat_dict["FULL DATASET"]),
    "AV1": ('all', 'all', 'av1', cat_dict["FULL DATASET"]),
    "VVC": ('all', 'all', 'vvc', cat_dict["FULL DATASET"]),
    "UGC": ('ugc', 'all', 'all', cat_dict["FULL DATASET"]),
    "SHAKING": ('all', 'all', 'all', cat_dict["shaking"]),
    "SPORTS": ('all', 'all', 'all', cat_dict["sports"]),
    "NATURE": ('all', 'all', 'all', cat_dict["nature"]),
    "GAMING and ANIMATION": ('all', 'all', 'all', cat_dict["gaming_animation"]),
}

### Choosing the correlation options

# ==============================================================

In [None]:
# Correlation coefficient to report: 'spearman' or 'kendall'.
corr = 'spearman'
# Dataset subset to evaluate; pick one of the keys of `pools`.
pool = pools["FULL DATASET"]

# ==============================================================

### Final results generation

In [None]:
# Unpack the chosen pool: comparison(s), bitrate category, codec standard,
# sequence(s). Presets are not restricted.
comp, cat, codec, seq = pool
preset = 'all'

# Minimum number of samples a correlation row needs in order to be pooled.
min_samples_srocc = 15
min_samples_krocc = 6
min_samples = min_samples_srocc if corr == "spearman" else min_samples_krocc

# Metric columns are picked by position: everything between the first two
# columns and the last six.
# NOTE(review): this assumes `corrs` starts with 'corr' and 'sample_size' and
# ends with six non-metric columns - confirm against how `corrs` is built.
cols = corrs.columns[2:-6]


def _selection_mask(column, wanted, wildcard=None):
    """Return a boolean mask of the rows of ``column`` that match ``wanted``.

    A list matches any of its members and any other value is compared for
    equality. When ``wildcard`` is given and ``wanted`` equals it, every row
    matches.
    """
    if isinstance(wanted, list):
        return column.isin(wanted)
    if wildcard is not None and wanted == wildcard:
        return pd.Series(True, index=column.index)
    return column == wanted


# 'all' is a wildcard for comparison, sequence and preset. For bitrate and
# standard it is compared literally against the values stored in `corrs`.
flt = corrs['corr'] == corr
flt = flt & _selection_mask(corrs['comparison'], comp, wildcard='all')
flt = flt & _selection_mask(corrs['bitrate'], cat)
flt = flt & _selection_mask(corrs['sequence'], seq, wildcard='all')
flt = flt & _selection_mask(corrs['standard'], codec)
flt = flt & _selection_mask(corrs['preset'], preset, wildcard='all')
flt = flt & (corrs.sample_size >= min_samples)

# Filter and transform once; all three statistics share this frame.
selected = corrs[flt]
weights = selected['sample_size']

# Fisher z-transform of the correlations. Values of exactly +/-1 map to
# +/-inf and are replaced by the transform of +/-0.99.
fisher_z = (
    selected[cols]
    .apply(np.arctanh)
    .replace([np.inf, -np.inf], [np.arctanh(0.99), np.arctanh(-0.99)])
)


def _pooled_correlation(mode):
    """Pool every metric with ``weigh_func`` in the given mode.

    The pooled z value is mapped back with ``tanh``, its absolute value is
    taken, a result of exactly 0.99 is reported as 1, and the metrics are
    sorted in descending order.
    """
    pooled_z = fisher_z.apply(lambda column: weigh_func(column, weights, mode))
    return (
        pooled_z.apply(np.tanh)
        .abs()
        .replace([0.99], 1)
        .sort_values(ascending=False)
    )


d_mean_correlation = _pooled_correlation('mean')
d_ci_lower_bound = _pooled_correlation('-se')
d_ci_upper_bound = _pooled_correlation('+se')

In [None]:
# Place the lower bound, mean and upper bound side by side (one row per
# metric) and move the metric names from the index into a regular column.
d_final_correlation = pd.concat(
    [d_ci_lower_bound, d_mean_correlation, d_ci_upper_bound],
    axis=1,
).reset_index()
d_final_correlation.columns = [
    "Metric",
    "CI Lower Bound",
    f"Mean {corr[0].upper()}{corr[1:]} Correlation",
    "CI Upper Bound",
]

In [None]:
d_final_correlation  # show the resulting table