# Evaluation 

In [1]:
import numpy as np
import pandas as pd

## Load data from csv files

In [2]:
# Composite Error
df_comp = pd.read_csv('evaluation_composite.csv')
df_comp = df_comp.sort_values(by='Name')
# Harmonizer
df = pd.read_csv('evaluation_harmonizer.csv')
df_harm = df.sort_values(by='Name')
# DCCF
df = pd.read_csv('evaluation_DCCF.csv')
df_dccf = df.sort_values(by='Name')
# PCTNet CNN
df = pd.read_csv('evaluation_PCTNet_CNN.csv')
df_ours_CNN = df.sort_values(by='Name')
# PCTNet ViT
df = pd.read_csv('evaluation_PCTNet_ViT.csv')
df_ours_ViT = df.sort_values(by='Name')

## Evaluation fMSE, PSNR, MSE, SE 

In [4]:
def evaluate(df):
    print(f"fMSE: {np.mean(df['fMSE']):.2f} (+/-{np.std(df['fMSE']):.2f})")
    print(f"PSNR: {np.mean(df['PSNR']):.2f} (+/-{np.std(df['PSNR']):.2f})")
    print(f"MSE: {np.mean(df['MSE']):.2f} (+/-{np.std(df['MSE']):.2f})")
    print(f"SE: {np.mean(df['SE']):.2f} (+/-{np.std(df['SE']):.2f})")

In [11]:
evaluate(df_comp)

fMSE: 1462.45 (+/-1638.06)
PSNR: 31.39 (+/-7.02)
MSE: 177.99 (+/-425.09)
SE: 2958.43 (+/-10335.97)


In [18]:
evaluate(df_ours_ViT)

fMSE: 237.49 (+/-298.93)
PSNR: 39.29 (+/-5.53)
MSE: 18.84 (+/-47.17)
SE: 190.49 (+/-981.59)


In [15]:
evaluate(df_ours_CNN)

fMSE: 282.75 (+/-323.49)
PSNR: 38.30 (+/-5.63)
MSE: 24.02 (+/-55.50)
SE: 215.06 (+/-930.69)


In [16]:
evaluate(df_dccf)

fMSE: 302.56 (+/-342.24)
PSNR: 38.01 (+/-5.53)
MSE: 24.72 (+/-51.73)
SE: 226.54 (+/-780.98)


In [17]:
evaluate(df_harm)

fMSE: 339.20 (+/-382.93)
PSNR: 37.23 (+/-5.62)
MSE: 27.62 (+/-57.73)
SE: 2149.56 (+/-6231.62)


## Alternative Metrics

In [7]:
def create_addtional_columns(df, df_comp):
    df['MSE_comp'] = df_comp['MSE']
    df['fMSE_comp'] = df_comp['fMSE']
    df['PSNR_comp'] = df_comp['PSNR']
    df['MSE_diff'] = df['MSE'] - df['MSE_comp']
    df['fMSE_diff'] = df['fMSE'] - df['fMSE_comp']
    df['PSNR_diff'] = df['PSNR'] - df['PSNR_comp'] 

    def f(row):
        if row['Name'][0] == 'a':
            return 'HAdobe5k'
        elif row['Name'][0] == 'f':
            return 'HFlickr'
        elif row['Name'][0] == 'c':
            return 'HCOCO'
        elif row['Name'][0] == 'd':
            return 'Hday2night'
    df['Dataset'] = df.apply(f, axis=1)
    return df

In [8]:
df_ours_ViT = create_addtional_columns(df_ours_ViT, df_comp)
df_ours_CNN = create_addtional_columns(df_ours_CNN, df_comp)
df_dccf = create_addtional_columns(df_dccf, df_comp)
df_harm = create_addtional_columns(df_harm, df_comp)

Data exploration using different metrics is possible. For example, 20 worst harmonization results by fMSE:

In [9]:
df_ours_ViT.sort_values(by='fMSE')[-20:]

Unnamed: 0,Name,MSE,PSNR,fMSE,SE,SSIM,height,width,mask_area,MSE_comp,fMSE_comp,PSNR_comp,MSE_diff,fMSE_diff,PSNR_diff,Dataset
7334,d90000012-181_1_7,50.366405,31.109394,2430.560547,46.200096,0.991845,480,637,6336.0,871.71759,3217.138916,18.727046,-821.351185,-786.578369,12.382348,Hday2night
7290,d18897-20120428-164138_1_2,33.791149,32.842774,2483.406982,31.141924,0.994061,480,640,4180.0,184.658051,831.959229,25.467121,-150.866901,1651.447754,7.375653,Hday2night
6580,f5161_1_2,6.983235,39.690236,2491.846191,14.652056,0.999188,683,1024,1960.0,3.883795,187.6716,42.238239,3.09944,2304.174591,-2.548003,HFlickr
6494,f108_1_2,116.577835,27.464644,2511.050537,245.317072,0.980511,685,1024,32565.0,0.799886,171.240768,49.100515,115.777949,2339.809769,-21.635871,HFlickr
7177,f2690_1_2,1279.668335,17.059829,2527.803711,3007.32288,0.943544,765,1024,396566.0,135.555496,866.080261,26.809632,1144.112839,1661.72345,-9.749803,HFlickr
6634,f4552_1_1,14.315855,36.57263,2608.07373,30.037184,0.998143,683,1024,3839.0,24.507875,558.494324,34.237747,-10.19202,2049.579407,2.334884,HFlickr
6444,f2508_1_1,14.497422,36.517895,2837.96167,30.46268,0.998467,684,1024,3578.0,4.209856,758.403992,41.888131,10.287567,2079.557678,-5.370235,HFlickr
5923,c360951_1097742_1,174.701401,25.70784,2915.785156,143.2272,0.979228,427,640,16373.77,267.295044,4461.182617,23.860895,-92.593643,-1545.397461,1.846945,HCOCO
6581,f5161_1_1,8.413094,38.881246,3002.06665,17.652152,0.999035,683,1024,1960.0,5.465471,2120.081299,40.754527,2.947623,881.985352,-1.873281,HFlickr
7271,d1048-20120628-200951_1_3,16.902056,35.851408,3044.89917,15.382831,0.997464,477,636,1684.0,337.99115,1257.965332,22.84175,-321.089094,1786.933838,13.009658,Hday2night


# Percentage of Improvements by Metrics

In [55]:
def classification_eval(df, metric):
    print(f'Improvement of {metric}')
    out_title = 'All\t|\t'
    out_str = f"{len(df[df[metric] < 0])/len(df)*100:.2f}%\t|\t"
    for ds in ['HCOCO', 'HFlickr',  'HAdobe5k', 'Hday2night']:
        df_ds = df[df['Dataset']==ds]
        out_title += f'{ds}\t|\t' 
        out_str += f'{len(df_ds[df_ds[metric]<0])/len(df_ds)*100:.2f}%\t|\t'
        if ds in ['HAdobe5k', 'Hday2night']:
            out_title = out_title[:-3] + '|\t'
    print(out_title)
    print(out_str)

In [56]:
classification_eval(df_ours_ViT, 'fMSE_diff')
classification_eval(df_ours_ViT, 'PSNR_diff')
classification_eval(df_ours_ViT, 'MSE_diff')

Improvement of fMSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
95.11%	|	96.52%	|	86.96%	|	96.71%	|	74.44%	|	
Improvement of PSNR_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
5.73%	|	3.48%	|	23.43%	|	3.29%	|	7.52%	|	
Improvement of MSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
94.27%	|	96.52%	|	76.57%	|	96.71%	|	92.48%	|	


In [57]:
classification_eval(df_ours_CNN, 'fMSE_diff')
classification_eval(df_ours_CNN, 'PSNR_diff')
classification_eval(df_ours_CNN, 'MSE_diff')

Improvement of fMSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
88.48%	|	88.77%	|	76.69%	|	93.38%	|	72.93%	|	
Improvement of PSNR_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
26.18%	|	27.71%	|	57.25%	|	12.27%	|	9.02%	|	
Improvement of MSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
73.82%	|	72.29%	|	42.75%	|	87.73%	|	90.98%	|	


In [58]:
classification_eval(df_harm, 'fMSE_diff')
classification_eval(df_harm, 'PSNR_diff')
classification_eval(df_harm, 'MSE_diff')

Improvement of fMSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
85.67%	|	84.45%	|	74.76%	|	92.92%	|	75.19%	|	
Improvement of PSNR_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
27.70%	|	28.16%	|	58.21%	|	16.57%	|	3.76%	|	
Improvement of MSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
73.26%	|	71.87%	|	41.91%	|	86.62%	|	96.24%	|	


In [59]:
classification_eval(df_dccf, 'fMSE_diff')
classification_eval(df_dccf, 'PSNR_diff')
classification_eval(df_dccf, 'MSE_diff')

Improvement of fMSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
93.76%	|	93.86%	|	91.55%	|	95.19%	|	81.20%	|	
Improvement of PSNR_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
6.24%	|	6.14%	|	8.45%	|	4.81%	|	18.80%	|	
Improvement of MSE_diff
All	|	HCOCO	|	HFlickr	|	HAdobe5k|	Hday2night|	
93.76%	|	93.86%	|	91.55%	|	95.19%	|	81.20%	|	
