Demo for MANIQA metric setup

In [None]:
# Run these commands in the benchmark's root directory to set up the MANIQA metric from its source directory.
# You can also use your own custom metric if it matches the framework's call interface. An example implementation can be found in the model.py file in subjects/maniqa/

# !git submodule update --init --recursive
# !cd subjects/maniqa/src && git apply ../patches/maniqa.patch -v
# !cd subjects/maniqa && cp model.py src

In [None]:
# Load MANIQA weights
# !wget https://github.com/IIGROUP/MANIQA/releases/download/Koniq10k/ckpt_koniq10k.pt -P subjects/maniqa/
# Install MANIQA requirements
# !pip install -r subjects/maniqa/src/requirements.txt

In [5]:
# Metric configuration -- adjust these values when benchmarking a different metric.
# metric_cfg['bounds'] gives the minimum and maximum possible metric values when such
# limits exist, and the approximate range of the metric's values otherwise.
metric_path = './subjects/maniqa/src/'
path_to_weights = './subjects/maniqa/ckpt_koniq10k.pt'
metric_cfg = dict(
    is_fr=False,
    bounds=dict(low=0, high=1),
    name='maniqa',
)
device = 'cuda:0'

In [6]:
# Make the metric's source directory importable so that the MetricModel wrapper
# defined in its model.py can be found.
import sys

# The guard keeps sys.path free of duplicate entries when this cell is re-run.
if metric_path not in sys.path:
    sys.path.append(metric_path)
from model import MetricModel

In [7]:
# Instantiate the metric wrapper with the configured weights file and device.
model = MetricModel(
    model_path=path_to_weights,
    device=device,
)

Run benchmark attacks

In [8]:
import robustness_benchmark as rb

In [9]:
# Path to the dataset and its name. The path must be a directory containing only images (or only videos).
dataset_path = './test_dataset/'
dataset_name = 'TEST'

# Directory where the results of the attacks will be stored.
# Results will be organized as directories with names similar to the attacks.
# Files with names *metric_name*.csv will be saved in these directories.
result_save_dir = './test_results/'

# Take a shallow copy of the chosen attack set so that adding custom attacks to
# attacks_to_run does not mutate the dict that belongs to the library.
# attacks_to_run = dict(rb.interface.iterative_attacks)
# attacks_to_run = dict(rb.interface.uap_attacks)
attacks_to_run = dict(rb.interface.all_default_attacks)
rb.interface.all_default_attacks.keys()

dict_keys(['ifgsm', 'mifgsm', 'amifgsm', 'std-fgsm', 'korhonen-et-al', 'madc', 'cumulative-uap', 'generative-uap', 'uap'])

In [None]:
# You can also add your own custom attack.
# Make sure that it follows the benchmark's call interface.
# The default attack implementations can be found in robustness_benchmark.methods.<attack name>
# attacks_to_run['attack name'] = <attack callable>

In [None]:
# NOTE: the results saved in this notebook's cell outputs are not representative --
# the attacks were launched on a small debug set of only 5 images.
rb.interface.run_attacks(
    attacks_dict=attacks_to_run,
    device=device,
    metric_model=model,
    metric_cfg=metric_cfg,
    dataset_names=[dataset_name],
    dataset_paths=[dataset_path],
    save_dir=result_save_dir,
)

Collect results from csv files

In [11]:
# Collect the attack results stored under result_save_dir and display them.
results_df = rb.interface.collect_results(
    result_save_dir,
    metric_cfg=metric_cfg,
)
results_df

Unnamed: 0,dataset,maniqa_clear,maniqa_attacked,maniqa_ssim,maniqa_psnr,maniqa_mse,attack
0,TEST,0.497564,1.108695,0.809924,30.214003,0.000952,amifgsm
1,TEST,0.568488,1.090278,0.906829,29.778993,0.001052,amifgsm
2,TEST,0.335649,1.060856,0.812977,30.184957,0.000958,amifgsm
3,TEST,0.510699,1.091831,0.793638,30.128886,0.000971,amifgsm
4,TEST,0.485868,0.956575,0.905383,29.416244,0.001144,amifgsm
...,...,...,...,...,...,...,...
47,TEST,0.497564,0.313744,0.567957,24.812781,0.003302,default-uap_VOC2012_amp0.8
50,TEST,0.568488,0.353059,0.783253,24.812781,0.003302,default-uap_VOC2012_amp0.8
53,TEST,0.335649,0.202528,0.578624,24.812781,0.003302,default-uap_VOC2012_amp0.8
56,TEST,0.510699,0.289065,0.572215,24.812781,0.003302,default-uap_VOC2012_amp0.8


Domain transformation

Note: domain transformation (based on Neural Optimal Transport) currently works only with the metrics used in the benchmark, for which the domain transformation is pretrained and stored in `robustness_benchmark/models/models_to_mdtvsfa.pth`.\
For other metrics, you can still use `evaluate_robustness()` on `results_df`.

In [31]:
# Run the domain transformation on the collected results for the configured metric
# and display the transformed table.
transformed_results_df = rb.interface.domain_transform(
    results_df,
    metrics=[metric_cfg['name']],
    batch_size=1000,
    device=device,
)
transformed_results_df

100%|██████████| 1/1 [00:00<00:00, 12.58it/s]

current metric:  maniqa





Unnamed: 0,dataset,maniqa_clear,maniqa_attacked,maniqa_ssim,maniqa_psnr,maniqa_mse,attack
0,TEST,0.593020,1.385626,0.809924,30.214003,0.000952,amifgsm
1,TEST,0.691731,1.360014,0.906829,29.778993,0.001052,amifgsm
2,TEST,0.316637,1.326889,0.812977,30.184957,0.000958,amifgsm
3,TEST,0.605928,1.363978,0.793638,30.128886,0.000971,amifgsm
4,TEST,0.571077,1.199576,0.905383,29.416244,0.001144,amifgsm
...,...,...,...,...,...,...,...
47,TEST,0.593067,0.286116,0.567957,24.812781,0.003302,default-uap_VOC2012_amp0.8
50,TEST,0.699335,0.339540,0.783253,24.812781,0.003302,default-uap_VOC2012_amp0.8
53,TEST,0.321193,0.115863,0.578624,24.812781,0.003302,default-uap_VOC2012_amp0.8
56,TEST,0.616140,0.231154,0.572215,24.812781,0.003302,default-uap_VOC2012_amp0.8


Metric robustness evaluation

In [32]:
# Without domain transformation (relies on results_df only, so it can be used even
# when transformed_results_df was never created):
# rb.interface.evaluate_robustness(results_df, attacks=list(results_df['attack'].unique()) + ['all', 'iterative', 'uap'])

# Evaluate every attack present in the transformed results, plus the
# 'all', 'iterative' and 'uap' entries.
rb.interface.evaluate_robustness(
    transformed_results_df,
    attacks=list(transformed_results_df['attack'].unique()) + ['all', 'iterative', 'uap'],
)

Unnamed: 0,metric,attack,energy_distance_score,normalized_absolute_gain,normalized_relative_gain,relative_gain_classic,robustness_score,wasserstein_score
0,maniqa,amifgsm,1.879,"2.016 (1.848, 2.184)","1.364 (1.055, 1.674)","1.563 (1.171, 1.956)","-0.387 (-0.422, -0.351)",2.016
1,maniqa,cumulative-uap_COCO_amp0.2,-0.063,"-0.011 (-0.021, -0.001)","-0.007 (-0.012, -0.001)","-0.007 (-0.013, -0.001)","2.101 (1.594, 2.609)",-0.012
2,maniqa,cumulative-uap_VOC2012_amp0.2,-0.325,"-0.163 (-0.229, -0.096)","-0.099 (-0.134, -0.064)","-0.112 (-0.150, -0.074)","0.766 (0.567, 0.966)",-0.163
3,maniqa,cumulative-uap_COCO_amp0.4,-0.187,"-0.057 (-0.093, -0.022)","-0.034 (-0.053, -0.014)","-0.038 (-0.059, -0.016)","1.289 (1.010, 1.567)",-0.057
4,maniqa,cumulative-uap_VOC2012_amp0.4,-0.654,"-0.440 (-0.532, -0.349)","-0.276 (-0.328, -0.224)","-0.315 (-0.379, -0.252)","0.284 (0.183, 0.385)",-0.44
5,maniqa,cumulative-uap_COCO_amp0.8,-0.253,"-0.051 (-0.138, 0.037)","-0.016 (-0.082, 0.050)","-0.015 (-0.093, 0.063)","0.960 (0.760, 1.160)",-0.108
6,maniqa,cumulative-uap_VOC2012_amp0.8,-1.083,"-0.856 (-1.003, -0.709)","-0.525 (-0.564, -0.486)","-0.589 (-0.633, -0.545)","-0.007 (-0.126, 0.112)",-0.856
7,maniqa,generative-uap_COCO_amp0.2,-0.112,"-0.034 (-0.050, -0.018)","-0.022 (-0.034, -0.011)","-0.025 (-0.038, -0.012)","1.518 (1.231, 1.806)",-0.034
8,maniqa,generative-uap_VOC2012_amp0.2,-0.107,"-0.034 (-0.060, -0.008)","-0.023 (-0.039, -0.007)","-0.026 (-0.045, -0.008)","1.413 (1.130, 1.696)",-0.034
9,maniqa,generative-uap_COCO_amp0.4,-0.241,"-0.106 (-0.156, -0.057)","-0.066 (-0.093, -0.039)","-0.074 (-0.105, -0.043)","1.043 (0.706, 1.379)",-0.106
