In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import re
import os

# Move from the notebook's folder to the sibling `results` folder in a single
# step: if `results` does not exist the call fails *before* the working
# directory is changed, instead of leaving the kernel stranded in the parent.
# NOTE: this cell is not idempotent; re-running it moves one level further up.
os.chdir(os.path.join('..', 'results'))

# Functions

In [2]:
def get_path(dataset='FordA', model_name='ResidualCNN', reg=True, exp_name=None):
    """Return the relative results folder of one attack experiment.

    The folder has the form ``<dataset>/<model_name>/attack/<attack_dir>``.
    ``<attack_dir>`` is ``fgsm_disc_attack`` when ``reg`` is truthy and
    ``fgsm_attack`` otherwise; a non-empty ``exp_name`` is appended to it
    after an underscore.
    """
    if reg:
        attack_dir = 'fgsm_disc_attack'
    else:
        attack_dir = 'fgsm_attack'

    # An empty string or None means "no experiment suffix".
    if exp_name:
        attack_dir = attack_dir + '_' + exp_name

    return '/'.join((dataset, model_name, 'attack', attack_dir))



def combined_df(dataset='FordA', model_name='ResidualCNN', reg=True, exp_name=None):
    """Load every attack-result CSV of one experiment into a single DataFrame.

    The experiment folder is resolved with ``get_path``. Every ``*.csv`` file
    in it is read with its first column as index, and two columns are added
    from the file name:

    * ``alpha``    - the text between the first ``=`` and ``.csv``, as float;
    * ``model_id`` - the text between ``<dataset>_`` and ``_alpha`` (string).

    Column names are stripped of surrounding spaces.

    Returns:
        The row-wise concatenation of all files with a fresh 0..n-1 index,
        or an empty DataFrame when the folder holds no CSV files.

    Raises:
        ValueError: if a CSV file name does not contain the expected
            ``<dataset>_<id>_alpha=<value>.csv`` pieces.
    """
    path = get_path(dataset=dataset, model_name=model_name, reg=reg, exp_name=exp_name)

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and any tie-breaking done on it later) reproducible.
    files = sorted(f for f in os.listdir(path) if f.endswith('.csv'))

    # The dot before "csv" is escaped so it only matches a literal '.', and the
    # dataset name is escaped so regex metacharacters in it are taken literally.
    alpha_pattern = re.compile(r"(?<==).*?(?=\.csv)")
    id_pattern = re.compile(fr"(?<={re.escape(dataset)}_).*?(?=_alpha)")

    frames = []

    for file in files:
        alpha_match = alpha_pattern.search(file)
        id_match = id_pattern.search(file)
        if alpha_match is None or id_match is None:
            raise ValueError(
                f'Cannot parse alpha/model id from file name {file!r} in {path!r}'
            )

        curr_res = pd.read_csv(os.path.join(path, file), index_col=0)

        curr_res['alpha'] = float(alpha_match[0])
        curr_res['model_id'] = id_match[0]

        curr_res.columns = [column.strip(' ') for column in curr_res.columns]

        frames.append(curr_res)

    if not frames:
        # pd.concat refuses an empty list; keep the "empty frame" result.
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames, axis=0).reset_index(drop=True)

def aggregated_results(
    dataset='FordA', 
    model_name='ResidualCNN', 
    exp_name='',
    reg=True,  
    metr='F_EFF_CONC',
    metr_max=True,
    k=1,
    vis = False,
    params=['ACC', 'PROB_HID', 'ACC_DISC'],
    figsize=(20, 30),
    save_pic=False):
    """Select the best ``k`` rows per ``(eps, alpha)`` group and optionally plot them.

    Args:
        dataset, model_name, exp_name, reg: forwarded to ``combined_df`` /
            ``get_path`` to locate the experiment.
        metr: column used for ranking rows inside each group.
        metr_max: rank with ``nlargest`` when true, ``nsmallest`` otherwise
            (ties keep the last occurrence).
        k: number of rows kept per group.
        vis: when true, draw one panel per (alpha, eps) pair showing the
            ``params`` columns against ``step_id`` and mark the selected step.
        params: columns plotted in each panel (not modified).
        figsize: figure size passed to ``plt.subplots``.
        save_pic: when true (and ``vis`` is true), save the figure as
            ``../<dataset>_<model_name>.jpeg``.

    Returns:
        The frame produced by ``groupby(['eps', 'alpha']).apply(...)``.
    """
    all_df = combined_df(dataset=dataset, model_name=model_name, reg=reg, exp_name=exp_name)

    # Choose the selector once instead of re-testing `metr_max` for every group.
    if metr_max:
        def pick_rows(group):
            return group.nlargest(k, metr, keep='last')
    else:
        def pick_rows(group):
            return group.nsmallest(k, metr, keep='last')

    # NOTE(review): the code below reads result['alpha'] / result['eps'], i.e. it
    # relies on groupby.apply keeping the grouping columns in its output; newer
    # pandas releases deprecate that behaviour - confirm when upgrading.
    result = all_df.groupby(['eps', 'alpha']).apply(pick_rows)

    if vis:
        plot_cols = list(params)

        all_alps = result['alpha'].unique()
        all_eps = result['eps'].unique()

        # squeeze=False always yields a 2-D axes array, so `ax[row, col]` also
        # works when there is only one alpha and/or one eps value.
        fig, ax = plt.subplots(
            result['alpha'].nunique(),
            result['eps'].nunique(),
            figsize=figsize,
            sharey=True,
            squeeze=False,
        )
        fig.suptitle(f'{dataset}_{model_name}')

        path = get_path(dataset=dataset, model_name=model_name, reg=reg, exp_name=exp_name)

        for _, row in result.iterrows():

            # File names spell integral alphas without a decimal part.
            alp = int(row['alpha']) if row['alpha'].is_integer() else row['alpha']
            eps = row['eps']
            step = int(row["step_id"])

            file_path = path + '/' + f'aa_res_{dataset}_{int(row["model_id"])}_alpha={alp}.csv'

            curr_df = pd.read_csv(file_path, index_col=0)
            curr_df.columns = [column.strip(' ') for column in curr_df.columns]

            curr_df = curr_df[curr_df['eps']==eps].set_index('step_id')

            eps_id = np.where(all_eps==eps)[0][0]
            alp_id = np.where(all_alps==alp)[0][0]

            panel = ax[alp_id, eps_id]
            panel.plot(curr_df[plot_cols])
            panel.axvline(step, ls='--', alpha=0.2, color='black')

            panel.set_title(f'eps={eps}, alpha={alp}, {metr}={row[metr]}, step={step}')
            panel.legend(plot_cols)
        
        fig.tight_layout()
        fig.subplots_adjust(top=0.95)

        if save_pic:
            # Save this figure explicitly rather than whatever pyplot
            # currently considers the active one.
            fig.savefig(f'../{dataset}_{model_name}.jpeg', dpi=120)

    return result


def multiple_agg(models, datasets, exp_name='CLEAN', k=1):
    """Run ``aggregated_results`` for every (model, dataset) pair and stack the results.

    Each successful result gets a ``model`` and a ``dataset`` column. Pairs
    that fail (for example because their results folder is missing) are
    reported on stdout and skipped, so one failing pair does not abort the
    sweep.

    Returns:
        The row-wise concatenation of all successful results, or an empty
        DataFrame when nothing succeeded or no pairs were given.
    """
    frames = []

    for model in models:
        for ds in datasets:
            try:
                aa = aggregated_results(dataset=ds, model_name=model, exp_name=exp_name, k=k)
                aa['model'] = model
                aa['dataset'] = ds
            except Exception as err:
                # `Exception` (not a bare except) lets KeyboardInterrupt and
                # SystemExit through; the reason is printed so that failures
                # can be told apart.
                print(ds, model, f'({type(err).__name__}: {err})')
                continue

            frames.append(aa)

    if not frames:
        # pd.concat refuses an empty list.
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loops.
    return pd.concat(frames, axis=0)

# Best attacks

In [3]:
# Model names and dataset names to sweep; every (model, dataset) pair is
# looked up under the results folder by `multiple_agg`.
models = [
    'LSTM',
    'ResidualCNN',
    'RNNA',
    'TST',
    'S4',
    'PatchTST',
]
datasets = [
    'FordA',
    'Strawberry',
    'GunPointMaleVersusFemale',
    'FreezerRegularTrain',
]

In [28]:
# k=1 keeps only the top row of every (eps, alpha) group.
res_best = multiple_agg(models=models, datasets=datasets, k=1)

# k=10000 keeps up to 10000 rows per group, i.e. every row as long as no
# group is larger than that.
res_all = multiple_agg(models=models, datasets=datasets, k=10000)

GunPointMaleVersusFemale LSTM
FreezerRegularTrain LSTM
FordA TST
GunPointMaleVersusFemale LSTM
FreezerRegularTrain LSTM
FordA TST


In [1]:
# res_best['EFF'] = 1 - res_best['ACC']
# res_best['F_EFF_CONC'] = 2*res_best['EFF']*res_best['CONC']/(res_best['EFF']+res_best['CONC'])

## Best for each **dataset-model pair**

In [48]:
# Group the F_EFF_CONC scores once per (dataset, model) pair.
pair_scores = res_best.groupby(['dataset', 'model'])['F_EFF_CONC']

# Index label of the best row in each pair; its first two entries are used
# as eps and alpha below.
best_keys = pair_scores.idxmax()

tmp = pd.DataFrame({
    'F_EFF_CONC': pair_scores.max(),
    'eps': best_keys.apply(lambda key: key[0]),
    'alpha': best_keys.apply(lambda key: key[1]),
})

In [49]:
# Wide view: one row per model, one column group (F_EFF_CONC / eps / alpha)
# per dataset.
ds_model = (
    tmp
    .reset_index()
    .pivot(index='model', columns='dataset')
)
ds_model

Unnamed: 0_level_0,F_EFF_CONC,F_EFF_CONC,F_EFF_CONC,F_EFF_CONC,eps,eps,eps,eps,alpha,alpha,alpha,alpha
dataset,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
LSTM,0.408074,,,0.911382,0.005,,,0.03,0.1,,,1.0
PatchTST,0.749966,0.938271,0.642873,0.987701,0.03,0.1,0.03,0.03,0.1,1.0,0.1,0.1
RNNA,0.230093,0.635999,0.926864,0.790253,0.005,0.03,0.1,0.005,0.1,10.0,0.1,1.0
ResidualCNN,0.295846,0.745052,0.260122,0.085544,0.005,0.1,0.005,0.005,0.01,0.001,0.01,10.0
S4,0.602761,0.931338,0.755909,0.844232,0.005,0.03,0.03,0.03,0.1,0.1,0.1,1.0
TST,,0.683118,0.737973,0.685776,,0.005,0.03,0.005,,0.1,0.1,0.1


In [56]:
# NOTE(review): this cell repeats, statement for statement, the earlier cell
# that builds `tmp` under "Best for each dataset-model pair"; it recomputes
# the same frame and can probably be removed.
# For each (dataset, model) pair: take the index label of the row with the
# highest F_EFF_CONC, read eps/alpha from the label's first two entries, then
# replace the label column with the maximum score itself.
# Assumes the index labels of `res_best` are (eps, alpha, ...) tuples - TODO confirm.
tmp = res_best.groupby(['dataset', 'model'])['F_EFF_CONC'].idxmax().to_frame()

tmp['eps'] = tmp['F_EFF_CONC'].apply(lambda x: x[0])
tmp['alpha'] = tmp['F_EFF_CONC'].apply(lambda x: x[1])
tmp['F_EFF_CONC'] = res_best.groupby(['dataset', 'model'])['F_EFF_CONC'].max()

In [81]:
# NOTE(review): same pivot as the earlier `ds_model` cell (duplicate).
# Rows are models; columns are (value, dataset) pairs.
ds_model = tmp.reset_index().pivot(index='model', columns='dataset')
ds_model

Unnamed: 0_level_0,F_EFF_CONC,F_EFF_CONC,F_EFF_CONC,F_EFF_CONC,eps,eps,eps,eps,alpha,alpha,alpha,alpha
dataset,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
LSTM,0.408074,,,0.911382,0.005,,,0.03,0.1,,,1.0
PatchTST,0.749966,0.938271,0.642873,0.987701,0.03,0.1,0.03,0.03,0.1,1.0,0.1,0.1
RNNA,0.230093,0.635999,0.926864,0.790253,0.005,0.03,0.1,0.005,0.1,10.0,0.1,1.0
ResidualCNN,0.295846,0.745052,0.260122,0.085544,0.005,0.1,0.005,0.005,0.01,0.001,0.01,10.0
S4,0.602761,0.931338,0.755909,0.844232,0.005,0.03,0.03,0.03,0.1,0.1,0.1,1.0
TST,,0.683118,0.737973,0.685776,,0.005,0.03,0.005,,0.1,0.1,0.1


In [58]:
# Scores for additional datasets, read from a CSV one level above the
# current working directory (first column is the index).
sof_ds_model = pd.read_csv('../data_model.csv', index_col=0)

# Put this notebook's F_EFF_CONC table next to the external one, column-wise.
own_scores = ds_model['F_EFF_CONC']
total_ds_model = pd.concat([own_scores, sof_ds_model], axis=1)
total_ds_model

Unnamed: 0_level_0,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry,Coffee,GunPoint,PowerCons,Wafer
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LSTM,0.408074,,,0.911382,,,,
PatchTST,0.749966,0.938271,0.642873,0.987701,1.0,0.8248,0.7114,
RNNA,0.230093,0.635999,0.926864,0.790253,1.0,0.4347,0.2525,0.1039
ResidualCNN,0.295846,0.745052,0.260122,0.085544,1.0,0.4716,0.6631,0.708
S4,0.602761,0.931338,0.755909,0.844232,1.0,1.0,0.3976,
TST,,0.683118,0.737973,0.685776,1.0,0.3554,0.926621,0.5989


In [59]:
# to_check = total_ds_model.drop(columns='model_params')

In [60]:
# Threshold at or above which a (model, dataset) score counts as a success.
success_rate = 0.8

# `to_check` has to be defined in this cell: its only other assignment in the
# notebook is commented out, so a fresh kernel would raise NameError here.
# errors='ignore' makes the drop a no-op when 'model_params' has not been
# added to `total_ds_model` (yet).
to_check = total_ds_model.drop(columns='model_params', errors='ignore')

# Share of available (non-NaN) scores that reach the threshold.
(to_check >= success_rate).sum().sum() / to_check.notna().sum().sum()

0.2857142857142857

## Visualize attacks

In [61]:
# Same wide layout as `ds_model`: models as rows, (value, dataset) columns.
best_metr = (
    tmp
    .reset_index()
    .pivot(index='model', columns='dataset')
)

# NOTE: from here on `datasets` and `models` are pandas Index objects taken
# from the pivot, no longer the plain lists defined under "Best attacks".
datasets = best_metr['F_EFF_CONC'].columns
models = best_metr.index

# Columns to plot and whether to write the figure to disk.
metr = ['ACC', 'PROB_HID', 'ACC_DISC']
save = False

In [11]:
# fig, ax = plt.subplots(len(datasets), len(models), figsize=(35, 20))

# for i, dataset in enumerate(datasets):
#     for j, model in enumerate(models):

#         eps = best_metr['eps'].loc[model, dataset]

#         alpha = best_metr['alpha'].loc[model, dataset]
#         alpha = int(alpha) if alpha.is_integer() else alpha

#         path = get_path(dataset, model, exp_name='CLEAN')

#         try:
#             curr_res = pd.read_csv(path + '/' + f'aa_res_{dataset}_0_alpha={alpha}.csv', index_col=0)
#             curr_res.columns = [col.strip(' ') for col in curr_res.columns]
            
#             curr_res = curr_res[curr_res['eps'] == eps].set_index('step_id')
#             ax[i, j].plot(curr_res[metr])
#             ax[i, j].set_title(f'{dataset}  {model}  F_EFF_CONC={curr_res["F_EFF_CONC"].max()}')
#             ax[i, j].legend(metr)

#         except:
#             print(model, dataset, alpha)
        
#         fig.tight_layout()
#         fig.subplots_adjust(top=0.95)

# if save:
#     fig.savefig('../img_1.jpeg', dpi=200)

# Parameter counts

In [62]:
import sys

# Make the parent of the current working directory importable and then move
# into it, so that `from src.config import ...` below can be resolved.
# NOTE(review): this depends on the working directory at the time the cell
# runs (the first cell changes into `results`) and the chdir is not
# idempotent - re-running the cell moves one more level up.
dir2 = os.path.abspath('')  # absolute path of the current working directory
dir1 = os.path.dirname(dir2)  # its parent directory
if dir1 not in sys.path:
    sys.path.append(dir1)
os.chdir('..')

from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra
from omegaconf import OmegaConf

import pandas as pd

import torch

from src.config import get_model

CUDA extension for structured kernels (Cauchy and Vandermonde multiplication) not found. Install by going to extensions/kernels/ and running `python setup.py install`, for improved speed and memory efficiency. Note that the kernel changed for state-spaces 4.0 and must be recompiled.
Falling back on slow Cauchy and Vandermonde kernel. Install at least one of pykeops or the CUDA extension for better speed and memory efficiency.


In [63]:
# Parameter counts; filled cell by cell as params.loc[model, dataset].
params = pd.DataFrame()

# Sequence length per dataset name. The dtype is given explicitly: an empty
# `pd.Series()` without dtype warns on older pandas and defaults to a
# different dtype depending on the pandas version.
ds_len = pd.Series(dtype='float64')

In [64]:
# Load each attacked model from its saved experiment config and record its
# number of parameters and the dataset's sequence length.
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

for ds in datasets:
    for model in models:
        try:
            # Reuse the shared path helper instead of re-assembling the
            # experiment folder by hand.
            path = 'results/' + get_path(dataset=ds, model_name=model, reg=True, exp_name='CLEAN')

            # Hydra keeps global state; clear it before initialising again.
            GlobalHydra.instance().clear()
            initialize(config_path= '../' + path + '/config_folder')
            cfg = compose('attack_run_config.yaml')

            attack_model_path = os.path.join(
                cfg["model_folder"],
                f"model_{cfg['model_id_attack']}_{cfg['dataset']['name']}.pt",
            )

            attack_model = get_model(
                cfg["attack_model"]["name"],
                cfg["attack_model"]["params"],
                path=attack_model_path,
                device=device,
                train_mode=cfg["attack_model"]["attack_train_mode"],
            )

            param_num = sum(p.numel() for p in attack_model.parameters())
            
            params.loc[model, ds] = param_num
            ds_len[ds] = cfg['dataset']['seq_len']

        except Exception as err:
            # Best effort: report the failing pair with its cause and go on.
            # `Exception` (not a bare except) keeps Ctrl-C working.
            print(ds, model, 'error', f'({type(err).__name__}: {err})')

FordA TST error
FreezerRegularTrain LSTM error
GunPointMaleVersusFemale LSTM error


In [65]:
def res_add_params(res_df, param_table=None, seq_lens=None):
    """Return a copy of ``res_df`` with ``seq_len`` and ``param_count`` columns.

    Args:
        res_df: frame with at least ``model`` and ``dataset`` columns. It is
            not modified; the result has a fresh 0..n-1 index.
        param_table: frame of parameter counts with model names as index and
            dataset names as columns. Defaults to the notebook-level
            ``params`` frame.
        seq_lens: mapping (Series or dict) from dataset name to sequence
            length. Defaults to the notebook-level ``ds_len`` Series.

    Returns:
        The augmented copy. Rows whose dataset or (model, dataset) pair is
        unknown get NaN instead of raising ``KeyError``, and both columns are
        always present, even for an empty input.
    """
    if param_table is None:
        param_table = params
    if seq_lens is None:
        seq_lens = ds_len

    agg = res_df.reset_index(drop=True).copy(deep=True)

    # Vectorised lookup; the cast keeps the column numeric even when the
    # lookup table itself has object dtype.
    agg['seq_len'] = agg['dataset'].map(seq_lens).astype('float64')

    agg['param_count'] = float('nan')
    # Only visit the (model, dataset) pairs that actually occur in the data.
    for (model, ds), rows in agg.groupby(['model', 'dataset']).groups.items():
        if model in param_table.index and ds in param_table.columns:
            agg.loc[rows, 'param_count'] = param_table.loc[model, ds]

    return agg

In [66]:
# Attach seq_len / param_count to both result sets.
# NOTE: the names are rebound to the returned copies, whose index has been
# reset to 0..n-1 by `res_add_params`.
res_best = res_add_params(res_df=res_best)
res_all = res_add_params(res_df=res_all)

In [88]:
# Mean EFF per model, highest first.
(
    res_best
    .groupby('model')['EFF']
    .mean()
    .sort_values(ascending=False)
)

model
LSTM           0.655139
PatchTST       0.637811
ResidualCNN    0.626454
S4             0.589456
RNNA           0.517914
TST            0.486565
Name: EFF, dtype: float64

### Dataset + params

In [19]:
# NOTE(review): `concated_best` is only assigned further down, in the
# "Best res" section, and the `cols` list used to build it there contains no
# 'dataset' column. On a fresh top-to-bottom run this cell therefore looks
# like it fails - confirm, then move it below that section and make sure
# 'dataset' is kept.
# Mean parameter count per model, divided by 1000.
params_models = concated_best.groupby('model')['param_count'].mean()/1000
# Mean sequence length per dataset, as a single-row frame of integers.
params_ds = concated_best.groupby('dataset')['seq_len'].mean().to_frame().T.astype(int)

In [20]:
# Adds a 'model_params' column to `total_ds_model` in place (values rounded
# to two decimals) ...
total_ds_model['model_params'] = params_models.round(2)

# ... and appends the per-dataset sequence lengths as an extra row.
total_ds_model_meta = pd.concat(
    [total_ds_model, params_ds],
    axis=0,
)

In [21]:
# Display the combined table (score columns, 'model_params', and the appended sequence-length row).
total_ds_model_meta

Unnamed: 0,FordA,FreezerRegularTrain,GunPointMaleVersusFemale,Strawberry,Coffee,GunPoint,PowerCons,Wafer,model_params
LSTM,0.4119,,,0.8624,,,,,36.1
PatchTST,0.8326,0.9013,0.6292,0.9757,1.0,0.8248,0.7114,,6345.22
RNNA,0.2341,0.5445,0.9936,0.6746,1.0,0.4347,0.2525,0.1039,439.52
ResidualCNN,0.2988,0.9996,0.278,0.0768,1.0,0.4716,0.6631,0.708,256.13
S4,0.5981,0.9871,0.8276,0.8072,1.0,1.0,0.3976,,47.02
TST,,0.8516,0.8054,0.5437,1.0,0.3554,0.926621,0.5989,843.73
seq_len,500.0,301.0,150.0,235.0,286.0,150.0,144.0,152.0,


# All correlations

### Best results

In [37]:
# Columns kept for the correlation analysis.
cols = [
    'EFF',
    'CONC',
    'F_EFF_CONC',
    'step_id',
    'param_count',
    'seq_len',
    'eps',
    'alpha',
    'model',
]

In [38]:
# External best-result table, restricted to the analysis columns.
sof_best = pd.read_csv('table_1.csv', index_col=0)[cols]

# Stack this notebook's best results on top of the external ones.
concated_best = pd.concat(
    [res_best[cols], sof_best],
    axis=0,
)

In [39]:
# One-hot encode the non-numeric columns, then keep the Spearman correlation
# of every feature with the three target metrics.
best_corr = (
    pd.get_dummies(concated_best, dtype=float)
    .corr(method='spearman')
    .loc[:, ['EFF', 'CONC', 'F_EFF_CONC']]
)
best_corr

Unnamed: 0,EFF,CONC,F_EFF_CONC
EFF,1.0,-0.169494,0.297904
CONC,-0.169494,1.0,0.722695
F_EFF_CONC,0.297904,0.722695,1.0
step_id,0.151872,0.314933,0.10901
param_count,-0.125038,0.219792,0.151275
seq_len,0.311124,-0.152858,0.027619
eps,0.48199,-0.393571,-0.131355
alpha,-0.183786,0.325458,0.04318
model_LSTM,0.05162,-0.131199,-0.0874
model_PatchTST,0.093609,0.281576,0.308262


### All results

In [40]:
# External table for the "all rows" variant, restricted to the analysis columns.
sof_all = pd.read_csv('table_1000.csv', index_col=0)[cols]

# Stack this notebook's full results on top of the external ones.
concated_all = pd.concat(
    [res_all[cols], sof_all],
    axis=0,
)

In [41]:
# Same correlation table as `best_corr`, computed on the full result set.
all_corr = (
    pd.get_dummies(concated_all, dtype=float)
    .corr(method='spearman')
    .loc[:, ['EFF', 'CONC', 'F_EFF_CONC']]
)
all_corr

Unnamed: 0,EFF,CONC,F_EFF_CONC
EFF,1.0,-0.428013,-0.011821
CONC,-0.428013,1.0,0.751653
F_EFF_CONC,-0.011821,0.751653,1.0
step_id,0.284306,0.069537,0.165244
param_count,-0.159508,0.199459,0.13042
seq_len,0.277142,-0.171924,-0.050536
eps,0.480311,-0.372657,-0.133134
alpha,-0.369047,0.344751,0.123788
model_LSTM,0.114182,-0.113427,-0.055776
model_PatchTST,-0.022288,0.278769,0.310712
