In [103]:
import pandas as pd
from pathlib import Path
import numpy as np
import random

from sklearn.preprocessing import StandardScaler

In [104]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch

In [105]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device}")

Using cuda


In [None]:
from captum.attr import IntegratedGradients, DeepLift, NoiseTunnel, FeatureAblation, FeaturePermutation, GuidedBackprop, Saliency, InputXGradient, ShapleyValueSampling

from captum.metrics import sensitivity_max, infidelity

In [108]:
def annotate_attributions(attributions, columns):
    return pd.DataFrame(attributions.cpu().detach().numpy(), columns = columns)

In [109]:
def calculate_avg_attributions(attributions, axis=0):
    return attributions.mean(axis=axis).sort_values(ascending=False).to_frame(name="avg_attribution_coeff")

In [110]:
# Rank features by their absolute coefficient value to identify the most important ones
def rank_features_by_importance(summarized_attributions_df):
    summarized_attributions_df["abs_avg_attribution_coeff"] = summarized_attributions_df["avg_attribution_coeff"].abs()
    summarized_attributions_df["feature_importance_rank"] = summarized_attributions_df["abs_avg_attribution_coeff"].rank(ascending=False)
    summarized_attributions_df.drop(["abs_avg_attribution_coeff"], axis=1, inplace=True)

    return summarized_attributions_df

In [111]:
#For each feature, show its relative order (1 - most positive influence, n - least positive influence)
def add_relative_order_of_features(summarized_attributions_df):
    summarized_attributions_df = summarized_attributions_df.sort_values(by="avg_attribution_coeff", ascending=False)
    summarized_attributions_df["relative_order"] = summarized_attributions_df.rank(ascending=False)

    return summarized_attributions_df

In [116]:
MODEL_FILENAME = "SmoothL1Loss_fixed_Adamax_fewer_neurons_0.2_testSize_new_StandardScaler_2048_batch_0.05_dropout_pytorch_v1.12.tar"
MODEL_DIR = rf"/home/thes1067/models/blue_waters"
MODEL_PATH = Path(MODEL_DIR, MODEL_FILENAME)

In [117]:
DATASET_DIR = r"/home/thes1067/data/claix_dataset"
DATASET_NAME = "claix_posix_npb_4_16_64_nprocs_Ciao_C_1288.csv"
DATASET_PATH = Path(DATASET_DIR, DATASET_NAME)

In [119]:
MODEL_PATH.is_file()

True

## Load the model

In [120]:
bw_model = nn.Sequential(
    nn.Linear(97, 2048),
    nn.Dropout(p=0.05),
    nn.ReLU(),
    nn.Linear(2048, 512),
    nn.Dropout(p=0.05),
    nn.ReLU(),
    nn.Linear(512, 128),
    nn.Dropout(p=0.05),
    nn.ReLU(),
    nn.Linear(128, 1),
).to(device)

In [121]:
print("Loading pretrained model...")

checkpoint = torch.load(MODEL_PATH, map_location=torch.device(device))
bw_model.load_state_dict(checkpoint['model_state_dict'])
model_epoch = checkpoint['epoch']

print(f"Current epoch: {model_epoch}")

Loading pretrained model...
Current epoch: 599


In [122]:
bw_model.eval()

Sequential(
  (0): Linear(in_features=97, out_features=2048, bias=True)
  (1): Dropout(p=0.05, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=2048, out_features=512, bias=True)
  (4): Dropout(p=0.05, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=512, out_features=128, bias=True)
  (7): Dropout(p=0.05, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=128, out_features=1, bias=True)
)

### Fix the seeds


In [123]:
random_seed = 1234
split_seed = 42

random.seed(random_seed)
np.random.seed(random_seed)

torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

# Set PyTorch to use deterministic algorithms if possible
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Load the data

In [124]:
df_claix_posix = pd.read_csv(DATASET_PATH)

In [125]:
len(df_claix_posix)

1163

In [126]:
df_claix_posix.head()

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_FDSYNCS,...,POSIX_F_VARIANCE_RANK_TIME,POSIX_F_VARIANCE_RANK_BYTES,uid,start_time,end_time,nprocs,jobid,lib_ver,hints,bandwidth
0,32,0,0,440,440,78,0,-1,0,0,...,2.520036,1.084529e+19,38849,2022-07-26 20:10:07,2022-07-26 20:11:18,16,28860382,3.3.1,romio_no_indep_rw=true;cb_nodes=4,1971.700985
1,32,0,0,440,440,78,0,-1,0,0,...,2.567556,1.084529e+19,38849,2022-07-25 23:45:34,2022-07-25 23:46:49,16,28845229,3.3.1,romio_no_indep_rw=true;cb_nodes=4,1952.503983
2,32,0,0,440,440,78,0,-1,0,0,...,2.458673,1.084529e+19,38849,2022-07-26 20:52:45,2022-07-26 20:53:56,16,28860382,3.3.1,romio_no_indep_rw=true;cb_nodes=4,1996.763339
3,32,0,0,440,440,78,0,-1,0,0,...,2.409477,1.084529e+19,38849,2022-07-26 21:41:19,2022-07-26 21:42:29,16,28860382,3.3.1,romio_no_indep_rw=true;cb_nodes=4,2017.040999
4,32,0,0,440,440,78,0,-1,0,0,...,2.466827,1.084529e+19,38849,2022-07-26 18:02:28,2022-07-26 18:03:39,16,28860381,3.3.1,romio_no_indep_rw=true;cb_nodes=4,1992.35133


### Drop the non-invariant columns

In [127]:
df_claix_posix = df_claix_posix.drop(['uid', 'jobid', 'hints', 'start_time', 'end_time', 'lib_ver'],
                                                               axis=1)

### Drop columns to match the Blue Waters dataset on which the model was trained

In [128]:
df_claix_posix = df_claix_posix.drop(['POSIX_FDSYNCS',
                                        'POSIX_RENAMED_FROM',
                                        'POSIX_F_VARIANCE_RANK_TIME',
                                        'POSIX_F_VARIANCE_RANK_BYTES'],
                                        axis=1)			

### Separate bandwidth from input features

In [129]:
df_bandwidth = df_claix_posix.pop('bandwidth')
df_claix_posix

Unnamed: 0,POSIX_OPENS,POSIX_FILENOS,POSIX_DUPS,POSIX_READS,POSIX_WRITES,POSIX_SEEKS,POSIX_STATS,POSIX_MMAPS,POSIX_FSYNCS,POSIX_RENAME_SOURCES,...,rank,POSIX_F_READ_TIME,POSIX_F_WRITE_TIME,POSIX_F_META_TIME,POSIX_TOTAL_TIME,POSIX_F_MAX_READ_TIME,POSIX_F_MAX_WRITE_TIME,POSIX_F_FASTEST_RANK_TIME,POSIX_F_SLOWEST_RANK_TIME,nprocs
0,32,0,0,440,440,78,0,-1,0,0,...,-1,1.396742,5.161620,0.022065,6.580427,0.005862,0.155172,0.000530,6.559490,16
1,32,0,0,440,440,78,0,-1,0,0,...,-1,1.388736,5.230402,0.025987,6.645125,0.005642,0.016469,0.000806,6.621231,16
2,32,0,0,440,440,78,0,-1,0,0,...,-1,1.374670,5.102114,0.021048,6.497833,0.006568,0.130111,0.000555,6.479013,16
3,32,0,0,440,440,78,0,-1,0,0,...,-1,1.382730,5.028884,0.020894,6.432509,0.006548,0.017362,0.000570,6.413866,16
4,32,0,0,440,440,78,0,-1,0,0,...,-1,1.392669,5.095290,0.024262,6.512222,0.006047,0.083048,0.000641,6.489974,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1158,8,0,0,440,440,78,0,-1,0,0,...,-1,1.408159,4.991706,0.003280,6.403145,0.005342,0.089964,0.000726,6.400820,4
1159,8,0,0,440,440,78,0,-1,0,0,...,-1,1.421173,5.007220,0.003163,6.431556,0.005848,0.090479,0.000674,6.429183,4
1160,3248,2542,0,107198,9041,78248,397,-43,0,0,...,-1,284.566834,198.561634,0.594587,483.723055,1.709868,0.548601,0.000000,14.079545,48
1161,16112,12718,0,389198,35189,236638,1933,-43,0,0,...,-1,2329.200567,100.632465,3.125118,2432.958150,1.931765,0.226163,0.000000,17.771790,240


In [130]:
df_bandwidth.describe()

count    1163.000000
mean     1814.994168
std       393.298187
min         0.084217
25%      1953.342184
50%      1986.288164
75%      2007.708468
max      2070.166839
Name: bandwidth, dtype: float64

### Scale the input features

In [None]:
X = df_claix_posix.to_numpy()

In [136]:
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [137]:
tensor_X = torch.Tensor(X_scaled).to(device)
tensor_y = torch.Tensor(df_bandwidth.to_numpy()).view(-1, 1).to(device) 

## Use Integrated Gradients to attribute the importance to the features

In [143]:
baseline = torch.zeros(len(tensor_X), 97).to(device)
baseline

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [144]:
bw_ig = IntegratedGradients(bw_model)
bw_ig_attributions, bw_ig_delta = bw_ig.attribute(tensor_X, baseline, n_steps=300, return_convergence_delta=True)
bw_ig_attributions_df = annotate_attributions(bw_ig_attributions, columns = list(df_claix_posix.columns))
print('Delta Avg:', torch.mean(bw_ig_delta))

Delta Avg: tensor(-0.3810, device='cuda:0', dtype=torch.float64)


## Other feature attribution methods

In [145]:
ig_nt_bw = NoiseTunnel(bw_ig)
dl_bw = DeepLift(bw_model)
# Fails due to memory constraints
# dls = DeepLiftShap(claix_model)
gs_bw = GradientShap(bw_model)
fa_bw = FeatureAblation(bw_model)
fp_bw = FeaturePermutation(bw_model)
sal_bw = Saliency(bw_model)
input_x_grad_bw = InputXGradient(bw_model)
guided_back_prop_bw = GuidedBackprop(bw_model)
# Takes too long to compute
# shap = ShapleyValues(bw_model)
shap_sampling_bw = ShapleyValueSampling(bw_model)

In [146]:
def perturb_fn(inputs):
    noise = torch.tensor(np.random.normal(0, 0.003, inputs.shape)).float().to(device)
    return noise, inputs - noise

In [147]:
# shap_attr_bw = shap.attribute(tensor_X_test)
# shap_attr_bw_df = annotate_attributions(shap_attr_bw, columns=list(df_claix_posix.columns))

In [148]:
sal_attr_bw = sal_bw.attribute(tensor_X)
sal_attr_df_bw = annotate_attributions(sal_attr_bw, columns=list(df_claix_posix.columns))



In [149]:
sens = sensitivity_max(sal_bw.attribute, tensor_X)
torch.mean(sens)



tensor(0.1178, device='cuda:0')

In [150]:
infid = infidelity(bw_model, perturb_fn, tensor_X, sal_attr_bw)
torch.mean(infid)

tensor(18982570., device='cuda:0')

In [151]:
fp_attr_bw = fp_bw.attribute(tensor_X)
fp_attr_df_bw = annotate_attributions(fp_attr_bw, columns=list(df_claix_posix.columns))

In [152]:
sens = sensitivity_max(fp_bw.attribute, tensor_X)
torch.mean(sens)

tensor(5.6762, device='cuda:0')

In [153]:
infid = infidelity(bw_model, perturb_fn, tensor_X, fp_attr_bw)
torch.mean(infid)

tensor(5013831.5000, device='cuda:0')

In [154]:
input_x_grad_attr_bw = input_x_grad_bw.attribute(tensor_X)
input_x_grad_attr_df_bw = annotate_attributions(input_x_grad_attr_bw, columns=list(df_claix_posix.columns))



In [155]:
sens = sensitivity_max(input_x_grad_bw.attribute, tensor_X)
torch.mean(sens)



tensor(0.2964, device='cuda:0')

In [156]:
infid = infidelity(bw_model, perturb_fn, tensor_X, input_x_grad_attr_bw)
torch.mean(infid)

tensor(5513831.5000, device='cuda:0')

In [157]:
guided_back_prop_attr_bw = guided_back_prop_bw.attribute(tensor_X)
guided_back_prop_attr_df_bw = annotate_attributions(guided_back_prop_attr_bw, columns=list(df_claix_posix.columns))



In [158]:
# guided_back_prop.has_convergence_delta()

sens = sensitivity_max(guided_back_prop_bw.attribute, tensor_X)
torch.mean(sens)



tensor(0.1060, device='cuda:0')

In [159]:
infid = infidelity(bw_model, perturb_fn, tensor_X, guided_back_prop_attr_bw)
torch.mean(infid)

tensor(444340.5000, device='cuda:0')

In [160]:
shap_sampling_attr_bw = shap_sampling_bw.attribute(tensor_X)
shap_sampling_attr_df_bw = annotate_attributions(shap_sampling_attr_bw, columns=list(df_claix_posix.columns))

In [161]:
sens = sensitivity_max(shap_sampling_bw.attribute, tensor_X)
torch.mean(sens)

tensor(0.3463, device='cuda:0')

In [162]:
infid = infidelity(bw_model, perturb_fn, tensor_X, shap_sampling_attr_bw)
torch.mean(infid)

tensor(5263412., device='cuda:0')

In [163]:
ig_nt_attr_bw, ig_nt_delta_bw = ig_nt_bw.attribute(tensor_X, return_convergence_delta=True)
# display(ig_nt_delta)
print("Delta Avg: ", torch.mean(ig_nt_delta_bw))
ig_nt_attr_df_bw = annotate_attributions(ig_nt_attr_bw, columns=list(df_claix_posix.columns))

Delta Avg:  tensor(1.6158, device='cuda:0', dtype=torch.float64)


In [164]:
dl_attr_bw, dl_delta_bw = dl_bw.attribute(tensor_X, return_convergence_delta=True)
print("Delta Avg: ", torch.mean(dl_delta_bw))
dl_attr_df_bw = annotate_attributions(dl_attr_bw, columns=list(df_claix_posix.columns))

Delta Avg:  tensor(-0.0006, device='cuda:0')


               activations. The hooks and attributes will be removed
            after the attribution is finished


In [165]:
fa_attr_bw = fa_bw.attribute(tensor_X)
fa_attr_df_bw = annotate_attributions(fa_attr_bw, columns=list(df_claix_posix.columns))

In [166]:
sens = sensitivity_max(fa_bw.attribute, tensor_X)
torch.mean(sens)

tensor(0.2967, device='cuda:0')

In [167]:
infid = infidelity(bw_model, perturb_fn, tensor_X, fa_attr_bw)
torch.mean(infid)

tensor(5515871., device='cuda:0')

In [170]:
def calc_norm_sum(attributions_df):
    attributions_sum = attributions_df.sum(0)
    return (attributions_sum / np.linalg.norm(attributions_sum, ord=1)).to_frame(name="norm_attr_coeff") # Just division returns a Series, so cast it to a DataFrame

In [171]:
ig_attr_norm_sum_bw = calc_norm_sum(bw_ig_attributions_df)
ig_attr_norm_sum_bw

Unnamed: 0,norm_attr_coeff
POSIX_OPENS,0.001307
POSIX_FILENOS,0.008035
POSIX_DUPS,0.000000
POSIX_READS,0.010389
POSIX_WRITES,0.003002
...,...
POSIX_F_MAX_READ_TIME,0.029408
POSIX_F_MAX_WRITE_TIME,0.008430
POSIX_F_FASTEST_RANK_TIME,0.001172
POSIX_F_SLOWEST_RANK_TIME,0.039725


In [172]:
ig_nt_attr_test_norm_sum_bw = calc_norm_sum(ig_nt_attr_df_bw)

In [173]:
dl_attr_test_norm_sum_bw = calc_norm_sum(dl_attr_df_bw)

In [175]:
fa_attr_test_norm_sum_bw =calc_norm_sum(fa_attr_df_bw)

In [176]:
sal_norm_sum_bw = calc_norm_sum(sal_attr_df_bw)

In [177]:
input_x_grad_norm_sum_bw = calc_norm_sum(input_x_grad_attr_df_bw)

In [178]:
guided_back_prop_norm_sum_bw = calc_norm_sum(guided_back_prop_attr_df_bw)

In [179]:
fp_attr_norm_sum_bw = calc_norm_sum(fp_attr_df_bw)

In [180]:
shap_sampling_attr_norm_sum_bw = calc_norm_sum(shap_sampling_attr_df_bw)

In [181]:
aggregated_feature_attributions_normalized_df_bw = pd.concat([ig_attr_norm_sum_bw, ig_nt_attr_test_norm_sum_bw, dl_attr_test_norm_sum_bw,
                                                                fa_attr_test_norm_sum_bw, shap_sampling_attr_norm_sum_bw, guided_back_prop_norm_sum_bw, fp_attr_norm_sum_bw, input_x_grad_norm_sum_bw, sal_norm_sum_bw],
                                                                axis=1, join="inner")
aggregated_feature_attributions_normalized_df_bw.columns = ["ig", "ig_noise_tunnel", "deeplift",
                                                                "feature_ablation", "shap_sampling", "guided_backprop", "feature_permutation", "input_x_grad", "saliency"]
aggregated_feature_attributions_normalized_df_bw

Unnamed: 0,ig,ig_noise_tunnel,deeplift,feature_ablation,shap_sampling,guided_backprop,feature_permutation,input_x_grad,saliency
POSIX_OPENS,0.001307,-0.000275,0.000965,0.003148,0.001758,-0.002375,0.012948,0.002656,0.001752
POSIX_FILENOS,0.008035,-0.000026,0.007739,0.007731,0.005513,-0.003204,0.009773,0.007449,0.005231
POSIX_DUPS,0.000000,-0.000729,0.000000,0.000000,0.000000,-0.002611,0.000000,0.000000,0.005912
POSIX_READS,0.010389,0.003554,0.010259,0.010790,0.009178,-0.011512,0.016045,0.010483,0.007122
POSIX_WRITES,0.003002,0.000218,0.002781,0.003062,0.002213,-0.000583,0.009812,0.002926,0.002026
...,...,...,...,...,...,...,...,...,...
POSIX_F_MAX_READ_TIME,0.029408,0.017107,0.028922,0.024588,0.029598,-0.033219,0.012837,0.024493,0.019431
POSIX_F_MAX_WRITE_TIME,0.008430,-0.001409,0.008407,0.007648,0.007407,-0.005391,0.006559,0.006014,0.008629
POSIX_F_FASTEST_RANK_TIME,0.001172,-0.000276,0.001579,0.000534,0.000028,-0.000921,-0.001072,0.001822,0.002892
POSIX_F_SLOWEST_RANK_TIME,0.039725,0.009014,0.042729,0.035965,0.035463,-0.020543,0.072199,0.036237,0.018299


In [182]:
summary_feature_attributions_normalized_df_bw = calculate_avg_attributions(aggregated_feature_attributions_normalized_df_bw, axis=1)

# For each feature, show its relative order (1 - most positive influence, n - least positive influence)
# Rank features by their absolute coefficient value to identify the most important ones
summary_feature_attributions_normalized_df_bw = rank_features_by_importance(add_relative_order_of_features(summary_feature_attributions_normalized_df_bw))
summary_feature_attributions_normalized_df_bw

Unnamed: 0,avg_attribution_coeff,relative_order,feature_importance_rank
POSIX_F_READ_TIME,0.109016,1.0,1.0
POSIX_F_META_TIME,0.099567,2.0,2.0
POSIX_TOTAL_TIME,0.090075,3.0,3.0
POSIX_F_SLOWEST_RANK_TIME,0.029899,4.0,4.0
POSIX_MAX_WRITE_TIME_SIZE,0.020044,5.0,5.0
...,...,...,...
POSIX_SIZE_READ_100K_1M,-0.007846,93.0,19.0
POSIX_MODE,-0.008326,94.0,18.0
WRITE_10K_100K,-0.008513,95.0,16.0
READ_100K_1M,-0.008534,96.0,15.0


In [183]:
summary_feature_attributions_normalized_df_bw.loc["POSIX_BYTES_READ"]

avg_attribution_coeff       0.001336
relative_order             37.000000
feature_importance_rank    54.000000
Name: POSIX_BYTES_READ, dtype: float64

# Evaluate the model fine-tuned on the Claix dataset

### Load the pre-trained model

In [200]:
claix_model = nn.Sequential(
    nn.Linear(97, 2048),
    nn.Dropout(p=0.05),
    nn.ReLU(),
    nn.Linear(2048, 512),
    nn.Dropout(p=0.05),
    nn.ReLU(),
    nn.Linear(512, 128),
    nn.Dropout(p=0.05),
    nn.ReLU(),
    nn.Linear(128, 1),
).to(device)

In [201]:
claix_model.modules

<bound method Module.modules of Sequential(
  (0): Linear(in_features=97, out_features=2048, bias=True)
  (1): Dropout(p=0.05, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=2048, out_features=512, bias=True)
  (4): Dropout(p=0.05, inplace=False)
  (5): ReLU()
  (6): Linear(in_features=512, out_features=128, bias=True)
  (7): Dropout(p=0.05, inplace=False)
  (8): ReLU()
  (9): Linear(in_features=128, out_features=1, bias=True)
)>

In [None]:
CLAIX_MODEL_DIR = "/home/thes1067/models/claix"
CLAIX_MODEL_NAME = "SmoothL1Loss_0.2_testSize_StandardScaler_0.001_lr_0.05_dropout_pytorch_v1.12.0"
CLAIX_MODEL_PATH = Path(CLAIX_MODEL_DIR, CLAIX_MODEL_NAME)
CLAIX_MODEL_PATH

In [204]:
CLAIX_MODEL_PATH.exists()

True

In [205]:
print("Loading pretrained model...")

checkpoint = torch.load(CLAIX_MODEL_PATH, map_location=torch.device(device))
claix_model.load_state_dict(checkpoint['model_state_dict'])
model_epoch = checkpoint['epoch']

print(f"Current epoch: {model_epoch}")

Loading pretrained model...
Current epoch: 1199


In [245]:
ig_claix = IntegratedGradients(claix_model)
ig_attributions_claix, ig_delta_claix = ig_claix.attribute(tensor_X, baseline, n_steps=300, return_convergence_delta=True)
ig_attributions_df_claix = annotate_attributions(ig_attributions_claix, columns = list(df_claix_posix.columns))
print('Delta Avg:', torch.mean(ig_delta_claix))

Delta Avg: tensor(-0.1277, device='cuda:0', dtype=torch.float64)


In [246]:
ig_nt_claix = NoiseTunnel(ig_claix)
dl_claix = DeepLift(claix_model)
gs_claix = GradientShap(claix_model)
fa_claix = FeatureAblation(claix_model)
fp_claix = FeaturePermutation(claix_model)
sal_claix = Saliency(claix_model)
input_x_grad_claix = InputXGradient(claix_model)
guided_back_prop_claix = GuidedBackprop(claix_model)
shap_sampling_claix = ShapleyValueSampling(claix_model)

In [208]:
def perturb_fn(inputs):
    noise = torch.tensor(np.random.normal(0, 0.003, inputs.shape)).float().to(device)
    return noise, inputs - noise

In [210]:
sal_attr_claix = sal_claix.attribute(tensor_X)
sal_attr_df_claix = annotate_attributions(sal_attr_claix, columns=list(df_claix_posix.columns))



In [211]:
sens = sensitivity_max(sal_claix.attribute, tensor_X)
torch.mean(sens)



tensor(0.2804, device='cuda:0')

In [212]:
infid = infidelity(claix_model, perturb_fn, tensor_X, sal_attr_claix)
torch.mean(infid)

tensor(77946.9375, device='cuda:0')

In [213]:
fp_attr_claix = fp_claix.attribute(tensor_X)
fp_attr_df_claix = annotate_attributions(fp_attr_claix, columns=list(df_claix_posix.columns))

In [214]:
sens = sensitivity_max(fp_claix.attribute, tensor_X)
torch.mean(sens)

tensor(2.1204, device='cuda:0')

In [215]:
infid = infidelity(claix_model, perturb_fn, tensor_X, fp_attr_claix)
torch.mean(infid)

tensor(71576.8516, device='cuda:0')

In [216]:
input_x_grad_attr_claix = input_x_grad_claix.attribute(tensor_X)
input_x_grad_attr_df_claix = annotate_attributions(input_x_grad_attr_claix, columns=list(df_claix_posix.columns))



In [217]:
sens = sensitivity_max(input_x_grad_claix.attribute, tensor_X)
torch.mean(sens)



tensor(0.3960, device='cuda:0')

In [218]:
infid = infidelity(claix_model, perturb_fn, tensor_X, input_x_grad_attr_claix)
torch.mean(infid)

tensor(72811.1172, device='cuda:0')

In [219]:
guided_back_prop_attr_claix = guided_back_prop_claix.attribute(tensor_X)
guided_back_prop_attr_df_claix = annotate_attributions(guided_back_prop_attr_claix, columns=list(df_claix_posix.columns))



In [220]:
sens = sensitivity_max(guided_back_prop_claix.attribute, tensor_X)
torch.mean(sens)



tensor(0.1474, device='cuda:0')

In [221]:
infid = infidelity(claix_model, perturb_fn, tensor_X, guided_back_prop_attr_claix)
torch.mean(infid)

tensor(73665.5156, device='cuda:0')

In [222]:
shap_sampling_attr_claix = shap_sampling_claix.attribute(tensor_X)
shap_sampling_attr_df_claix = annotate_attributions(shap_sampling_attr_claix, columns=list(df_claix_posix.columns))

In [223]:
sens = sensitivity_max(shap_sampling_claix.attribute, tensor_X)
torch.mean(sens)

tensor(0.6285, device='cuda:0')

In [224]:
infid = infidelity(claix_model, perturb_fn, tensor_X, shap_sampling_attr_claix)
torch.mean(infid)

tensor(71078.6562, device='cuda:0')

In [None]:
ig_nt_attr_claix, ig_nt_delta_claix = ig_nt_claix.attribute(tensor_X, return_convergence_delta=True)
print("Delta Avg: ", torch.mean(ig_nt_delta_claix))
ig_nt_attr_df_claix = annotate_attributions(ig_nt_attr_claix, columns=list(df_claix_posix.columns))

In [226]:
dl_attr_claix, dl_delta_claix = dl_claix.attribute(tensor_X, return_convergence_delta=True)
print("Delta Avg: ", torch.mean(dl_delta_claix))
dl_attr_df_claix = annotate_attributions(dl_attr_claix, columns=list(df_claix_posix.columns))

Delta Avg:  tensor(0.7711, device='cuda:0')


               activations. The hooks and attributes will be removed
            after the attribution is finished


In [227]:
fa_attr_claix = fa_claix.attribute(tensor_X)
fa_attr_df_claix = annotate_attributions(fa_attr_claix, columns=list(df_claix_posix.columns))

In [228]:
sens = sensitivity_max(fa_claix.attribute, tensor_X)
torch.mean(sens)

tensor(2.0060, device='cuda:0')

In [229]:
infid = infidelity(claix_model, perturb_fn, tensor_X, fa_attr_claix)
torch.mean(infid)

tensor(72554.4062, device='cuda:0')

In [232]:
def calc_norm_sum(attributions_df):
    attributions_sum = attributions_df.sum(0)
    return (attributions_sum / np.linalg.norm(attributions_sum, ord=1)).to_frame(name="norm_attr_coeff") # Just division returns a Series, so cast it to a DataFrame

In [233]:
ig_attr_norm_sum_claix = calc_norm_sum(ig_attributions_df_claix)
ig_attr_norm_sum_claix

Unnamed: 0,norm_attr_coeff
POSIX_OPENS,0.001016
POSIX_FILENOS,0.010685
POSIX_DUPS,0.000000
POSIX_READS,0.012602
POSIX_WRITES,0.004056
...,...
POSIX_F_MAX_READ_TIME,0.030460
POSIX_F_MAX_WRITE_TIME,0.004470
POSIX_F_FASTEST_RANK_TIME,0.001041
POSIX_F_SLOWEST_RANK_TIME,0.049991


In [234]:
ig_nt_attr_test_norm_sum_claix = calc_norm_sum(ig_nt_attr_df_claix)

In [235]:
dl_attr_test_norm_sum_claix = calc_norm_sum(dl_attr_df_claix)

In [237]:
fa_attr_test_norm_sum_claix =calc_norm_sum(fa_attr_df_claix)

In [238]:
sal_norm_sum_claix = calc_norm_sum(sal_attr_df_claix)

In [239]:
input_x_grad_norm_sum_claix = calc_norm_sum(input_x_grad_attr_df_claix)

In [240]:
guided_back_prop_norm_sum_claix = calc_norm_sum(guided_back_prop_attr_df_claix)

In [241]:
fp_attr_norm_sum_claix = calc_norm_sum(fp_attr_df_claix)

In [242]:
shap_sampling_attr_norm_sum_claix = calc_norm_sum(shap_sampling_attr_df_claix)

In [243]:
aggregated_feature_attributions_normalized_df_claix = pd.concat([ig_attr_norm_sum_claix, ig_nt_attr_test_norm_sum_claix, dl_attr_test_norm_sum_claix,
                                                                    fa_attr_test_norm_sum_claix, shap_sampling_attr_norm_sum_claix, guided_back_prop_norm_sum_claix, fp_attr_norm_sum_claix, input_x_grad_norm_sum_claix, sal_norm_sum_claix],
                                                                    axis=1, join="inner")
aggregated_feature_attributions_normalized_df_claix.columns = ["ig", "ig_noise_tunnel", "deeplift",
                                                                    "feature_ablation", "shap_sampling", "guided_backprop", "feature_permutation", "input_x_grad", "saliency"]
aggregated_feature_attributions_normalized_df_claix

Unnamed: 0,ig,ig_noise_tunnel,deeplift,feature_ablation,shap_sampling,guided_backprop,feature_permutation,input_x_grad,saliency
POSIX_OPENS,0.001016,0.000132,0.000977,0.001680,0.001386,-0.003259,0.008660,0.001800,0.001201
POSIX_FILENOS,0.010685,0.000793,0.010669,0.009386,0.008530,-0.004181,0.008091,0.011575,0.007928
POSIX_DUPS,0.000000,-0.000026,0.000000,0.001199,0.000095,-0.001661,-0.000654,0.000000,0.002134
POSIX_READS,0.012602,0.003475,0.012640,0.012003,0.012680,-0.012453,0.017244,0.012742,0.008462
POSIX_WRITES,0.004056,0.000426,0.004015,0.005471,0.003338,-0.001273,0.004154,0.004177,0.002897
...,...,...,...,...,...,...,...,...,...
POSIX_F_MAX_READ_TIME,0.030460,0.015106,0.029643,0.026957,0.033147,-0.031807,0.009965,0.028606,0.022330
POSIX_F_MAX_WRITE_TIME,0.004470,0.000729,0.004188,-0.002340,0.004118,0.000010,0.000237,0.001471,0.001075
POSIX_F_FASTEST_RANK_TIME,0.001041,-0.000162,0.001203,0.001024,0.000640,0.001041,0.005046,0.000158,0.000321
POSIX_F_SLOWEST_RANK_TIME,0.049991,0.010141,0.050500,0.044874,0.052788,-0.024192,0.067315,0.048802,0.025347


In [244]:
summary_feature_attributions_normalized_df_claix = calculate_avg_attributions(aggregated_feature_attributions_normalized_df_claix, axis=1)

# For each feature, show its relative order (1 - most positive influence, n - least positive influence)
# Rank features by their absolute coefficient value to identify the most important ones
summary_feature_attributions_normalized_df_claix = rank_features_by_importance(add_relative_order_of_features(summary_feature_attributions_normalized_df_claix))
summary_feature_attributions_normalized_df_claix

Unnamed: 0,avg_attribution_coeff,relative_order,feature_importance_rank
POSIX_F_META_TIME,0.117892,1.0,1.0
POSIX_F_READ_TIME,0.117351,2.0,2.0
POSIX_TOTAL_TIME,0.093328,3.0,3.0
POSIX_F_SLOWEST_RANK_TIME,0.036174,4.0,4.0
POSIX_F_MAX_READ_TIME,0.018268,5.0,5.0
...,...,...,...
POSIX_SIZE_READ_100K_1M,-0.005597,93.0,24.0
POSIX_BYTES_WRITTEN,-0.006540,94.0,22.0
WRITE_10K_100K,-0.006683,95.0,20.0
POSIX_MODE,-0.009406,96.0,14.0


In [83]:
summary_feature_attributions_normalized_df_claix.loc["POSIX_BYTES_READ"]

avg_attribution_coeff       0.001336
relative_order             37.000000
feature_importance_rank    54.000000
Name: POSIX_BYTES_READ, dtype: float64

In [71]:
display(claix_avg_ranked_attributions_df)

Unnamed: 0,avg_attribution_coeff,relative_order,feature_importance_rank
POSIX_F_READ_TIME,532.499505,1.0,1.0
POSIX_TOTAL_TIME,390.365359,2.0,2.0
POSIX_F_META_TIME,343.351446,3.0,3.0
POSIX_F_SLOWEST_RANK_TIME,143.429679,4.0,4.0
POSIX_F_MAX_READ_TIME,87.390850,5.0,5.0
...,...,...,...
WRITE_10K_100K,-33.000360,93.0,18.0
POSIX_STRIDE3_STRIDE,-37.243687,94.0,14.0
POSIX_BYTES_WRITTEN,-41.687902,95.0,12.0
POSIX_MODE,-56.894887,96.0,8.0


In [72]:
features_importance_comparison_df = pd.merge(summary_feature_attributions_normalized_df_bw, summary_feature_attributions_normalized_df_claix, left_index=True, right_index=True, suffixes=["_bw", "_claix"])
features_importance_comparison_df

In [75]:
features_importance_comparison_df.to_csv(r"/home/eo080593/Projects/2021-dmytro-povaliaiev/sourcecode/visualization/data/Feature_Attributions_Full_vs_Fine-tuned.csv")

In [287]:
aggregated_feature_attributions_avg = pd.concat([bw_avg_attributions_df, ig_nt_avg_attr, dl_attr_avg, gs_attr_avg, fa_attr_avg, shap_sampling_attr_avg, guided_back_prop_attr_avg, fp_attr_avg, input_x_grad_attr_avg, sal_avg_attr], axis=1, join="inner")
aggregated_feature_attributions_avg.columns = ["ig", "ig_noise_tunnel", "deeplift", "gradientshap", "feature_ablation", "shap_sampling", "guided_backprop", "feature_permutation", "input_x_grad", "saliency"]
aggregated_feature_attributions_avg

Unnamed: 0,ig,ig_noise_tunnel,deeplift,gradientshap,feature_ablation,shap_sampling,guided_backprop,feature_permutation,input_x_grad,saliency
POSIX_F_READ_TIME,22555.235103,61291.281147,25156.373047,-708.246582,29200.218750,24147.093750,-296217.812500,326.970581,29735.417969,291629.375000
POSIX_TOTAL_TIME,16660.750364,86396.627293,18804.513672,-231.063446,21874.421875,17994.226562,-333323.250000,316.392670,22085.056641,333218.781250
POSIX_F_META_TIME,12806.998625,148915.586186,14409.471680,3.067998,18685.117188,13581.669922,-391480.093750,46.429176,18564.048828,561296.125000
POSIX_F_SLOWEST_RANK_TIME,4987.124063,3232.381576,5369.414062,617.464478,7213.912598,4636.626953,-34776.613281,1844.427246,7124.248535,46195.160156
POSIX_F_MAX_READ_TIME,3691.950321,6658.701544,3876.315674,-46.512398,4957.162598,3767.761719,-55492.292969,70.694702,4996.842773,47867.132812
...,...,...,...,...,...,...,...,...,...,...
POSIX_BYTES_WRITTEN,-2169.990030,-2383.134204,-2302.347168,15.510158,-3827.800781,-2104.962646,15386.246094,135.771759,-4375.528809,39114.734375
WRITE_10K_100K,-2189.946022,-3865.961135,-2164.196533,-20.773529,-4777.125488,-2761.839111,6103.601562,130.342880,-5055.902344,40884.074219
READ_100K_1M,-2195.874262,-4023.120175,-2255.310059,22.440395,-4341.409668,-3003.796631,-1124.387817,167.452576,-5313.726074,50255.136719
POSIX_MODE,-2575.584399,-2689.931016,-2768.075195,-25.652847,-3885.096680,-2185.000977,9348.550781,189.910950,-4753.264160,36103.546875


In [288]:
aggregated_feature_attributions_avg.mean(axis=1).to_frame("avg_attribution_coeff")

Unnamed: 0,avg_attribution_coeff
POSIX_F_READ_TIME,18711.590626
POSIX_TOTAL_TIME,18379.645688
POSIX_F_META_TIME,39682.842085
POSIX_F_SLOWEST_RANK_TIME,4644.414639
POSIX_F_MAX_READ_TIME,2034.775678
...,...
POSIX_BYTES_WRITTEN,3748.849875
WRITE_10K_100K,2628.227450
READ_100K_1M,2818.740500
POSIX_MODE,2675.940333


In [289]:
aggregated_feature_attributions_avg = rank_features_by_importance(add_relative_order_of_features(aggregated_feature_attributions_avg.mean(axis=1).to_frame("avg_attribution_coeff")))
aggregated_feature_attributions_avg

Unnamed: 0,avg_attribution_coeff,relative_order,feature_importance_rank
POSIX_F_META_TIME,39682.842085,1.0,1.0
POSIX_F_READ_TIME,18711.590626,2.0,2.0
POSIX_TOTAL_TIME,18379.645688,3.0,3.0
POSIX_MAX_WRITE_TIME_SIZE,8744.312691,4.0,4.0
POSIX_ACCESS2_ACCESS,6479.626682,5.0,5.0
...,...,...,...
READ_1K_10K,-578.440659,93.0,63.0
POSIX_SIZE_READ_1K_10K,-615.256061,94.0,62.0
POSIX_SLOWEST_RANK,-970.510207,95.0,43.0
rank,-1369.661555,96.0,31.0
