# This notebook performs analysis on the logs obtained from fault injection simulations on **PCAHyperspectralClassifier**.

In [1]:
# Run configuration: selects which archive of NVBitFI logs is analysed and how the output files are named.
DATASET = 'indianPines'  # other values used in this notebook: 'pavia_uni', 'salinas'
INSTR_GROUP = 'G_FP32'  # alternative: 'G_GP'
# Numeric group id that appears in the per-run log directory names ("...-group<id>-model0-icount<n>")
INSTR_GROUP_DICT = {'G_FP32': 1, 'G_GP': 7}
MACHINE = 'workstation'  # alternative: 'laptop'
PCA = 'PCA10'  # alternatives: 'PCA50', 'PCA7'
HARDENING = True  # True -> analyse the "_hardened" archive and write "_hardened" output files

In [2]:
# Top-level directory that holds the zipped logs and the golden probabilities for this configuration.
# NOTE(review): the hardened directory name is hard-coded to 'pca10_hardened_logs' and does not follow PCA —
# confirm this is intended before running a hardened analysis with another PCA setting.
LOGS_DICT = 'pca10_hardened_logs' if HARDENING else PCA.lower()+'_logs'
LOGS_DICT

'pca10_hardened_logs'

Unzip archive containing logs from NVBitFI

In [3]:
!rm -r analysis_cache
!mkdir analysis_cache

In [None]:
#!cp pca50_logs/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}.zip analysis_cache
#%cd analysis_cache
#!unzip {DATASET}_{PCA}_{INSTR_GROUP}.zip
#%cd logs/pca_hyperspectral/

#!cp pca10_logs/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}_laptop.zip analysis_cache
#%cd analysis_cache
#!unzip {DATASET}_{PCA}_{INSTR_GROUP}_laptop.zip
#%cd logs/pca_hyperspectral/

#!cp pca10_hardened_logs/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}_hardened.zip analysis_cache
#%cd analysis_cache
#!unzip {DATASET}_{PCA}_{INSTR_GROUP}_hardened.zip
#%cd logs/pca_hyperspectral/

# Extract the archive that matches the configuration into analysis_cache/.
if HARDENING:
    !unzip {LOGS_DICT}/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}_hardened.zip -d analysis_cache
else:
    !unzip {LOGS_DICT}/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}.zip -d analysis_cache

# Later cells use paths relative to this directory ('../../../' leads back to the directory the notebook
# started in), and this %cd is itself relative: run this cell once per kernel session, from the start directory.
%cd analysis_cache/logs/pca_hyperspectral/

#!unzip pca10_hardened_logs_new/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}_hardened.zip -d analysis_cache
#%cd analysis_cache/logs/pca_hyperspectral/

#!unzip pca10_logs/{DATASET}_logs/{DATASET}_{PCA}_{INSTR_GROUP}.zip -d analysis_cache
#%cd analysis_cache/logs/pca_hyperspectral/

# Create dataframe
Create dataframe from "nvbitfi-injection-log-temp.txt" files present in each subfolder of 'pca_hyperspectral'

You will also need to save the "golden_probabilities" (i.e., the output logits of the neural network without any fault injected) and put them in the right directory (you can infer it from the cell below)


In [5]:
from genericpath import isfile
import os
import numpy as np

# Load the golden probabilities: the fault-free network outputs used as the reference for every comparison below.
golden_probabilities = np.load(f'../../../{LOGS_DICT}/golden_probabilities/{DATASET}_{PCA}_golden_probabilities_{MACHINE}.npy')

# Injection runs are stored in directories numbered icount1 .. icount<NUM_INJECTIONS>.
NUM_INJECTIONS = 1000


def read_inspected_kernel(run_dir):
  """Return the third line (stripped) of 'nvbitfi-injection-info.txt' in ``run_dir``.

  That line is what this notebook stores under the 'inspecting' key when the injection
  log itself does not provide it. Returns None when the file has fewer than three lines.
  """
  with open(f'{run_dir}/nvbitfi-injection-info.txt') as info_file:
    for line_no, info_line in enumerate(info_file):
      if line_no == 2:
        return info_line.strip()
  return None


list_dict = []
group_id = INSTR_GROUP_DICT[INSTR_GROUP]

for icount in range(1, NUM_INJECTIONS + 1):
  run_dir = f'./pca_hyperspectral-group{group_id}-model0-icount{icount}'
  path = f'{run_dir}/nvbitfi-injection-log-temp.txt'

  # One record per injection run. Keys are inserted in a fixed order on purpose: later cells
  # classify runs by their number of keys, and the resulting dataframe columns follow this order.
  dict_line = {}
  dict_line['group'] = group_id
  dict_line['icount'] = icount

  # Some injection results have an empty 'nvbitfi-injection-log-temp.txt'. In these cases
  # 'nvbitfi-injection-info.txt' is used to determine the target kernel.
  if os.stat(path).st_size == 0:
    kernel = read_inspected_kernel(run_dir)
    if kernel is not None:
      dict_line['inspecting'] = kernel

    if not os.path.isfile(f'{run_dir}/prediction_inference.tif'):
      dict_line['missing_output'] = True

  diff_log_path = f'{run_dir}/diff.log'
  if os.path.isfile(diff_log_path):
    # an empty diff.log means the injection was masked, a non-empty one means SDC
    dict_line['diff_empty'] = (os.stat(diff_log_path).st_size == 0)

  with open(path) as log_file:
    for line in log_file:

      if 'ERROR' in line:
        dict_line['ERROR'] = line

        # the log may stop before naming the kernel: fall back to the info file
        if 'inspecting' not in dict_line:
          kernel = read_inspected_kernel(run_dir)
          if kernel is not None:
            dict_line['inspecting'] = kernel

        # nothing after the first ERROR line is parsed
        break

      if 'Injection data' in line:
        continue

      fields = line.strip().split(': ')

      if fields[0] == 'grp 0':
        # expected layout: "grp 0: <n> grp 1: <n> ... grp 7: <n>", so after splitting on ': '
        # every field except the first starts with one group's count
        grp_instr_counts = {f'grp {g}': int(fields[g + 1].split(' ')[0]) for g in range(7)}
        grp_instr_counts['grp 7'] = int(fields[8].strip())

        dict_line['counts per instr group'] = grp_instr_counts

      elif fields[0] == 'beforeVal':
        # this line carries two "name: value" pairs separated by ';'
        rest = fields[1].strip().split(';')
        dict_line[fields[0]] = rest[0]
        dict_line[rest[1]] = fields[2]

      else:
        dict_line[fields[0]] = fields[1]

  list_dict.append(dict_line)

In [None]:
len(list_dict)#d_empty

Trim kernel names

In [7]:
# some of the (static) kernels share the same name but have different parameters. To avoid mapping two different kernels to the same name,
# we use this dict to map each kernel to a unique name
common_kern_names = {'voidgemv2T_kernel_val<int,int,float,float,float,float,128,16,4,4,false,false,cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>>(cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>,float,float)': 'voidgemv2T_kernel_val_VER1',
                     'voidgemv2T_kernel_val<int,int,float,float,float,float,128,16,2,2,false,false,cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>>(cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>,float,float)': 'voidgemv2T_kernel_val_VER2',
                     'voidsplitKreduce_kernel<32,16,int,float,float,float,float,true,false,false>(cublasSplitKParams<float>,floatconst*,floatconst*,float*,floatconst*,floatconst*,floatconst*,floatconst*,float*,void*,long,float*,int*)': 'voidsplitKreduce_kernel_VER1',
                     'voidsplitKreduce_kernel<32,16,int,float,float,float,float,true,true,false>(cublasSplitKParams<float>,floatconst*,floatconst*,float*,floatconst*,floatconst*,floatconst*,floatconst*,float*,void*,long,float*,int*)': 'voidsplitKreduce_kernel_VER2',
                     'voidgemv2N_kernel<int,int,float,float,float,float,128,4,4,4,1,false,cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>>(cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>)': 'voidgemv2N_kernel_VER1',
                     'voidgemv2N_kernel<int,int,float,float,float,float,128,1,4,4,1,false,cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>>(cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>)': 'voidgemv2N_kernel_VER2',
                     'voidgemv2N_kernel<int,int,float,float,float,float,128,2,4,4,1,false,cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>>(cublasGemvParams<cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<floatconst>,cublasGemvTensorStridedBatched<float>,float>)': 'voidgemv2N_kernel_VER3',
                     'voidat::native::elementwise_kernel<128,2,at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()()const::{lambda()#14}::operator()()const::{lambda(float)#1}>(at::TensorIteratorBase&,at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()()const::{lambda()#14}::operator()()const::{lambda(float)#1}const&)::{lambda(int)#1}>(int,at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()()const::{lambda()#14}::operator()()const::{lambda(float)#1}>(at::TensorIteratorBase&,at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()()const::{lambda()#14}::operator()()const::{lambda(float)#1}const&)::{lambda(int)#1})': 'elementwise_kernel_VER1',
                     'voidat::native::elementwise_kernel<128,2,at::native::gpu_kernel_impl<at::native::CUDAFunctor_add<float>>(at::TensorIteratorBase&,at::native::CUDAFunctor_add<float>const&)::{lambda(int)#1}>(int,at::native::gpu_kernel_impl<at::native::CUDAFunctor_add<float>>(at::TensorIteratorBase&,at::native::CUDAFunctor_add<float>const&)::{lambda(int)#1})': 'elementwise_kernel_VER2',
                     'sm80_xmma_fprop_implicit_gemm_indexed_tf32f32_tf32f32_f32_nhwckrsc_nhwc_tilesize128x128x16_stage4_warpsize2x2x1_g1_tensor16x8x8_kernel_cudnn': 'nhwckrsc_nhwc_tilesize128x128x16_stage4'}

# Shorten the kernel name stored under 'inspecting' for every run that has one.
for record in list_dict:
  if 'inspecting' not in record:
    continue

  kernel_name = record['inspecting']

  # kernels that share a name but differ in template parameters get an explicit unique alias
  kernel_name = common_kern_names.get(kernel_name, kernel_name)

  if 'computeBOffsetsKernel' in kernel_name:
    kernel_name = 'computeBOffsetsKernel'

  # drop template arguments ...
  if '<' in kernel_name:
    kernel_name = kernel_name.strip().split('<')[0]

  # ... and namespace qualifiers
  if '::' in kernel_name:
    kernel_name = kernel_name.strip().split("::")[-1]

  record['inspecting'] = kernel_name


# Create list of injections leading to Critical SDCs

note: Safe SDCs are included in Masked results. They will be extracted later on.

IMPORTANT: Every time you rerun fault injections, first delete the logs inside 'nvbitfi/logs/pca_hyperspectral'. Otherwise the second run of injections is written on top of the previous one, and the resulting logs are a mix of the two simulations!

IMPORTANT: always check that 'prediction_inference.tif' exists. NVBitFI still produces an empty "diff.log" when 'prediction_inference.tif' is not produced, which can be mistaken for a Masked or SDC-safe outcome. In such a situation, "nvbitfi-injection-log-temp.txt" should contain the word 'ERROR'.

We have 3 types of injection outcomes:
1. "nvbitfi-injection-log-temp.txt" w/o 'ERROR' word inside
2. "nvbitfi-injection-log-temp.txt" with 'ERROR' word inside
3. "nvbitfi-injection-log-temp.txt" empty

In [8]:
# Partition the parsed runs by what their injection log contained:
#   d_error: an 'ERROR' line was found
#   d_clean: no error and exactly 18 keys were collected
#   d_empty: no error and fewer than 18 keys were collected
d_clean, d_error, d_empty = [], [], []
for record in list_dict:
  if 'ERROR' in record:
    d_error.append(record)
  elif len(record) == 18:
    d_clean.append(record)
  elif len(record) < 18:
    d_empty.append(record)

#golden_probabilities = np.load('logs/golden_probabilities.npy')

def is_sdc_safe(icount):
  """Tell whether injection run ``icount`` altered the saved network outputs.

  Loads 'probabilities.npy' from the run directory and compares it element by element
  (exact equality, no tolerance) with the module-level ``golden_probabilities``.
  Note that this compares the raw stored values, not the predicted classes.

  Args:
      icount: injection number, used to locate the run directory.

  Returns:
      True if at least one stored value differs from the golden reference (or the shapes
      differ), False if the two arrays are identical.
  """
  probabilities = np.load(f'./pca_hyperspectral-group{INSTR_GROUP_DICT[INSTR_GROUP]}-model0-icount{icount}/probabilities.npy')

  # array_equal works for any number of axes, so no element count has to be spelled out per axis
  return not np.array_equal(golden_probabilities, probabilities)

# get masked and sdc from d_clean
# A non-empty diff.log means the final prediction changed: SDC-critical.
d_sdc_critical = [d for d in d_clean if d['diff_empty'] is False]

# With an empty diff.log the prediction is unchanged; the run is SDC-safe if the saved network
# outputs still differ from the golden ones, and masked otherwise. is_sdc_safe loads a file from
# disk, so it is evaluated once per run.
d_masked = []
d_sdc_safe = []
for record in d_clean:
  if record['diff_empty'] is not True:
    continue
  if is_sdc_safe(record['icount']):
    d_sdc_safe.append(record)
  else:
    d_masked.append(record)


In [None]:
len(d_masked)

In [None]:
# Sanity check on a single run: range of the element-wise difference between the golden outputs
# and the outputs saved by injection run 9.
# The golden reference loaded earlier in the notebook is reused on purpose: reloading a hard-coded
# file into `golden_probabilities` here would silently replace the reference used by every later cell.
probabilities = np.load(f'./pca_hyperspectral-group{INSTR_GROUP_DICT[INSTR_GROUP]}-model0-icount{9}/probabilities.npy')
n = golden_probabilities - probabilities
print(n.min())
print(n.max())

In [None]:
INSTR_GROUP

In [None]:
print(f'Nr outcomes that are not DUEs: {len(d_masked) + len(d_sdc_safe) + len(d_sdc_critical)}')
print(f'Nr Masked: {len(d_masked)}')
print(f'Nr SDC-safe: {len(d_sdc_safe)}')
print(f'Nr SDC-critical: {len(d_sdc_critical)}')

# Create dataframes

note: for some error outcomes NVBitFI might not report which kernel, opcode and register were targeted, so some of the lines below may raise an error. Comment them out accordingly.

In [14]:
import pandas as pd

def count_by_column(frame, column):
  """Count the rows of ``frame`` per distinct value of ``column``.

  The column is selected by name, so the result does not depend on the position the column
  happens to have in the dataframe. Rows where the column is missing are ignored.

  Args:
      frame: dataframe built from the per-run records.
      column: name of the column to count by (e.g. 'inspecting', 'opcode', 'regNo').

  Returns:
      The grouped value counts, or an empty Series when ``frame`` has no such column
      (for instance when the outcome category contains no runs).
  """
  if column not in frame.columns:
    return pd.Series(dtype='int64')
  return frame[[column]].dropna().groupby(column).value_counts()


# errors dataframe (NVBitFI may not report kernel/opcode/register for every error outcome)
df_error = pd.DataFrame(d_error)

kernel_error_counts = count_by_column(df_error, 'inspecting')
opcode_error_counts = count_by_column(df_error, 'opcode')
register_error_counts = count_by_column(df_error, 'regNo')

# sdcs_critical dataframe
df_sdc_critical = pd.DataFrame(d_sdc_critical)

kernel_sdc_critical_counts = count_by_column(df_sdc_critical, 'inspecting')
opcode_sdc_critical_counts = count_by_column(df_sdc_critical, 'opcode')
register_sdc_critical_counts = count_by_column(df_sdc_critical, 'regNo')

# sdcs_safe dataframe
df_sdc_safe = pd.DataFrame(d_sdc_safe)

kernel_sdc_safe_counts = count_by_column(df_sdc_safe, 'inspecting')
opcode_sdc_safe_counts = count_by_column(df_sdc_safe, 'opcode')
register_sdc_safe_counts = count_by_column(df_sdc_safe, 'regNo')

# masked dataframe
df_masked = pd.DataFrame(d_masked)

kernel_masked_counts = count_by_column(df_masked, 'inspecting')
opcode_masked_counts = count_by_column(df_masked, 'opcode')
register_masked_counts = count_by_column(df_masked, 'regNo')




In [None]:
# All four outcome dataframes go into one HDF5 file, one key per dataframe.
# The file name carries a '_hardened' tag when the hardened model is being analysed.
hardened_tag = '_hardened' if HARDENING else ''
complete_h5_path = f'../../../kernels_sdc_critical_counts_dataframes/complete_dataframes/{MACHINE}_{PCA}{hardened_tag}_{DATASET}_{INSTR_GROUP}_dataframes.h5'

frames_to_save = {
    'df_error': df_error,
    'df_sdc_critical': df_sdc_critical,
    'df_sdc_safe': df_sdc_safe,
    'df_masked': df_masked,
}
for hdf_key, frame in frames_to_save.items():
    # `key` is passed by keyword: passing it positionally is deprecated in recent pandas
    frame.to_hdf(complete_h5_path, key=hdf_key)


Save counts

In [20]:
# All count tables go into one HDF5 file, one key per table.
# The file name carries a '_hardened' tag when the hardened model is being analysed.
hardened_tag = '_hardened' if HARDENING else ''
counts_h5_path = f'../../../kernels_sdc_critical_counts_dataframes/counts_dataframes/{MACHINE}_{PCA}{hardened_tag}_{DATASET}_{INSTR_GROUP}_counts.h5'

counts_to_save = {
    #kernels
    'kernel_sdc_critical_counts': kernel_sdc_critical_counts,
    'kernel_sdc_safe_counts': kernel_sdc_safe_counts,
    'kernel_masked_counts': kernel_masked_counts,
    'kernel_error_counts': kernel_error_counts,
    #opcodes
    'opcode_sdc_critical_counts': opcode_sdc_critical_counts,
    'opcode_sdc_safe_counts': opcode_sdc_safe_counts,
    'opcode_masked_counts': opcode_masked_counts,
    'opcode_error_counts': opcode_error_counts,
    #registers
    'register_sdc_critical_counts': register_sdc_critical_counts,
    'register_sdc_safe_counts': register_sdc_safe_counts,
    'register_masked_counts': register_masked_counts,
    'register_error_counts': register_error_counts,
}
for hdf_key, counts in counts_to_save.items():
    counts.to_hdf(counts_h5_path, key=hdf_key)

# Jaccard similarity coefficient

In [15]:
def jaccard_index(golden_logits, logits):
  """Per-class Jaccard index (intersection over union) between two classification maps.

  Both inputs are reduced to class maps by taking the argmax along axis 2; the number of
  classes is the size of that axis in ``golden_logits``.

  Args:
      golden_logits: reference network outputs, classes along axis 2.
      logits: network outputs to compare, same layout.

  Returns:
      A list with one value per class. A class that appears in neither map has an empty
      union; its index is reported as 1.0 (the two empty sets agree) instead of the NaN that
      0/0 would give, so that ``min()`` over the list is well defined.
  """
  num_classes = golden_logits.shape[2]

  # inference is done by selecting the index along the 3rd dimension (axis 2 in numpy) with the highest value
  golden_inference = golden_logits.argmax(axis=2)
  inference = logits.argmax(axis=2)

  jaccard_idx = []
  for clss in range(num_classes):
    # boolean masks: True where the pixel is assigned to class 'clss'
    masked_golden_inference = golden_inference == clss
    masked_inference = inference == clss

    intersection = np.logical_and(masked_golden_inference, masked_inference).sum()
    union = np.logical_or(masked_golden_inference, masked_inference).sum()

    # intersection over union, with the empty-union case handled explicitly
    j_idx = 1.0 if union == 0 else intersection / union

    jaccard_idx.append(j_idx)

  return jaccard_idx


In [None]:
df_sdc_critical_j = df_sdc_critical.copy(deep=True)

# Worst (lowest) per-class Jaccard index of every SDC-critical run against the golden outputs.
# The injection numbers are read from the 'icount' column by name rather than by position.
ji = [min(jaccard_index(golden_probabilities, np.load(f'./pca_hyperspectral-group{INSTR_GROUP_DICT[INSTR_GROUP]}-model0-icount{icount}/probabilities.npy'))) for icount in df_sdc_critical['icount']]

# runs whose worst class overlap drops below 0.99
ji_bad = [i for i in ji if i < 0.99]
ji_bad

In [None]:
# Row positions (in df_sdc_critical) of the runs whose Jaccard score is among the bad ones.
idxs = [position for position, score in enumerate(ji) if score in ji_bad]

df_sdc_critical_significant = df_sdc_critical.iloc[idxs, :].copy(deep=True)
df_sdc_critical_significant

In [None]:
# Attach the per-run Jaccard score as a new 'jaccard_similarity' column. concat(axis=1) aligns on the
# index, so both frames are expected to share the same default 0..n-1 index.
df_js = pd.DataFrame({'jaccard_similarity': ji})
df_sdc_critical_enhanced = pd.concat([df_sdc_critical, df_js], axis=1)

# show only the runs whose score is below 0.99
df_sdc_critical_enhanced[df_sdc_critical_enhanced['jaccard_similarity'] < 0.99]

# Change in logits (SDC-safe and SDC-critical cases)

In [19]:
def softmax(a):
  """Softmax of ``a``, normalised along axis 0.

  The maximum along axis 0 is subtracted before exponentiating. This leaves the result
  mathematically unchanged but prevents exp() from overflowing to inf (and the quotient
  from becoming NaN) for large inputs.

  Args:
      a: array-like of scores; for a 1-D input this is the usual softmax.

  Returns:
      Array of the same shape whose entries sum to 1 along axis 0.
  """
  a = np.asarray(a)
  if a.size == 0:
    # nothing to normalise; keep the empty-in / empty-out behaviour
    return np.exp(a)
  exps = np.exp(a - np.max(a, axis=0))
  return exps / np.sum(exps, axis=0)

# Norm of the golden outputs: the same for every run, so it is computed once.
golden_norm = np.linalg.norm(golden_probabilities)


def relative_logits_change(icount):
  """Relative change of the saved network outputs of injection run ``icount``.

  Returns ||golden - faulty|| / ||golden||, where ``np.linalg.norm`` is applied with its
  default arguments to the golden outputs and to the 'probabilities.npy' of the run.
  """
  faulty = np.load(f'./pca_hyperspectral-group{INSTR_GROUP_DICT[INSTR_GROUP]}-model0-icount{icount}/probabilities.npy')
  return np.linalg.norm(golden_probabilities - faulty)/golden_norm


# SDC-safe runs: list in dataframe row order, plus a lookup by injection number
safe_logits_change_list = [relative_logits_change(icount) for icount in df_sdc_safe['icount']]
safe_logits_change_ds = dict(zip(df_sdc_safe['icount'], safe_logits_change_list))

# SDC-critical runs: same two views
critical_logits_change_list = [relative_logits_change(icount) for icount in df_sdc_critical['icount']]
critical_logits_change_ds = dict(zip(df_sdc_critical['icount'], critical_logits_change_list))
critical_probs_change_ds = {}

cell below is the same as the one above, but with some changes for checking some details

In [None]:
#golden_probabilities.shape
safe_logits_change_list

In [21]:
#critical_logits_change_list
df_critical_logits_change = pd.DataFrame({'logits_degradation': critical_logits_change_list})
df_sdc_critical_enhanced = pd.concat([df_sdc_critical_enhanced, df_critical_logits_change], axis=1)

df_safe_logits_change = pd.DataFrame({'logits_degradation': safe_logits_change_list})
df_sdc_safe_enhanced = pd.concat([df_sdc_safe, df_safe_logits_change], axis=1)

In [None]:
# default=nan: a category without any run yields nan instead of making max() raise on an empty sequence
max_safe_logits_change = max(safe_logits_change_ds.values(), default=float('nan'))
max_critical_logits_change = max(critical_logits_change_ds.values(), default=float('nan'))

print(f'Highest change in logits in SDC-safe cases: {max_safe_logits_change}')
print(f'Highest change in logits in SDC-critical cases: {max_critical_logits_change}')

In [None]:
#sort(safe_logits_change_ds, )
sorted_safe_logits_change = sorted(safe_logits_change_ds.items(), key=lambda x: x[1], reverse=True)
sorted_critical_logits_change = sorted(critical_logits_change_ds.items(), key=lambda x: x[1], reverse=True)

sorted_critical_logits_change
#sorted_safe_logits_change

# Confusion Matrix

In [24]:
from scipy import io, misc

def open_file(dataset):
    """Load a dataset file, choosing the loader from the file extension.

    Args:
        dataset: path of the file to load; the extension is matched case-insensitively.

    Returns:
        The result of ``scipy.io.loadmat`` for '.mat' files.

    Raises:
        ValueError: for any other extension.
    """
    extension = os.path.splitext(dataset)[1].lower()
    if extension != '.mat':
        raise ValueError("Unknown file format: {}".format(extension))
    # Matlab array
    return io.loadmat(dataset)

In [25]:
label_values_paviaU = [
            "Undefined",
            "Asphalt",
            "Meadows",
            "Gravel",
            "Trees",
            "Painted metal sheets",
            "Bare Soil",
            "Bitumen",
            "Self-Blocking Bricks",
            "Shadows",
        ]

label_values_salinas = [
            "Undefined",
            "Brocoli_green_weeds_1",
            "Brocoli_green_weeds_2",
            "Fallow",
            "Fallow_rough_plow",
            "Fallow_smooth",
            "Stubble",
            "Celery",
            "Grapes_untrained",
            "Soil_vinyard_develop",
            "Corn_senesced_green_weeds",
            "Lettuce_romaine_4wk",
            "Lettuce_romaine_5wk",
            "Lettuce_romaine_6wk",
            "Lettuce_romaine_7wk",
            "Vinyard_untrained",
            "Vinyard_vertical_trellis",
        ]

label_values_indianPines = [
            "Undefined",
            "Alfalfa",
            "Corn-notill",
            "Corn-mintill",
            "Corn",
            "Grass-pasture",
            "Grass-trees",
            "Grass-pasture-mowed",
            "Hay-windrowed",
            "Oats",
            "Soybean-notill",
            "Soybean-mintill",
            "Soybean-clean",
            "Wheat",
            "Woods",
            "Buildings-Grass-Trees-Drives",
            "Stone-Steel-Towers",
        ]

# Per-dataset lookup tables, all keyed by the DATASET configuration value:
#   datasetPath_dict   -> sub-directory of '../../../Datasets/'
#   datasetName_dict   -> file name of the ground-truth .mat file
#   datasetLabels_dict -> list of class names (index 0 is "Undefined")
#   datasetDumb_dict   -> key under which the label array is read from the loaded .mat file
datasetPath_dict  = {'pavia_uni': 'PaviaU', 'salinas': 'Salinas', 'indianPines': 'IndianPines'}
datasetName_dict = {'pavia_uni': 'PaviaU_gt.mat', 'salinas': 'Salinas_gt.mat', 'indianPines': 'Indian_pines_gt.mat'}
datasetLabels_dict = {'pavia_uni': label_values_paviaU, 'salinas': label_values_salinas, 'indianPines': label_values_indianPines}
datasetDumb_dict = {'pavia_uni': 'paviaU_gt', 'salinas': 'salinas_gt', 'indianPines': 'indian_pines_gt'}

gt_path = f'../../../Datasets/{datasetPath_dict[DATASET]}/{datasetName_dict[DATASET]}'

# ground-truth labels of the selected dataset
gt = open_file(gt_path)[datasetDumb_dict[DATASET]]

In [26]:
from sklearn.metrics import confusion_matrix
#import visdom

def metrics(prediction, target, ignored_labels=(), n_classes=None,k=None):
    """Compute accuracy, confusion matrix, per-class F1 scores and Cohen's kappa.

    Args:
        prediction: array of predicted labels, indexable with the same boolean mask as
            ``target``
        target: array of target labels; the ignore mask is built from its first two
            dimensions
        ignored_labels (optional): labels to exclude from the evaluation, e.g. 0 for undef
        n_classes (optional): number of classes, max(target) + 1 (after removing the
            ignored labels) by default
        k (optional): unused, kept for compatibility with existing callers
    Returns:
        dict with the keys "Confusion matrix", "Accuracy" (in percent), "F1 scores"
        (one value per class, 0 for a class with no target and no predicted samples)
        and "Kappa"
    Raises:
        ZeroDivisionError: if no sample is left after removing the ignored labels
    """
    # keep only the positions whose target label is not ignored
    ignored_mask = np.zeros(target.shape[:2], dtype="bool")
    for l in ignored_labels:
        ignored_mask[target == l] = True
    ignored_mask = ~ignored_mask
    target = target[ignored_mask]
    prediction = prediction[ignored_mask]

    results = {}

    n_classes = np.max(target) + 1 if n_classes is None else n_classes

    cm = confusion_matrix(
        target,
        prediction,
        labels=range(n_classes))

    results["Confusion matrix"] = cm

    # Global accuracy: correctly classified samples (diagonal of cm) over all samples, in percent
    total = np.sum(cm)
    accuracy = np.trace(cm) * (100 / float(total))

    results["Accuracy"] = accuracy

    # Per-class F1 score, the harmonic mean of precision p and recall r:
    #   F1 = 2*p*r / (p + r) = 2*TP / (row sum + column sum)
    F1scores = np.zeros(len(cm))
    for i in range(len(cm)):
        denominator = np.sum(cm[i, :]) + np.sum(cm[:, i])
        # NumPy integer division by zero yields nan with a warning instead of raising
        # ZeroDivisionError, so the empty-class case has to be tested explicitly
        F1scores[i] = 2. * cm[i, i] / denominator if denominator > 0 else 0.

    results["F1 scores"] = F1scores

    # Cohen's kappa: observed agreement (pa) corrected for the agreement expected by chance (pe)
    pa = np.trace(cm) / float(total)
    pe = np.sum(np.sum(cm, axis=0) * np.sum(cm, axis=1)) / \
        float(total * total)
    kappa = (pa - pe) / (1 - pe)
    results["Kappa"] = kappa

    return results

In [None]:
# Accuracy of the fault-free (golden) network outputs against the ground truth; this is the
# baseline the injected runs are compared with.
probs = np.load(f'../../../{LOGS_DICT}/golden_probabilities/{DATASET}_{PCA}_golden_probabilities_{MACHINE}.npy')

label_values = datasetLabels_dict[DATASET]

# predicted class = index of the highest value along the last axis
prediction = np.argmax(probs, axis=-1)

# label 0 is excluded from the evaluation
run_results = metrics(prediction, gt, [0], len(label_values))
cm = run_results["Confusion matrix"]
golden_accuracy = run_results["Accuracy"]
golden_accuracy

In [None]:
# Accuracy of every SDC-critical run; runs that lose more than 0.001 accuracy points with
# respect to the golden accuracy are printed and collected in `ics`.
ics = []
accs = []
label_values = datasetLabels_dict[DATASET]  # does not change between runs

# injection numbers are read from the 'icount' column by name rather than by position
for ic in df_sdc_critical['icount']:
    probs = np.load(f'./pca_hyperspectral-group{INSTR_GROUP_DICT[INSTR_GROUP]}-model0-icount{ic}/probabilities.npy')

    prediction = np.argmax(probs, axis=-1)

    # label 0 is excluded from the evaluation
    run_results = metrics(prediction, gt, [0], len(label_values))
    cm = run_results["Confusion matrix"]
    accuracy = run_results["Accuracy"]
    accs.append(accuracy)
    if accuracy < (golden_accuracy - 0.001):
        print((ic, accuracy))
        ics.append(ic)

In [29]:
# Accuracy drop of every SDC-critical run with respect to the golden accuracy.
accs_arr = np.array(accs)
golden_accs_arr = np.full(len(accs), golden_accuracy)
acc_drop = golden_accs_arr - accs_arr

df_acc_drop = pd.DataFrame({'accuracy_drop': acc_drop})
df_accuracies = pd.DataFrame({'accuracy': accs})

# assign() replaces the columns if they already exist, so re-running this cell does not pile up
# duplicate 'accuracy' / 'accuracy_drop' columns.
df_sdc_critical_enhanced = df_sdc_critical_enhanced.assign(accuracy=accs, accuracy_drop=acc_drop)

In [None]:
%cd ../../..

In [None]:
df_sdc_critical_enhanced

In [None]:
# Store the enhanced dataframes next to the complete ones. The '_hardened' tag follows the
# HARDENING setting, so a non-hardened analysis never writes into the hardened results file.
hardened_tag = '_hardened' if HARDENING else ''
enhanced_h5_path = f'./kernels_sdc_critical_counts_dataframes/complete_dataframes/{MACHINE}_{PCA}{hardened_tag}_{DATASET}_{INSTR_GROUP}_dataframes.h5'

# `key` is passed by keyword: passing it positionally is deprecated in recent pandas
df_sdc_critical_enhanced.to_hdf(enhanced_h5_path, key='df_sdc_critical_enhanced')
df_sdc_safe_enhanced.to_hdf(enhanced_h5_path, key='df_sdc_safe_enhanced')

In [None]:
#df_final = df_sdc_critical_enhanced#.copy(deep=True)
df_sdc_critical_enhanced[df_sdc_critical_enhanced['accuracy_drop'] > 0.0]