### Create a dataframe containing all SHAP maps for all models

In [1]:
import pandas as pd
import os

# Directory expected to hold one "<MODEL>_shap_maps.pickle" file per model.
shap_map_dir = "../xai_maps/xai_output"
shap_map_suffix = "_shap_maps.pickle"

all_files = os.listdir(shap_map_dir)

# Match on the full suffix rather than a substring so stray files such as
# "CLIP_shap_maps.pickle.bak" are not picked up and later fed to read_pickle.
# NOTE: os.listdir returns entries in arbitrary order, so the order of the
# two lists below (and of everything built from them) follows the file system.
relevant_filenames = [filename for filename in all_files if filename.endswith(shap_map_suffix)]

# The model name is everything before the suffix; stripping the suffix (instead
# of splitting on the first "_") keeps model names that contain underscores intact.
model_names = [filename[:-len(shap_map_suffix)] for filename in relevant_filenames]

relevant_filenames, model_names

(['LXMERT_shap_maps.pickle',
  'CLIP_shap_maps.pickle',
  'SigLip_shap_maps.pickle',
  'FLAVA_shap_maps.pickle'],
 ['LXMERT', 'CLIP', 'SigLip', 'FLAVA'])

Create a shared DataFrame that combines the SHAP outputs of all models, with a column identifying the model

In [2]:
# Load each model's SHAP dataframe and tag every row with the model it came from.
# NOTE(review): read_pickle can execute arbitrary code while unpickling;
# only point this at files from a trusted source.
model_shap_dfs = [
    pd.read_pickle(os.path.join(shap_map_dir, filename)).assign(model=model_name)
    for model_name, filename in zip(model_names, relevant_filenames)
]

# Stack the per-model frames. Row labels are NOT reset, so each label occurs
# once per model in the combined frame.
combi_shap_df = pd.concat(model_shap_dfs)

# Bare image file name (directory part of img_path stripped).
combi_shap_df['filename'] = list(map(os.path.basename, combi_shap_df['img_path']))
combi_shap_df

Unnamed: 0,img_path,caption,foil,linguistic_phenomena,model_shap_map,n_image_variants,model,filename
0,../../VALSE_data/images/coco2017/000000411953.jpg,a guy plays guitar on the stage center,A guy plays guitar off the stage center,relations,"[[0.0010528630991757382, 0.0001818203072616597...",172,LXMERT,000000411953.jpg
1,../../VALSE_data/images/swig/licking_291.jpg,A cow licks its fur.,A cow wrinkles its fur.,actions,"[[-0.055763863715583284, 0.022721240122336894,...",172,LXMERT,licking_291.jpg
2,../../VALSE_data/images/swig/counting_103.jpg,A person counts money.,A person pays money.,actions,"[[-0.0010253147265757434, 0.001289952102524694...",172,LXMERT,counting_103.jpg
3,../../VALSE_data/images/v7w/v7w_2411632.jpg,There are no people in the photo.,There is at least one person in the photo.,existence,"[[0.000928994850255549, -0.011200517765246332,...",172,LXMERT,v7w_2411632.jpg
4,../../VALSE_data/images/swig/urinating_251.jpg,A man urinates against a wall.,A man skates against a wall.,actions,"[[0.022377102646714775, 0.10345780447460129, -...",172,LXMERT,urinating_251.jpg
...,...,...,...,...,...,...,...,...
94,../../VALSE_data/images/v7w/v7w_2363335.jpg,There is a mast on the closest boat.,There is no mast on the closest boat.,existence,"[[0.05296492576599121, -0.05216550827026367, -...",172,FLAVA,v7w_2363335.jpg
95,../../VALSE_data/images/v7w/v7w_2367084.jpg,There is a giraffe.,There is no giraffe.,existence,"[[0.7628903388977051, -0.021378040313720703, -...",172,FLAVA,v7w_2367084.jpg
96,../../VALSE_data/images/v7w/v7w_2373121.jpg,There are no clouds in the sky.,There is at least one cloud in the sky.,existence,"[[-0.5447151064872742, -0.09526211023330688, -...",172,FLAVA,v7w_2373121.jpg
97,../../VALSE_data/images/v7w/v7w_2390850.jpg,There are no people in the water.,There is at least one person in the water.,existence,"[[-0.042334675788879395, 0.08342540264129639, ...",172,FLAVA,v7w_2390850.jpg


Add a normalized, non-negative SHAP column: take the absolute value of every entry, then divide each map by its sum so that it sums to 1 (a map that is entirely zero is left unchanged)

In [3]:
import numpy as np

def _abs_normalize(shap_map):
    """Return the element-wise absolute value of ``shap_map`` scaled to sum to 1.

    When the absolute values do not add up to a positive number (e.g. the map
    is all zeros) the absolute-value map is returned without scaling.
    """
    magnitudes = np.abs(shap_map)
    total = np.sum(magnitudes)
    return magnitudes / total if total > 0 else magnitudes


# One normalized map per row, in the same row order as combi_shap_df.
new_shap_maps = [_abs_normalize(shap_map) for shap_map in combi_shap_df['model_shap_map']]

combi_shap_df['model_shap_positive_normalized'] = new_shap_maps

In [4]:
# Persist the combined frame (raw and normalized SHAP maps) next to the notebook.
combi_shap_df.to_pickle(path="1_all_xai_maps.pickle")

In [5]:
# Sanity check: a normalized map that sums to exactly 0 was all zeros to begin
# with (normalization is skipped for such maps). Report which model/image it
# belongs to so the offending row can be found; prints nothing when all is well.
for model_name, image_name, new_shap in zip(
    combi_shap_df['model'],
    combi_shap_df['filename'],
    combi_shap_df['model_shap_positive_normalized'],
):
    if np.sum(new_shap) == 0:
        print(f"PROBLEM: THERE IS A TOTALLY EMPTY SHAP MAP (model={model_name}, file={image_name})")

In [6]:
# Label-based lookup: row label 5 occurs once per model in the combined frame,
# so this shows the raw SHAP map of every model for that row label.
combi_shap_df.loc[5, 'model_shap_map']

5    [[0.018459939998592745, 0.02855476230934073, -...
5    [[-0.018877267837524414, 0.07358980178833008, ...
5    [[0.5498674213886261, 0.6956070959568024, 0.44...
5    [[-0.16100779175758362, 0.0764375627040863, -0...
Name: model_shap_map, dtype: object

In [7]:
# Same label-based lookup as for the raw maps, now on the normalized column:
# one normalized map per model for row label 5.
combi_shap_df.loc[5, 'model_shap_positive_normalized']

5    [[0.034739605778902866, 0.05373696695722698, 0...
5    [[0.006880996059746134, 0.02682438690287738, 0...
5    [[0.09857956541765339, 0.12470759778363533, 0....
5    [[0.02107458748241377, 0.010005044380554835, 0...
Name: model_shap_positive_normalized, dtype: object