In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.chdir('../')
os.environ["CUDA_VISIBLE_DEVICES"]="0"
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"]="python"


In [3]:
from src.dataset import *
from src.concept_vectors import *
from src.util import *
from src.hierarchy import *
from src.metrics import *
import numpy as np
import matplotlib.pyplot as plt
import glob
import tensorflow as tf
from src.create_vectors import *
import json 

2024-01-15 12:03:25.125819: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
import os 
os.system("export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH")

0

In [5]:
seeds = [43,44,45]
vector_method = [load_cem_vectors_simple,load_concept2vec_vectors_simple,load_label_vectors_simple,load_tcav_vectors_simple]
vector_names = ["CEM","Concept2Vec","Label","TCAV"]

## Debugging: Check which TCAV Vectors are available

In [24]:
dataset = CUB_Dataset()
attributes = dataset.get_attributes()

In [25]:
def attribute_exists(attribute_name,suffix,seed):
    attribute_name = attribute_name.replace(" ","_")
    folder_name = "results/bases/tcav/{}{}/{}/".format(dataset.experiment_name,suffix,seed)
    for i in range(3):
        file_name = "{}_{}_{}-random500_{}-block4_conv1-linear-0.1.pkl".format(attribute_name,seed,suffix,i)

        if not os.path.exists(folder_name + file_name):
            return False
    return True

In [26]:
for attribute in attributes:
    for seed in [43,44,45]:
        for suffix in ['','_image_robustness','_image_responsiveness']:
            attribute_exists(attribute,suffix,seed)

## Evaluate All Vectors MNIST

In [7]:
dataset = MNIST_Dataset()
attributes = dataset.get_attributes()

In [28]:
for method,name in zip(vector_method,vector_names):
    print("Computing {}".format(name))
    start = time.time()

    results = compute_all_metrics(method,
                                        dataset,
                                        attributes,
                                        seeds)
    print("Method {}: {}".format(name,results))
    
    name_lower = name.lower()
    w = open("results/evaluation/{}_{}.txt".format(dataset.experiment_name,name_lower),"w")
    for key in results:
        w.write("{}: {}\n".format(key,results[key]))
    w.close()


Computing CEM


  all_concept_embeddings = np.array([embedding_method(i,dataset,"",seed=seed) for i in dataset.get_attributes()])


Truthfulness: (0.8666666666666667, 0.1885618083164127)
Image Robustness: (0.19999999999999998, 0.08164965809277264)
Image Responsiveness: (0.9500000000000001, 0.07071067811865477)
Stability: (0.1333333333333333, 0.09428090415820632)
Method CEM: {'Truthfulness': (0.8666666666666667, 0.1885618083164127), 'Image Robustness': (0.19999999999999998, 0.08164965809277264), 'Image Responsiveness': (0.9500000000000001, 0.07071067811865477), 'Stability': (0.1333333333333333, 0.09428090415820632)}
Computing Concept2Vec
Truthfulness: (1.0, 0.0)
Image Robustness: (0.0, 0.0)
Image Responsiveness: (0.9500000000000001, 0.04082482904638629)
Stability: (0.0, 0.0)
Method Concept2Vec: {'Truthfulness': (1.0, 0.0), 'Image Robustness': (0.0, 0.0), 'Image Responsiveness': (0.9500000000000001, 0.04082482904638629), 'Stability': (0.0, 0.0)}
Computing Label
Truthfulness: (1.0, 0.0)
Image Robustness: (0.0, 0.0)
Image Responsiveness: (1.0, 0.0)
Stability: (0.0, 0.0)
Method Label: {'Truthfulness': (1.0, 0.0), 'Image

In [8]:
baseline_distances = np.zeros((len(attributes),len(attributes)))
for i, attribute_1 in enumerate(attributes):
    for j, attribute_2 in enumerate(attributes):
        baseline_distances[i][j] = (1-int(attribute_1[0] == attribute_2[0]))

In [9]:
distance_by_method_mnist = {}
for function,name in zip(vector_method,vector_names):
    h_list = [flat_distance_to_square(get_concept_distances(function,MNIST_Dataset(),'',MNIST_Dataset().get_attributes(),seed)) for seed in [43,44,45]]
    distance_by_method_mnist[name] = [embedding_distance(h,baseline_distances,k=1) for h in h_list]
    distance_by_method_mnist[name] = (np.mean(distance_by_method_mnist[name]),np.std(distance_by_method_mnist[name]))

In [10]:
json.dump(distance_by_method_mnist,open('results/evaluation/ablation/distance_mnist.json','w'))

In [11]:
method_list = [load_label_vectors_simple,load_cem_vectors_simple,load_concept2vec_vectors_simple,load_tcav_vectors_simple]
name_list = ['label','cem','concept2vec','tcav']
dataset = MNIST_Dataset()

agreement_by_method = {}

for method,name in zip(method_list,name_list):
    agreement_by_method[name] = []

    for seed in [43,44,45]:
        all_vectors = [np.mean(np.array(method(a,dataset,"",seed)),axis=0) for a in dataset.get_attributes()]
        all_vectors = np.array(all_vectors)

        closest_vectors = []

        # Iterate through each vector in the array
        for i in range(len(all_vectors)):
            current_vector = all_vectors[i]
            
            # Compute cosine similarity with all other vectors
            similarities = [1 - cosine(current_vector, other_vector) for other_vector in all_vectors]

            # Find the index of the vector with the maximum cosine similarity (excluding the current vector)
            closest_index = np.argmax(similarities[:i] + similarities[i+1:])  # Exclude the current vector
            
            if closest_index >= i:
                closest_index += 1

            # Append the closest vector to the list
            closest_vectors.append(closest_index)

        # Convert the list to a NumPy array if needed
        closest_vectors = np.array(closest_vectors)
        correct_vectors = []

        for i in range(0,len(dataset.get_attributes()),2):
            correct_vectors.append(i+1)
            correct_vectors.append(i)
        
        agreement_by_method[name].append(float(np.sum(np.array(closest_vectors) == np.array(correct_vectors))/len(correct_vectors)))
json.dump(agreement_by_method,open('results/evaluation/ablation/agreement_mnist.json','w'))

## Evaluate all Vectors CUB

In [30]:
dataset = CUB_Dataset()
attributes = dataset.get_attributes()
seeds = [43,44,45]

In [38]:
for method,name in zip(vector_method[::-1],vector_names[::-1]):
    print("Computing {}".format(name))
    start = time.time()

    results = compute_all_metrics(method,
                                        dataset,
                                        attributes,
                                        seeds)
    print("Method {}: {}".format(name,results))
    
    name_lower = name.lower()
    w = open("results/evaluation/{}_{}.txt".format(dataset.experiment_name,name_lower),"w")
    for key in results:
        w.write("{}: {}\n".format(key,results[key]))
    w.close()
    

Computing TCAV


  all_concept_embeddings = np.array([embedding_method(i,dataset,"",seed=seed) for i in dataset.get_attributes()])


Truthfulness: (0.0755952380952381, 0.008908708063747483)
Image Robustness: (0.9970238095238096, 0.002430049347999176)
Image Responsiveness: (0.9880952380952381, 0.004860098695998397)
Stability: (0.9890873015873017, 0.0037119616932281036)
Method TCAV: {'Truthfulness': (0.0755952380952381, 0.008908708063747483), 'Image Robustness': (0.9970238095238096, 0.002430049347999176), 'Image Responsiveness': (0.9880952380952381, 0.004860098695998397), 'Stability': (0.9890873015873017, 0.0037119616932281036)}
Computing Label
Truthfulness: (0.7321428571428571, 0.0)
Image Robustness: (0.0267857142857143, 0.0)
Image Responsiveness: (0.9672619047619048, 0.0)
Stability: (0.0, 0.0)
Method Label: {'Truthfulness': (0.7321428571428571, 0.0), 'Image Robustness': (0.0267857142857143, 0.0), 'Image Responsiveness': (0.9672619047619048, 0.0), 'Stability': (0.0, 0.0)}
Computing Concept2Vec
Truthfulness: (0.41130952380952385, 0.012400396819047418)
Image Robustness: (0.6865079365079364, 0.018559808466140564)
Image 

  all_concept_embeddings = np.array([embedding_method(i,dataset,"",seed=seed) for i in dataset.get_attributes()])


Truthfulness: (0.05535714285714286, 0.0025253813613805246)
Image Robustness: (0.9751984126984127, 0.01148396518133952)
Image Responsiveness: (0.9771825396825397, 0.008534052844288337)
Stability: (0.9722222222222222, 0.0037119616932281036)
Method CEM: {'Truthfulness': (0.05535714285714286, 0.0025253813613805246), 'Image Robustness': (0.9751984126984127, 0.01148396518133952), 'Image Responsiveness': (0.9771825396825397, 0.008534052844288337), 'Stability': (0.9722222222222222, 0.0037119616932281036)}


In [None]:
baseline_distances_color = np.zeros((len(attributes),len(attributes)))
for i, attribute_1 in enumerate(attributes):
    for j, attribute_2 in enumerate(attributes):
        baseline_distances_color[i][j] = (1-int(attribute_1.split("::")[1] == attribute_2.split("::")[1])) + abs(random.random()/100)*int(attribute_1 != attribute_2)

In [None]:
distance_by_method_cub = {}
for function,name in zip(vector_method,vector_names):
    h_list = [flat_distance_to_square(get_concept_distances(function,cub,'',cub_attributes,seed)) for seed in [43,44,45]]
    distance_by_method_cub[name] = [embedding_distance(h,baseline_distances_color,k=3) for h in h_list]
    distance_by_method_cub[name] = (np.mean(distance_by_method_cub[name]),np.std(distance_by_method_cub[name]))

In [None]:
json.dump(distance_by_method_cub,open('results/evaluation/ablation/distance_cub_second_part.json','w'))

In [None]:
top_pairs = get_top_k_pairs(baseline_distances_color,k=3)
top_pairs = [(attributes[i[0]],attributes[i[1]]) for i in top_pairs]

In [None]:
our_top_pairs = get_top_k_pairs(flat_distance_to_square(get_concept_distances(load_label_vectors_simple,dataset,'',attributes,43)))

In [None]:
our_top_pairs = [(attributes[i[0]],attributes[i[1]]) for i in our_top_pairs]

In [None]:
baseline_distances_attribute = np.zeros((len(attributes),len(attributes)))

In [None]:
for i, attribute_1 in enumerate(attributes):
    for j, attribute_2 in enumerate(attributes):
        baseline_distances_attribute[i][j] = (1-int(attribute_1.split("::")[0] == attribute_2.split("::")[0])) + abs(np.random.random()/100)*int(attribute_1 != attribute_2)

In [None]:
distance_by_method_cub = {}
for function,name in zip(vector_method,vector_names):
    h_list = [flat_distance_to_square(get_concept_distances(function,cub,'',cub_attributes,seed)) for seed in [43,44,45]]
    distance_by_method_cub[name] = [embedding_distance(h,baseline_distances_attribute,k=3) for h in h_list]
    distance_by_method_cub[name] = (np.mean(distance_by_method_cub[name]),np.std(distance_by_method_cub[name]))

In [None]:
json.dump(distance_by_method_cub,open('results/evaluation/ablation/distance_cub_first_part.json','w'))

## Evalaute all Vectors DSprites

In [39]:
dataset = DSprites_Dataset()
attributes = dataset.get_attributes()

In [45]:
for method,name in zip(vector_method[::-1],vector_names[::-1]):
    print("Computing {}".format(name))
    start = time.time()

    results = compute_all_metrics(method,
                                        dataset,
                                        attributes,
                                        seeds)
    print("Method {}: {}".format(name,results))
    
    name_lower = name.lower()
    w = open("results/evaluation/{}_{}.txt".format(dataset.experiment_name,name_lower),"w")
    for key in results:
        w.write("{}: {}\n".format(key,results[key]))
    w.close()

Computing TCAV
Truthfulness: (0.30370370370370375, 0.013857990321384989)
Image Robustness: (0.6049382716049382, 0.04860498687661615)
Image Responsiveness: (0.8148148148148148, 0.05237828008789238)
Stability: (0.4444444444444444, 0.04000457221239422)
Method TCAV: {'Truthfulness': (0.30370370370370375, 0.013857990321384989), 'Image Robustness': (0.6049382716049382, 0.04860498687661615), 'Image Responsiveness': (0.8148148148148148, 0.05237828008789238), 'Stability': (0.4444444444444444, 0.04000457221239422)}
Computing Label
Truthfulness: (0.5, 0.0)
Image Robustness: (0.8148148148148149, 0.0)
Image Responsiveness: (0.7777777777777778, 0.0)
Stability: (0.0, 0.0)
Method Label: {'Truthfulness': (0.5, 0.0), 'Image Robustness': (0.8148148148148149, 0.0), 'Image Responsiveness': (0.7777777777777778, 0.0), 'Stability': (0.0, 0.0)}
Computing Concept2Vec
Truthfulness: (0.3074074074074075, 0.027715980642769932)
Image Robustness: (0.8765432098765432, 0.04364856673991033)
Image Responsiveness: (0.8395

  all_concept_embeddings = np.array([embedding_method(i,dataset,"",seed=seed) for i in dataset.get_attributes()])


Truthfulness: (0.2703703703703704, 0.010475656017578472)
Image Robustness: (0.7592592592592592, 0.15930231976004866)
Image Responsiveness: (0.8765432098765432, 0.05310077325334951)
Stability: (0.29012345679012347, 0.031475429096251756)
Method CEM: {'Truthfulness': (0.2703703703703704, 0.010475656017578472), 'Image Robustness': (0.7592592592592592, 0.15930231976004866), 'Image Responsiveness': (0.8765432098765432, 0.05310077325334951), 'Stability': (0.29012345679012347, 0.031475429096251756)}


## Evaluate all Vectors Chexpert

In [46]:
dataset = Chexpert_Dataset()
attributes = dataset.get_attributes()

In [47]:
for method,name in zip(vector_method,vector_names):
    print("Computing {}".format(name))
    start = time.time()

    results = compute_all_metrics(method,
                                        dataset,
                                        attributes,
                                        seeds)
    print("Method {}: {}".format(name,results))
    
    name_lower = name.lower()
    w = open("results/evaluation/{}_{}.txt".format(dataset.experiment_name,name_lower),"w")
    for key in results:
        w.write("{}: {}\n".format(key,results[key]))
    w.close()

Computing CEM


  all_concept_embeddings = np.array([embedding_method(i,dataset,"",seed=seed) for i in dataset.get_attributes()])


Truthfulness: (0.4051282051282052, 0.05076664070057265)
Image Robustness: (0.5042735042735043, 0.11530544925839355)
Image Responsiveness: (0.7264957264957265, 0.12791991065893818)
Stability: (0.5555555555555557, 0.06729921259839156)
Method CEM: {'Truthfulness': (0.4051282051282052, 0.05076664070057265), 'Image Robustness': (0.5042735042735043, 0.11530544925839355), 'Image Responsiveness': (0.7264957264957265, 0.12791991065893818), 'Stability': (0.5555555555555557, 0.06729921259839156)}
Computing Concept2Vec
Truthfulness: (0.441025641025641, 0.06322475900481)
Image Robustness: (0.358974358974359, 0.07548513560963974)
Image Responsiveness: (0.7606837606837606, 0.03197997766473456)
Stability: (0.5726495726495726, 0.052687299170675)
Method Concept2Vec: {'Truthfulness': (0.441025641025641, 0.06322475900481), 'Image Robustness': (0.358974358974359, 0.07548513560963974), 'Image Responsiveness': (0.7606837606837606, 0.03197997766473456), 'Stability': (0.5726495726495726, 0.052687299170675)}
Co

## Explain why CEM Vectors are Random

In [50]:
results_by_method = {'cem': {}, 
'tcav': {}, 
'label': {}, 
'concept2vec': {}}

for method,name in zip([load_cem_vectors_simple,load_tcav_vectors_simple,load_label_vectors_simple,load_concept2vec_vectors_simple],['cem','tcav','label','concept2vec']):
    for dataset in [CUB_Dataset(),MNIST_Dataset(),Chexpert_Dataset(),DSprites_Dataset()]:
        a = dataset.get_attributes() 

        similarities = []
        avg_same_similarity = []
        stds = []

        for seed in [43,44,45]:
            vectors = [np.mean(method(attribute,dataset,"",seed),axis=0) for attribute in a]
            for i in vectors:
                stds.append(np.std(i))
            cosine_similarities_max = []
            for i in range(len(vectors)):
                cosine_similarities = max([1-cosine(vectors[i],vectors[j]) for j in range(len(vectors)) if i!=j])
                cosine_similarities_max.append(cosine_similarities)
            similarities.append(np.mean(cosine_similarities_max))

        d = len(vectors[0])
        std = np.mean(stds)
        mean = np.mean(similarities)
        z_score = (mean-0)/(d*std**4/(3**.5))
        z_score *= len(attribute)**.5

        results_by_method[name][dataset.experiment_name] = {
            'dimension': d,
            'std': float(std), 
            'mean_similarity': float(mean), 
            'std_similarity': float(np.std(similarities))
        }
json.dump(results_by_method,open('results/evaluation/ablation/randomness_cem_tcav.json','w'))

In [61]:
results_by_method['tcav']['cub']

{'dimension': 401408,
 'std': 0.15072173073176476,
 'mean_similarity': 0.6544520321994527,
 'std_similarity': 0.010189191367382824}

In [26]:
CUB_Dataset().get_attributes()

['has_bill_shape::dagger',
 'has_bill_shape::hooked_seabird',
 'has_bill_shape::all-purpose',
 'has_bill_shape::cone',
 'has_wing_color::brown',
 'has_wing_color::grey',
 'has_wing_color::yellow',
 'has_wing_color::black',
 'has_wing_color::white',
 'has_wing_color::buff',
 'has_upperparts_color::brown',
 'has_upperparts_color::grey',
 'has_upperparts_color::yellow',
 'has_upperparts_color::black',
 'has_upperparts_color::white',
 'has_upperparts_color::buff',
 'has_underparts_color::brown',
 'has_underparts_color::grey',
 'has_underparts_color::yellow',
 'has_underparts_color::black',
 'has_underparts_color::white',
 'has_underparts_color::buff',
 'has_breast_pattern::solid',
 'has_breast_pattern::striped',
 'has_breast_pattern::multi-colored',
 'has_back_color::brown',
 'has_back_color::grey',
 'has_back_color::yellow',
 'has_back_color::black',
 'has_back_color::white',
 'has_back_color::buff',
 'has_tail_shape::notched_tail',
 'has_upper_tail_color::brown',
 'has_upper_tail_color::

In [41]:
load_concept2vec_vectors_simple("5_color",MNIST_Dataset(),"",43).shape

(1, 32)

In [40]:
create_concept2vec(MNIST_Dataset(),"",43)



['0_color',
 '0_number',
 '1_color',
 '1_number',
 '2_color',
 '2_number',
 '3_color',
 '3_number',
 '4_color',
 '4_number',
 '5_color',
 '5_number',
 '6_color',
 '6_number',
 '7_color',
 '7_number',
 '8_color',
 '8_number',
 '9_color',
 '9_number']

In [15]:
load_concept2vec_vectors_simple

<function src.concept_vectors.load_concept2vec_vectors_simple(attribute, dataset, suffix, seed=-1)>

In [None]:
results_by_method['label']['mnist']['z_score']

{'cem': {'cub': {'dimension': 16,
   'std': 1.0710860318615494,
   'mean_similarity': 0.04315607335729071,
   'z_score': 0.019763597090416498},
  'mnist': {'dimension': 16,
   'std': 0.3603748546133479,
   'mean_similarity': -0.23096284300518896,
   'z_score': -4.192854909517109},
  'chexpert': {'dimension': 32,
   'std': 0.7383034818894332,
   'mean_similarity': 0.2269337003345913,
   'z_score': 0.1601089490787698},
  'dsprites': {'dimension': 32,
   'std': 0.8814139286781008,
   'mean_similarity': 0.2441086504661453,
   'z_score': 0.0579192863227851}},
 'tcav': {'cub': {'dimension': 401408,
   'std': 0.15072173073176479,
   'mean_similarity': 0.5211380636473188,
   'z_score': 0.024260779783046957},
  'mnist': {'dimension': 401408,
   'std': 0.2327976155578189,
   'mean_similarity': 0.6249369905391176,
   'z_score': 0.0025968192268532434},
  'chexpert': {'dimension': 401408,
   'std': 0.08216722796770931,
   'mean_similarity': 0.6034696662843769,
   'z_score': 0.22124890783590814},
  

## Analyze impact of vector metric

In [None]:
hierarchy_by_metric = {}
dataset = CUB_Dataset()
attributes = dataset.get_attributes()
for metric in ['euclidean','cosine','manhattan']:
    hierarchy_by_metric[metric] = {}

    for function,name in zip([load_label_vectors_simple,
    load_shapley_vectors_simple, 
    load_cem_vectors_simple,
    load_concept2vec_vectors_simple
    ],['label','shapley','cem','concept2vec']):
        hierarchy_by_metric[metric][name] = {}
        for seed in [43,44,45]:
            hierarchy_by_metric[metric][name][seed] = flat_distance_to_square(get_concept_distances(function,dataset,'',attributes,seed,metric=metric))

In [None]:
avg_pairwise_distance = {}
for name in ['label','shapley','cem','concept2vec']:
    avg_pairwise_distance[name] = {}
    for metric_1 in ['euclidean','cosine','manhattan']:
        avg_pairwise_distance[name][metric_1] = {}
        for metric_2 in ['euclidean','cosine','manhattan']:
            h1 = hierarchy_by_metric[metric_1][name]
            h2 = hierarchy_by_metric[metric_2][name]
            results = [embedding_distance(h1[seed],h2[seed],k=3) for seed in [43,44,45]]
            avg_pairwise_distance[name][metric_1][metric_2] = (np.mean(results),np.std(results))

In [None]:
json.dump(avg_pairwise_distance,open('results/evaluation/ablation/metric_distances.json','w'))

In [None]:
hierarchy_object_by_metric = {}
dataset = CUB_Dataset()
attributes = dataset.get_attributes()
for metric in ['euclidean','cosine','manhattan']:
    hierarchy_object_by_metric[metric] = {}

    for function,name in zip([load_label_vectors_simple,
    load_shapley_vectors_simple, 
    load_cem_vectors_simple,
    load_concept2vec_vectors_simple
    ],['label','shapley','cem','concept2vec']):
        hierarchy_object_by_metric[metric][name] = {}
        for seed in [43,44,45]:
            hierarchy_object_by_metric[metric][name][seed] = create_hierarchy(create_ward_hierarchy,function,dataset,'',attributes,seed,metric=metric) 

### Investigate why this occurs

In [None]:
cosine_hierarchy = hierarchy_object_by_metric['cosine']['label'][43]
euclidean_hierarchy = hierarchy_object_by_metric['euclidean']['label'][43]

In [None]:
top_k_euclidean = get_top_k_pairs(hierarchy_by_metric['euclidean']['label'][43],k=3)
top_k_cosine = get_top_k_pairs(hierarchy_by_metric['cosine']['label'][43],k=3)

top_k_euclidean = [(attributes[i[0]],attributes[i[1]]) for i in top_k_euclidean]
top_k_cosine = [(attributes[i[0]],attributes[i[1]]) for i in top_k_cosine]

In [None]:
all_distances = [np.mean([embedding_distance(hierarchy_by_metric['cosine']['label'][seed],hierarchy_by_metric['euclidean']['label'][seed],k=k) for seed in [43,44,45]]) for k in range(1,110)]

In [None]:
percent_euclidean_agree = len([i for i in top_k_euclidean if i[0].split("::")[1] == i[1].split("::")[1]])/len(top_k_euclidean)
percent_cosine_agree = len([i for i in top_k_cosine if i[0].split("::")[1] == i[1].split("::")[1]])/len(top_k_euclidean)
percent_euclidean_agree,percent_cosine_agree

(0.7053571428571429, 0.7291666666666666)

In [None]:
all_top_k_euclidean = []
all_top_k_cosine = []

for seed in [43,44,45]:
    top_k_euclidean = get_top_k_pairs(hierarchy_by_metric['euclidean']['label'][seed],k=3)
    top_k_cosine = get_top_k_pairs(hierarchy_by_metric['cosine']['label'][seed],k=3)

    top_k_euclidean = [(attributes[i[0]],attributes[i[1]]) for i in top_k_euclidean]
    top_k_cosine = [(attributes[i[0]],attributes[i[1]]) for i in top_k_cosine]

    all_top_k_euclidean += top_k_euclidean
    all_top_k_cosine += top_k_cosine


percent_euclidean_agree = len([i for i in all_top_k_euclidean if i[0].split("::")[1] == i[1].split("::")[1]])/len(top_k_euclidean)
percent_cosine_agree = len([i for i in all_top_k_cosine if i[0].split("::")[1] == i[1].split("::")[1]])/len(top_k_euclidean)
percent_euclidean_agree/3, percent_cosine_agree/3

(0.7053571428571428, 0.7291666666666666)

In [None]:
json.dump({
    'all_distances': all_distances, 
    'top_k_euclidean': top_k_euclidean, 
    'top_k_cosine': top_k_cosine, 
    'percent_euclidean_agree': percent_euclidean_agree/3, 
    'percent_cosine_agree': percent_cosine_agree/3, 
},open('results/evaluation/ablation/distance_cosine_euclidean_top_k.json','w'))

## Analyze Hierarchy Similairty

In [None]:
hierarchy_by_dataset = defaultdict(lambda: defaultdict(dict))
for dataset_function,dataset_name in zip([CUB_Dataset, MNIST_Dataset, DSprites_Dataset, Chexpert_Dataset],['cub','mnist','dsprites','chexpert']):
    dataset = dataset_function()
    attributes = dataset.get_attributes()

    for function,name in zip([load_label_vectors_simple,
    load_shapley_vectors_simple, 
    load_cem_vectors_simple,
    load_concept2vec_vectors_simple
    ],['label','shapley','cem','concept2vec']):
        hierarchy_by_metric[metric][name] = {}
        for seed in [43,44,45]:
            hierarchy_by_dataset[dataset_name][name][seed] = flat_distance_to_square(get_concept_distances(function,dataset,'',attributes,seed))


In [None]:
distance_by_dataset = defaultdict(lambda: defaultdict(dict))

for dataset in ['cub','mnist','dsprites','chexpert']:
    for name in ['cem','shapley','label','concept2vec']:
        for name_2 in ['cem','shapley','label','concept2vec']:
            h1 = hierarchy_by_dataset[dataset][name]
            h2 = hierarchy_by_dataset[dataset][name_2]
            distance_by_dataset[dataset][name][name_2] = [embedding_distance(h1[seed],h2[seed],k=3) for seed in [43,44,45]]


In [None]:
distances_cub = np.array([[distance_by_dataset['cub'][i][j] for j in distance_by_dataset['cub'][i]] for i in distance_by_dataset['cub']])
distances_cub = np.mean(distances_cub,axis=2)

In [None]:
labels = ['cem','shapley','label','concept2vec']

In [None]:
json.dump(distances_cub.tolist(), open('results/evaluation/ablation/distance_between_hierarchies.json','w'))

## CUB Ablation Studies

### Evaluation at different noise levels

In [17]:
dataset = CUB_Dataset()
attributes = dataset.get_attributes()

In [22]:
embedding_method = load_shapley_vectors_simple
name = "Shapley"
random_seeds = [43]

In [23]:
start = time.time()

results = {}

for flip_prob in ['0.01','0.05','0.1']:
    results['flip_prob_{}'.format(flip_prob)] = compare_same_images_by_suffix(embedding_method,
                                         dataset,attributes,random_seeds,"_flip_{}".format(flip_prob),
                                        baseline_hierarchies=None)

for noise in [25,50,100]:
    results['noise_{}'.format(flip_prob)] = compare_same_images_by_suffix(embedding_method,
                                         dataset,attributes,random_seeds,"_noise_{}".format(noise),
                                        baseline_hierarchies=None)

w = open("results/evaluation/cub_noise_ablation.txt","w")
for key in results:
    w.write("{}: {}\n".format(key,results[key]))
w.close()

### See how truthfulness hyperparameters impact things

In [43]:
dataset = CUB_Dataset()
attributes = dataset.get_attributes()

results = {}

for compare_concept in [1,3,5,7]:
    results[compare_concept] = truthfulness_metric_shapley(load_shapley_vectors_simple,dataset,attributes,
                                                           [43,44,45],model_name="VGG16",compare_concepts=compare_concept)

Found 1198 validated image filenames belonging to 200 classes.
Found 1198 validated image filenames belonging to 200 classes.
Found 1198 validated image filenames belonging to 200 classes.
Found 1198 validated image filenames belonging to 200 classes.


In [44]:
w = open("results/evaluation/cub_truthfulness_ablation.txt","w")
for key in results:
    w.write("{}: {}\n".format(key,results[key]))
w.close()