In [1]:
import sys
import os
import numpy as np
import sklearn.metrics as metrics
from tcav import utils
import tcav.utils_plot as utils_plot # utils_plot requires matplotlib
import tensorflow as tf
import ace_helpers
from ace import ConceptDiscovery
import os
import shutil
import random

In [2]:
def create_image_folders(source_path, num_random_exp, max_imgs, random_set_size=500):
  """Populate the random-image folders from the male/female images.

  Collects every .jpg/.jpeg/.png file found directly inside
  ``<source_path>/male`` and ``<source_path>/female`` and copies random
  samples of them (drawn without replacement) into sub-folders of
  ``source_path``:

  * ``random_discovery`` receives ``max_imgs`` images;
  * ``random500_0`` ... ``random500_<num_random_exp - 1>`` each receive
    ``random_set_size`` images.

  Folders that already exist are reused, so re-running adds to (or
  overwrites files in) the previous contents.

  Args:
    source_path: Directory containing the ``male`` and ``female`` folders.
    num_random_exp: Number of ``random500_<i>`` folders to create.
    max_imgs: Number of images copied into ``random_discovery``.
    random_set_size: Number of images copied into each ``random500_<i>``
      folder. Defaults to 500, the value that was previously hard-coded.

  Raises:
    ValueError: If fewer images were found than the largest sample requested.
      The check happens before any folder is created.
  """
  image_extensions = ('.jpg', '.jpeg', '.png')  # Add other image extensions if needed

  # Gather the complete candidate pool (both genders) BEFORE sampling anything.
  image_paths = []
  for gender in ("male", "female"):
    gender_path = os.path.join(source_path, gender)
    for filename in os.listdir(gender_path):
      if filename.endswith(image_extensions):
        image_paths.append(os.path.join(gender_path, filename))

  largest_sample = max_imgs
  if num_random_exp > 0:
    largest_sample = max(largest_sample, random_set_size)
  if len(image_paths) < largest_sample:
    raise ValueError(
        "Found {} images under {!r} but a sample of {} images was requested".format(
            len(image_paths), source_path, largest_sample))

  # "random_discovery" folder: sampled once, from the full male+female pool.
  _copy_random_sample(
      image_paths, os.path.join(source_path, "random_discovery"), max_imgs)

  # "random500_<i>" folders: one independent sample per random experiment.
  for i in range(num_random_exp):
    _copy_random_sample(
        image_paths, os.path.join(source_path, "random500_" + str(i)), random_set_size)


def _copy_random_sample(image_paths, folder_path, sample_size):
  """Copy `sample_size` randomly chosen files from `image_paths` into `folder_path`."""
  os.makedirs(folder_path, exist_ok=True)
  for image_path in random.sample(image_paths, sample_size):
    # shutil.copy keeps the source basename, so two sampled files with the same
    # name (e.g. male/a.jpg and female/a.jpg) would overwrite each other here.
    shutil.copy(image_path, folder_path)

# Example usage:
# num_random_exp and max_imgs are assigned here so this cell runs on a fresh
# kernel (Restart & Run All); the configuration cell further down assigns the
# same names again, so keep the values in both places in sync.
source_path = 'part1' # Replace with the actual path
num_random_exp = 20
max_imgs = 50
create_image_folders(source_path, num_random_exp, max_imgs)

In [3]:
# !python3 ace_run.py --source_dir part1 --working_dir SAVE_DIR --model_to_run custom --model_path models/gender_classification.h5 --target_class male --labels_path labels.txt --bottlenecks global_average_pooling2d --num_random_exp 20 --max_imgs 50 --min_imgs 30

In [4]:
# Run configuration for the "male" target class.

# Input data and output location
source_dir = 'part1'
working_dir = 'male_save_dir_11.2'

# Model under inspection
model_to_run = 'custom'
model_path = "models/gender_classification.h5"
labels_path = 'labels.txt'
target_class = 'male'

# Comma-separated bottleneck layer name(s); split on ',' where ConceptDiscovery is built
bottlenecks = 'global_average_pooling2d_4'

# Experiment sizes
num_random_exp, max_imgs, min_imgs = 20, 50, 30

In [5]:
# Output folder layout, all located under working_dir.
random_concept = 'random_discovery'  # Random concept for statistical testing
discovered_concepts_dir = os.path.join(working_dir, 'concepts/')
results_dir = os.path.join(working_dir, 'results/')
cavs_dir = os.path.join(working_dir, 'cavs/')
activations_dir = os.path.join(working_dir, 'acts/')
results_summaries_dir = os.path.join(working_dir, 'results_summaries/')

# Start from a clean slate: an existing working_dir is deleted (with all of
# its contents) before the folders are re-created.
if tf.io.gfile.exists(working_dir):
    tf.io.gfile.rmtree(working_dir)
for directory in (working_dir,
                  discovered_concepts_dir,
                  results_dir,
                  cavs_dir,
                  activations_dir,
                  results_summaries_dir):
    tf.io.gfile.makedirs(directory)

In [6]:
# Open a session and build the model object from the configured model type,
# model file and labels file.
sess = utils.create_session()
mymodel = ace_helpers.make_model(sess, model_to_run, model_path, labels_path)

Instructions for updating:
Colocations handled automatically by placer.


2024-11-02 14:07:45.940611: W tensorflow/c/c_api.cc:300] Operation '{name:'block_15_expand_BN/moving_mean/Assign' id:1957 op device:{requested: '', assigned: ''} def:{{{node block_15_expand_BN/moving_mean/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](block_15_expand_BN/moving_mean, block_15_expand_BN/moving_mean/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2024-11-02 14:07:46.204908: W tensorflow/c/c_api.cc:300] Operation '{name:'Conv_1_bn/beta/v/Assign' id:4011 op device:{requested: '', assigned: ''} def:{{{node Conv_1_bn/beta/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](Conv_1_bn/beta/v, Conv_1_bn/beta/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a s

In [7]:
# Instantiate ConceptDiscovery for the target class.
# `bottlenecks` is a comma-separated string, so it is split into a list first.
bottleneck_names = bottlenecks.split(',')
cd = ConceptDiscovery(
    mymodel, target_class, random_concept, bottleneck_names,
    sess, source_dir, activations_dir, cavs_dir,
    num_random_exp=num_random_exp,
    channel_mean=True,
    min_imgs=min_imgs,
    max_imgs=max_imgs,
    # The discovery set uses the same image budget as max_imgs.
    num_discovery_imgs=max_imgs)

In [8]:
  # Build the image-patch dataset; n_segments lists three settings
  # (15, 50 and 80) that are handed to create_patches.
  segmentation_params = {'n_segments': [15, 50, 80]}
  cd.create_patches(param_dict=segmentation_params)

In [9]:
  # Write the target-class discovery images to <discovered_concepts_dir>/images.
  image_dir = os.path.join(discovered_concepts_dir, 'images')
  tf.io.gfile.makedirs(image_dir)
  # NOTE(review): assumes cd.discovery_images holds floats in [0, 1]; if a value
  # of exactly 1.0 can occur, `* 256` yields 256, which does not fit in uint8.
  # TODO confirm the value range (scaling by 255 may be what is intended).
  discovery_images_uint8 = (cd.discovery_images * 256).astype(np.uint8)
  ace_helpers.save_images(image_dir, discovery_images_uint8)

In [10]:
 # Discover concepts with method 'KM' (presumably k-means - confirm) using 25 clusters.
cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
# Drop these attributes from cd once discovery has run, to free memory.
del cd.dataset, cd.image_numbers, cd.patches

  super()._check_params_vs_input(X, default_n_init=10)


In [11]:
  # Save discovered concept images (resized and original sized)
  # Output goes under discovered_concepts_dir (<working_dir>/concepts/).
  ace_helpers.save_concepts(cd, discovered_concepts_dir)

In [12]:
  # Calculating CAVs and TCAV scores
  # NOTE: the name `cav_accuraciess` (sic) is read again by the cell that writes
  # the ACE report, so it must not be renamed in this cell alone.
  # min_acc=0.0 presumably keeps every concept regardless of CAV accuracy -
  # TODO confirm against ConceptDiscovery.cavs.
  cav_accuraciess = cd.cavs(min_acc=0.0)
  # `scores` feeds both the statistical-testing step and the ACE report below.
  scores = cd.tcavs(test=False)
 

INFO:tensorflow:Training CAV ['male_concept1', 'random500_19'] - global_average_pooling2d_4 alpha 0.01
INFO:tensorflow:training with alpha=0.01
INFO:tensorflow:acc per class {'male_concept1': 1.0, 'random500_19': 1.0, 'overall': 1.0}
INFO:tensorflow:Training CAV ['male_concept1', 'random500_4'] - global_average_pooling2d_4 alpha 0.01
INFO:tensorflow:training with alpha=0.01
INFO:tensorflow:CAV accuracies: {'male_concept1': 1.0, 'random500_19': 1.0, 'overall': 1.0}
INFO:tensorflow:Training CAV ['male_concept1', 'random500_10'] - global_average_pooling2d_4 alpha 0.01
INFO:tensorflow:training with alpha=0.01
INFO:tensorflow:acc per class {'male_concept1': 1.0, 'random500_4': 1.0, 'overall': 1.0}
INFO:tensorflow:CAV accuracies: {'male_concept1': 1.0, 'random500_4': 1.0, 'overall': 1.0}
INFO:tensorflow:acc per class {'male_concept1': 1.0, 'random500_10': 1.0, 'overall': 1.0}
INFO:tensorflow:CAV accuracies: {'male_concept1': 1.0, 'random500_10': 1.0, 'overall': 1.0}
INFO:tensorflow:Training 

In [13]:

# Delete concepts that don't pass statistical testing
# Uses the TCAV `scores` computed in the previous cell; presumably updates `cd`
# in place (the return value is not used) - TODO confirm.
cd.test_and_remove_concepts(scores)

In [14]:
# Write the ACE report now that concepts failing the statistical test are gone.
# results_summaries_dir already ends with '/', so plain concatenation yields
# <working_dir>/results_summaries/ace_results.txt.
ace_report_path = results_summaries_dir + 'ace_results.txt'
ace_helpers.save_ace_report(cd, cav_accuraciess, scores, ace_report_path)

In [15]:
# Get the list of concepts whose TCAV score is above a given threshold.
def get_concepts_above_threshold(file_path, threshold):
  """Return the concepts in a report file whose score exceeds `threshold`.

  Only lines containing the text "global" are considered. Each such line is
  split on ":"; the second field is taken as the concept and the third field,
  up to its first ",", is parsed as the score.

  Args:
    file_path: Path of the text report to read.
    threshold: Concepts are kept only when their score is strictly greater
      than this value.

  Returns:
    The matching concept fields (exactly as they appear in the file), in file
    order.

  Raises:
    IndexError: If a line containing "global" has fewer than three
      ":"-separated fields.
    ValueError: If the score field cannot be parsed as a float.
  """
  selected = []
  with open(file_path, 'r') as report:
    for row in report:
      if "global" not in row:
        continue
      fields = row.split(":")
      tcav_score = float(fields[2].split(",")[0])
      if tcav_score > threshold:
        selected.append(fields[1])
  return selected

In [17]:
# Select the concepts whose score in the saved ACE report exceeds the threshold.
# The report path is derived from results_summaries_dir instead of being
# hard-coded, so it always points at the file written by save_ace_report for
# the currently configured working_dir.
tcav_score_threshold = .5
ace_report_file = os.path.join(results_summaries_dir, 'ace_results.txt')
concepts = get_concepts_above_threshold(ace_report_file, tcav_score_threshold)

# Plot examples of the selected concepts for every bottleneck; the figures are
# written under results_dir.
for bn in cd.bottlenecks:
  ace_helpers.plot_concepts(cd, bn, 5, address=results_dir, mode='diverse', concepts=concepts)