In [1]:
import os

db_S_dir = os.environ["DATA"] + "PatImgXAI_data/db3.0.0_S/"
db_L_dir = os.environ["DATA"] + "PatImgXAI_data/db3.0.0_L/"
db_M_dir = os.environ["DATA"] + "PatImgXAI_data/db3.0.0_M/"
db_patterns_dir = os.environ["DATA"] + "PatImgXAI_data/db3.0.0/patterns/"


model_dir_root = os.environ["DATA"] + "models/db3.0.0/01_protov5/"
shap_scale_img_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","shap_scale.png")
yes_pred_img_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","button_yes.png")
no_pred_img_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","button_no.png")
yes_small_pred_img_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","button_yes_small.png")
no_small_pred_img_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","button_no_small.png")
pos_pred_legend_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","cf_info_pos.png")
neg_pred_legend_path = os.path.join(os.environ["DATA"] + "PatImgXAI_data/db3.0.0","cf_info_neg.png")
interface_dir = os.environ["DATA"] + "webinterfaces/int05_prototype/"

XAI_DATASET_SIZE = 50

N_JOBS = 20
N_JOBS_GPU = 6

In [2]:

# Number of grid divisions along each axis of a full image, for the `_L` and `_S` variants
X_DIVISIONS_L, Y_DIVISIONS_L = 15, 15
X_DIVISIONS_S, Y_DIVISIONS_S = 10, 10

# Number of grid divisions along each axis of a pattern
X_DIVISIONS_PATTERNS, Y_DIVISIONS_PATTERNS = 2, 2

# Probability to generate a geometrical shape at each position in the grid
SHAPE_PROB = 0.5

# Hex colour code -> human-readable colour name.
# (The identifier keeps its historical spelling because other code refers to it by this name.)
explict_colors_dict = dict(
    zip(
        ("#A33E9A", "#E0B000", "#0C90C0"),
        ("purple", "yellow", "blue"),
    )
)

# Available shapes and colours; the colour list is the mapping's keys, in insertion order
SHAPES = ["circle", "square", "triangle"]
COLORS = list(explict_colors_dict)

In [3]:
from xaipatimg.datagen.dbimg import load_db

# Load the pattern database located at db_patterns_dir. The result is used below as a mapping
# (iterated with .items()) whose values expose a "content" collection of entries with a "color" field.
db_patterns = load_db(db_patterns_dir)

In [6]:
# Keys of the patterns whose content holds exactly 3 symbols using exactly 2 distinct colors.
# The list follows the iteration order of db_patterns; the rule definitions pick entries by position.
pattern_3sym_2col_keys = [
    pattern_key
    for pattern_key, pattern in db_patterns.items()
    if len(pattern["content"]) == 3
    and len({symbol["color"] for symbol in pattern["content"]}) == 2
]

In [7]:
# Each database keeps the datasets of this experiment under <db dir>/datasets/01_protov5.
datasets_path_L, datasets_path_S, datasets_path_M = (
    os.path.join(db_root, "datasets", "01_protov5")
    for db_root in (db_L_dir, db_S_dir, db_M_dir)
)

In [8]:
from xaipatimg.datagen.gendataset import generic_rule_pattern_exactly_1_time_exclude_more, \
    generic_rule_shape_color_even

# Fields whose values are identical for every rule of this list.
_shared_fields_L = {
    "target_acc": 0.1,
    "shown_acc": 0.9,
    "samples_interface": 10,
    "pos_llm_scaffold": "",
    "neg_llm_scaffold": "",
}
# gen_kwargs template used together with generic_rule_pattern_exactly_1_time_exclude_more.
_pattern_kwargs_L = {
    "x_division_full": X_DIVISIONS_L,
    "y_division_full": Y_DIVISIONS_L,
    "x_division_pattern": X_DIVISIONS_PATTERNS,
    "y_division_pattern": Y_DIVISIONS_PATTERNS,
    "consider_rotations": True,
}
# Grid part of the gen_kwargs used together with generic_rule_shape_color_even.
_even_kwargs_L = {"x_division": X_DIVISIONS_L, "y_division": Y_DIVISIONS_L}
_pattern_question_L = "Is the pattern or any of its left or right rotations in the image?"

# Every rule receives its own gen_kwargs dict (a copy of the template), so a key added
# to one rule's kwargs later on never shows up in another rule.
rules_data_L = [
    {
        "name": "hard1_find_pattern_rot",
        "gen_fun": generic_rule_pattern_exactly_1_time_exclude_more,
        "gen_kwargs": dict(_pattern_kwargs_L),
        "question": _pattern_question_L,
        **_shared_fields_L,
        "pattern_id": pattern_3sym_2col_keys[0],
    },
    {
        "name": "hard2_blue_circle_even",
        "gen_fun": generic_rule_shape_color_even,
        "gen_kwargs": {**_even_kwargs_L, "shape": "circle", "color": "#0C90C0"},
        "question": "Is the number of blue circles an even number?",
        **_shared_fields_L,
    },
    {
        "name": "hard3_find_pattern_rot",
        "gen_fun": generic_rule_pattern_exactly_1_time_exclude_more,
        "gen_kwargs": dict(_pattern_kwargs_L),
        "question": _pattern_question_L,
        **_shared_fields_L,
        "pattern_id": pattern_3sym_2col_keys[1],
    },
    {
        "name": "hard4_purple_square_even",
        "gen_fun": generic_rule_shape_color_even,
        "gen_kwargs": {**_even_kwargs_L, "shape": "square", "color": "#A33E9A"},
        "question": "Is the number of purple squares an even number?",
        **_shared_fields_L,
    },
]

In [9]:
from xaipatimg.datagen.gendataset import generic_rule_pattern_exactly_1_time_exclude_more, \
    generic_rule_shape_color_even

# Fields whose values are identical for every rule of this list.
_shared_fields_S = {
    "target_acc": 0.1,
    "shown_acc": 0.9,
    "samples_interface": 10,
    "pos_llm_scaffold": "",
    "neg_llm_scaffold": "",
}
# gen_kwargs template used together with generic_rule_pattern_exactly_1_time_exclude_more.
_pattern_kwargs_S = {
    "x_division_full": X_DIVISIONS_S,
    "y_division_full": Y_DIVISIONS_S,
    "x_division_pattern": X_DIVISIONS_PATTERNS,
    "y_division_pattern": Y_DIVISIONS_PATTERNS,
    "consider_rotations": True,
}
# Grid part of the gen_kwargs used together with generic_rule_shape_color_even.
_even_kwargs_S = {"x_division": X_DIVISIONS_S, "y_division": Y_DIVISIONS_S}
_pattern_question_S = "Is the pattern or any of its left or right rotations in the image?"

# Every rule receives its own gen_kwargs dict (a copy of the template), so a key added
# to one rule's kwargs later on never shows up in another rule.
rules_data_S = [
    {
        "name": "easy1_find_pattern_rot",
        "gen_fun": generic_rule_pattern_exactly_1_time_exclude_more,
        "gen_kwargs": dict(_pattern_kwargs_S),
        "question": _pattern_question_S,
        **_shared_fields_S,
        "pattern_id": pattern_3sym_2col_keys[2],
    },
    {
        "name": "easy2_yellow_triangle_even",
        "gen_fun": generic_rule_shape_color_even,
        "gen_kwargs": {**_even_kwargs_S, "shape": "triangle", "color": "#E0B000"},
        "question": "Is the number of yellow triangles an even number?",
        **_shared_fields_S,
    },
    {
        "name": "easy3_find_pattern_rot",
        "gen_fun": generic_rule_pattern_exactly_1_time_exclude_more,
        "gen_kwargs": dict(_pattern_kwargs_S),
        "question": _pattern_question_S,
        **_shared_fields_S,
        "pattern_id": pattern_3sym_2col_keys[3],
    },
    {
        "name": "easy4_purple_circle_even",
        "gen_fun": generic_rule_shape_color_even,
        "gen_kwargs": {**_even_kwargs_S, "shape": "circle", "color": "#A33E9A"},
        "question": "Is the number of purple circles an even number?",
        **_shared_fields_S,
    },
]

In [13]:
from xaipatimg.datagen.gendataset import generic_rule_pattern_exactly_1_time_exclude_more, \
    generic_rule_shape_color_even

# Fields whose values are identical for every rule of this list.
_shared_fields_M = {
    "target_acc": 0.1,
    "shown_acc": 0.9,
    "samples_interface": 10,
    "pos_llm_scaffold": "",
    "neg_llm_scaffold": "",
}
# NOTE(review): the M rules use the *_L grid divisions; this notebook defines no *_M
# constants - confirm that sharing the L grid size is intended.
# gen_kwargs template used together with generic_rule_pattern_exactly_1_time_exclude_more.
_pattern_kwargs_M = {
    "x_division_full": X_DIVISIONS_L,
    "y_division_full": Y_DIVISIONS_L,
    "x_division_pattern": X_DIVISIONS_PATTERNS,
    "y_division_pattern": Y_DIVISIONS_PATTERNS,
    "consider_rotations": True,
}
# Grid part of the gen_kwargs used together with generic_rule_shape_color_even.
_even_kwargs_M = {"x_division": X_DIVISIONS_L, "y_division": Y_DIVISIONS_L}
_pattern_question_M = "Is the pattern or any of its left or right rotations in the image?"

# Every rule receives its own gen_kwargs dict (a copy of the template), so a key added
# to one rule's kwargs later on never shows up in another rule.
rules_data_M = [
    {
        "name": "med1_find_pattern_rot",
        "gen_fun": generic_rule_pattern_exactly_1_time_exclude_more,
        "gen_kwargs": dict(_pattern_kwargs_M),
        "question": _pattern_question_M,
        **_shared_fields_M,
        "pattern_id": pattern_3sym_2col_keys[4],
    },
    {
        "name": "med2_yellow_square_even",
        "gen_fun": generic_rule_shape_color_even,
        "gen_kwargs": {**_even_kwargs_M, "shape": "square", "color": "#E0B000"},
        "question": "Is the number of yellow squares an even number?",
        **_shared_fields_M,
    },
    {
        "name": "med3_find_pattern_rot",
        "gen_fun": generic_rule_pattern_exactly_1_time_exclude_more,
        "gen_kwargs": dict(_pattern_kwargs_M),
        "question": _pattern_question_M,
        **_shared_fields_M,
        "pattern_id": pattern_3sym_2col_keys[5],
    },
    {
        "name": "med4_blue_triangle_even",
        "gen_fun": generic_rule_shape_color_even,
        "gen_kwargs": {**_even_kwargs_M, "shape": "triangle", "color": "#0C90C0"},
        "question": "Is the number of blue triangles an even number?",
        **_shared_fields_M,
    },
]

In [14]:
from xaipatimg.ml.xai import generate_shap_resnet18, generate_counterfactuals_resnet18_random_approach, \
    create_xai_index
from tqdm import tqdm

def generate_explanations(rules_data, db_dir, datasets_dir_path):
    """Generate the SHAP explanations and the XAI index for every rule of a task set.

    For each rule, the model directory is ``model_dir_root/<rule name>`` and the dataset file is
    ``<rule name>_test.csv``. SHAP outputs are written to the ``shap`` sub-directory of the model
    directory, then the XAI index is created for the same model.

    :param rules_data: list of rule dicts. Each one must provide "name", "gen_fun" and
        "gen_kwargs", and may provide "pattern_id" (a key of ``db_patterns``). The dicts are
        left untouched by this function.
    :param db_dir: database directory, forwarded to the xaipatimg functions.
    :param datasets_dir_path: datasets directory, forwarded to the xaipatimg functions.
    """

    for rule in tqdm(rules_data):

        rule_name = rule["name"]
        model_dir = os.path.join(model_dir_root, rule_name)
        dataset_filename = rule_name + "_test.csv"
        xai_output_paths = {
            "shap" : "shap",
            # "cf" : "cf",
        }

        # The rule function and its kwargs are only consumed by the (currently disabled)
        # counterfactual generation below. The kwargs are copied so that attaching the
        # pattern content does not modify the caller's rule definition.
        generic_rule_fun = rule["gen_fun"]
        generic_rule_fun_kwargs = dict(rule["gen_kwargs"])
        if "pattern_id" in rule:
            generic_rule_fun_kwargs["pattern_content"] = db_patterns[rule["pattern_id"]]["content"]

        # NOTE(review): max_evals=3 looks like a very small evaluation budget for SHAP -
        # confirm it is not a leftover debugging value.
        generate_shap_resnet18(db_dir, datasets_dir_path=datasets_dir_path, dataset_filename=dataset_filename,
                               model_dir=model_dir, xai_output_path=os.path.join(model_dir, xai_output_paths["shap"]),
                               yes_pred_img_path=yes_pred_img_path, no_pred_img_path=no_pred_img_path, device="cuda:0", n_jobs=N_JOBS,
                               dataset_size=XAI_DATASET_SIZE, masker="ndarray", shap_scale_img_path=shap_scale_img_path, max_evals=3)

        # generate_counterfactuals_resnet18_random_approach(db_dir, datasets_dir_path=datasets_dir_path, dataset_filename=dataset_filename,
        #                                                   model_dir=model_dir,
        #                                                   xai_output_path=os.path.join(model_dir, xai_output_paths["cf"]),
        #                                                   yes_pred_img_path=yes_pred_img_path, no_pred_img_path=no_pred_img_path,
        #                                                   shapes=SHAPES, colors=COLORS, empty_probability=1-SHAPE_PROB,
        #                                                   max_depth=10, nb_tries_per_depth=2000, generic_rule_fun=generic_rule_fun,
        #                                                   devices=["cuda:0", "cuda:1"], n_jobs=N_JOBS_GPU,
        #                                                   dataset_size=XAI_DATASET_SIZE,pos_pred_legend_path=pos_pred_legend_path,
        #                                                   neg_pred_legend_path=neg_pred_legend_path,
        #                                                   **generic_rule_fun_kwargs)

        create_xai_index(db_dir, datasets_dir_path=datasets_dir_path, dataset_filename=dataset_filename, model_dir=model_dir,
                         xai_dirs=xai_output_paths, dataset_size=XAI_DATASET_SIZE, device="cuda:0")


In [15]:
# Generate the SHAP explanations and the XAI index for the S rules.
# NOTE(review): the output saved with this cell ends in a KeyboardInterrupt, i.e. this run did not complete.
generate_explanations(rules_data_S, db_S_dir, datasets_path_S)

  0%|          | 0/4 [00:00<?, ?it/s]

Loading dataset content for easy1_find_pattern_rot_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 34%|███▍      | 17/50 [00:00<00:00, 167.77it/s][A
100%|██████████| 50/50 [00:00<00:00, 144.19it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0


Computing shap values


  0%|          | 0/4 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Generate the SHAP explanations and the XAI index for the L rules
# (this cell has no recorded execution in the saved notebook).
generate_explanations(rules_data_L, db_L_dir, datasets_path_L)

In [16]:
# Generate the SHAP explanations and the XAI index for the M rules.
generate_explanations(rules_data_M, db_M_dir, datasets_path_M)

  0%|          | 0/4 [00:00<?, ?it/s]

Loading dataset content for med1_find_pattern_rot_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 26%|██▌       | 13/50 [00:00<00:00, 121.59it/s][A
 52%|█████▏    | 26/50 [00:00<00:00, 109.40it/s][A
100%|██████████| 50/50 [00:00<00:00, 116.02it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0


Computing shap values
Generating shap images



  0%|          | 0/50 [00:00<?, ?it/s][A
 40%|████      | 20/50 [00:00<00:00, 39.76it/s][A
100%|██████████| 50/50 [00:07<00:00,  6.67it/s][A


Loading dataset content for med1_find_pattern_rot_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 42%|████▏     | 21/50 [00:00<00:00, 203.61it/s][A
100%|██████████| 50/50 [00:00<00:00, 185.38it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0
 25%|██▌       | 1/4 [00:16<00:50, 16.85s/it]

Loading dataset content for med2_yellow_square_even_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 44%|████▍     | 22/50 [00:00<00:00, 211.09it/s][A
100%|██████████| 50/50 [00:00<00:00, 207.46it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0


Computing shap values
Generating shap images



  0%|          | 0/50 [00:00<?, ?it/s][A
100%|██████████| 50/50 [00:01<00:00, 32.03it/s][A


Loading dataset content for med2_yellow_square_even_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 28%|██▊       | 14/50 [00:00<00:00, 130.42it/s][A
 56%|█████▌    | 28/50 [00:00<00:00, 127.17it/s][A
100%|██████████| 50/50 [00:00<00:00, 151.09it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0
 50%|█████     | 2/4 [00:26<00:25, 12.61s/it]

Loading dataset content for med3_find_pattern_rot_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 44%|████▍     | 22/50 [00:00<00:00, 215.74it/s][A
100%|██████████| 50/50 [00:00<00:00, 155.46it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0


Computing shap values
Generating shap images



  0%|          | 0/50 [00:00<?, ?it/s][A
100%|██████████| 50/50 [00:01<00:00, 31.33it/s][A


Loading dataset content for med3_find_pattern_rot_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 42%|████▏     | 21/50 [00:00<00:00, 204.12it/s][A
100%|██████████| 50/50 [00:00<00:00, 193.14it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0
 75%|███████▌  | 3/4 [00:36<00:11, 11.19s/it]

Loading dataset content for med4_blue_triangle_even_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 26%|██▌       | 13/50 [00:00<00:00, 128.97it/s][A
 54%|█████▍    | 27/50 [00:00<00:00, 129.76it/s][A
100%|██████████| 50/50 [00:00<00:00, 127.12it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0


Computing shap values
Generating shap images



  0%|          | 0/50 [00:00<?, ?it/s][A
100%|██████████| 50/50 [00:01<00:00, 32.10it/s][A


Loading dataset content for med4_blue_triangle_even_test.csv



  0%|          | 0/50 [00:00<?, ?it/s][A
 44%|████▍     | 22/50 [00:00<00:00, 213.94it/s][A
100%|██████████| 50/50 [00:00<00:00, 200.70it/s][A
Using cache found in /home/jleguy/.cache/torch/hub/pytorch_vision_v0.10.0
100%|██████████| 4/4 [00:45<00:00, 11.41s/it]


In [None]:
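# NOTE: disabled draft of the LLM-explanation step. As written it refers to `rules_data`, `db_dir`, `db` and
# `datasets_dir_path`, none of which is defined at notebook level, and its final `create_xai_index` call omits the
# `datasets_dir_path` argument that the call in `generate_explanations` passes. Bind these names before re-enabling.
# from transformers import AutoTokenizer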
# from transformers import AutoModelForCausalLM
# import csv
# from xaipatimg.ml.xai import generate_LLM_explanations, create_xai_index
# from tqdm import tqdm
#
# model_id = "openai/gpt-oss-20b"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# llm_model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     device_map="auto",
#     torch_dtype="auto",
# )
#
# for rule_idx in tqdm(range(len(rules_data))):
#
#     model_dir = os.path.join(model_dir_root, rules_data[rule_idx]["name"])
#     dataset_filename = rules_data[rule_idx]["name"] + "_test.csv"
#
#     # Extracting the subset of indices of samples selected for the experimental interface, in order to ease the cost of calculation
#     interface_content_path = os.path.join(interface_dir, "res", "tasks", f"{rules_data[rule_idx]["name"]}_content.csv")
#     interface_selected_idx = [int(row["og_idx"]) for row in list(csv.DictReader(open(interface_content_path), delimiter=','))]
#
#     xai_output_paths = {
#         "shap" : "shap",
#         "cf" : "cf",
#         "llm" : "llm",
#     }
#     generate_LLM_explanations(db_dir, db, datasets_dir_path=datasets_dir_path, dataset_filename=dataset_filename,
#                               model_dir=model_dir, llm_model=llm_model, llm_tokenizer=tokenizer,
#                               xai_output_path=os.path.join(model_dir, xai_output_paths["llm"]),
#                               explicit_colors_dict=explict_colors_dict, question=rules_data[rule_idx]["question"],
#                               yes_pred_img_path=yes_pred_img_path, no_pred_img_path=no_pred_img_path,
#                               yes_pred_img_path_small=yes_small_pred_img_path, no_pred_img_path_small=no_small_pred_img_path,
#                               device="cuda:0", dataset_size=XAI_DATASET_SIZE, only_for_index=interface_selected_idx,
#                               path_to_counterfactuals_dir_for_model_errors=os.path.join(model_dir, xai_output_paths["cf"]),
#                               pos_llm_scaffold=rules_data[rule_idx]["pos_llm_scaffold"], neg_llm_scaffold=rules_data[rule_idx]["neg_llm_scaffold"])
#
#     create_xai_index(db_dir, dataset_filename=dataset_filename, model_dir=model_dir, xai_dirs=xai_output_paths, dataset_size=XAI_DATASET_SIZE, device="cuda:0")
