In [1]:
import sys
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import pytorch_lightning as pl
import numpy as np
from tqdm import tqdm

from explainability_utils import *

# Add the 'training' directory to sys.path
sys.path.append(os.path.abspath("../training"))
from training_utils import CNN2D, ExplainerDataset

  from .autonotebook import tqdm as notebook_tqdm
Seed set to 42
Seed set to 42


(../preprocessed_dset/spectrograms) Mean: [0.68137387 0.66712049 0.64939852], Std: [0.13272135 0.1345285  0.13304515] (calculated and saved)


In [2]:
# ---------------------------------------------------------------------------
# Notebook configuration: data locations, SHAP settings, preprocessing.
# ---------------------------------------------------------------------------

# Path to the data.
# Paths are assembled with os.path.join instead of hand-written backslashes:
# the previous literal "..\preprocessed_dset" was a non-raw string containing
# the invalid escape sequence "\p", and backslash separators only resolve on
# Windows.
base_path = os.path.join(os.pardir, "preprocessed_dset")
data_path = os.path.join(base_path, "spectrograms")
meta_path = os.path.join(base_path, "metadata.csv")

# Directory holding the explainability samples (one sub-folder per class)
shap_data_path = "samples"

# Directory where the computed SHAP tensors are written
output_path = "shap_tensors"
os.makedirs(output_path, exist_ok=True)

# Forwarded as `max_evals` to compute_shap_tensor
MAX_EVALS = 5000


# Array stored next to the spectrograms; split below into its first two
# entries and the remaining ones.
# NOTE(review): presumably the frequency / time axis ranges - confirm.
f_t_range_64_path = os.path.join(data_path, "f_t_range.npy")
tf = np.load(f_t_range_64_path)

ft = [tf[:2], tf[2:]]

# Path to the best model found in the training notebooks
checkpoint_dir = os.path.join(os.pardir, "training", "model_checkpoints")
best_model_path = os.path.join(checkpoint_dir, "best_model_fold_2.ckpt")
best_model_c_path = os.path.join(checkpoint_dir, "best_model_c_fold_2.ckpt")

# Passed to the model as `input_dim` and to compute_shap_tensor as `dim`
sample_dim = (33, 96)

# Setting for the datasets: per-channel statistics used for normalisation
mean, std = spectra_stats(data_path, meta_path)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

(..\preprocessed_dset\spectrograms) Mean: [0.68137387 0.66712049 0.64939852], Std: [0.13272135 0.1345285  0.13304515] (calculated and saved)
Using device: cuda


### Compute SHAP Tensor for each explainability set sample

In [3]:
# Restore the trained network from the selected checkpoint, then put it in
# evaluation mode and move it to the selected device.
model = CNN2D.load_from_checkpoint(
    best_model_path,
    input_dim=sample_dim,
    num_classes=2,
)
model = model.eval().to(device)  # both calls return the module itself
print("Model loaded")

Model loaded


#### Foreshocks

In [4]:
# Dataset over the foreshock samples of the explainability set, using the
# same transform defined in the configuration cell.
dset_pre = ExplainerDataset(
    data_path=os.path.join(shap_data_path, "foreshocks"),
    dataframe_path=meta_path,
    transform=transform,
)

# One sample per batch, in dataset order.
dloader_pre = DataLoader(dataset=dset_pre, batch_size=1, shuffle=False)

In [5]:
# Compute and save one SHAP tensor per foreshock sample.
output_dir = os.path.join(output_path, "foreshocks")
os.makedirs(output_dir, exist_ok=True)

# No hard-coded `total`: tqdm reads the length from the DataLoader, so the
# progress bar stays correct if the number of samples changes.
for sample in tqdm(dloader_pre, desc="foreshocks"):
    compute_shap_tensor(
        model=model,
        sample=sample,
        dim=sample_dim,
        device=device,
        max_evals=MAX_EVALS,
        save_dir=output_dir,
    )

PartitionExplainer explainer: 2it [00:46, 46.26s/it]               
PartitionExplainer explainer: 2it [00:41, 41.43s/it]               
PartitionExplainer explainer: 2it [00:41, 41.18s/it]               
PartitionExplainer explainer: 2it [00:40, 40.37s/it]               
PartitionExplainer explainer: 2it [00:41, 41.68s/it]               
PartitionExplainer explainer: 2it [00:40, 40.91s/it]               
PartitionExplainer explainer: 2it [00:41, 41.15s/it]               
PartitionExplainer explainer: 2it [00:39, 39.82s/it]               
PartitionExplainer explainer: 2it [00:39, 39.44s/it]               
PartitionExplainer explainer: 2it [00:48, 48.28s/it]               
PartitionExplainer explainer: 2it [00:44, 44.24s/it]               
PartitionExplainer explainer: 2it [00:41, 41.20s/it]               
PartitionExplainer explainer: 2it [00:40, 40.09s/it]               
PartitionExplainer explainer: 2it [00:40, 40.84s/it]               
PartitionExplainer explainer: 2it [00:40, 40.47s

#### Aftershocks

In [6]:
# Dataset over the aftershock samples of the explainability set, using the
# same transform defined in the configuration cell.
dset_post = ExplainerDataset(
    data_path=os.path.join(shap_data_path, "aftershocks"),
    dataframe_path=meta_path,
    transform=transform,
)

# Fix: the loader must wrap `dset_post`. It previously wrapped `dset_pre`, so
# the aftershock SHAP tensors were computed on the foreshock samples.
dloader_post = DataLoader(dset_post, batch_size=1, shuffle=False)

In [7]:
# Compute and save one SHAP tensor per sample yielded by `dloader_post`.
output_dir = os.path.join(output_path, "aftershocks")
os.makedirs(output_dir, exist_ok=True)

# No hard-coded `total`: tqdm reads the length from the DataLoader, so the
# progress bar stays correct if the number of samples changes.
for sample in tqdm(dloader_post, desc="aftershocks"):
    compute_shap_tensor(
        model=model,
        sample=sample,
        dim=sample_dim,
        device=device,
        max_evals=MAX_EVALS,
        save_dir=output_dir,
    )

PartitionExplainer explainer: 2it [00:35, 35.41s/it]               
PartitionExplainer explainer: 2it [00:36, 36.12s/it]               
PartitionExplainer explainer: 2it [00:36, 36.23s/it]               
PartitionExplainer explainer: 2it [00:36, 36.22s/it]               
PartitionExplainer explainer: 2it [00:36, 36.42s/it]               
PartitionExplainer explainer: 2it [00:36, 36.61s/it]               
PartitionExplainer explainer: 2it [00:36, 36.99s/it]               
PartitionExplainer explainer: 2it [00:35, 35.97s/it]               
PartitionExplainer explainer: 2it [00:35, 35.48s/it]               
PartitionExplainer explainer: 2it [00:36, 36.18s/it]               
PartitionExplainer explainer: 2it [00:36, 36.28s/it]               
PartitionExplainer explainer: 2it [00:37, 37.67s/it]               
PartitionExplainer explainer: 2it [00:35, 35.14s/it]               
PartitionExplainer explainer: 2it [00:35, 35.73s/it]               
PartitionExplainer explainer: 2it [00:35, 35.36s