In [1]:
# Torch
import torch
import torch.optim as optim
from torcheval.metrics import *

import pickle
from captum.attr import *
import random
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

np.set_printoptions(threshold=np.inf)

# Custom modules
from preprocessing_post_fastsurfer.subject import *
from preprocessing_post_fastsurfer.vis import *
from ozzy_torch_utils.split_dataset import *
from ozzy_torch_utils.subject_dataset import *
from ozzy_torch_utils.plot import *
from ozzy_torch_utils.train_nn import *
from ozzy_torch_utils.model_parameters import *
from ozzy_torch_utils.init_dataloaders import *
from explain_pointnet import *

In [2]:
# Load dataset
data_path = "/uolstore/home/users/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/scratch-disk/full-datasets/hcampus-1.5T-cohort"

subject_list = find_subjects_parallel(data_path)

Csv files: ['/uolstore/home/users/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/scratch-disk/full-datasets/hcampus-1.5T-cohort/idaSearch_3_19_2025.csv']


In [3]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"

Checking the significance of left and right hippocampi on models run with both

In [4]:
subject = sample(subject_list, 1)[0]

In [5]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_18-03-2025_18-04-04/run_18-03-2025_18-04-04_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [6]:
lr_cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_Right-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))

lr_attributions, lr_pred_research_group = pointnet_ig(model, lr_cloud, device)

vis_attributions(lr_attributions, subject, lr_cloud, lr_pred_research_group)

Widget(value='<iframe src="http://localhost:33519/index.html?ui=P_0x7f92f1c439b0_0&reconnect=auto" class="pyvi…

In [7]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_19-03-2025_16-12-19/run_19-03-2025_16-12-19_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [8]:
l_cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))

l_attributions, l_pred_research_group = pointnet_ig(model, l_cloud, device)

vis_attributions(l_attributions, subject, l_cloud, l_pred_research_group)

Widget(value='<iframe src="http://localhost:33519/index.html?ui=P_0x7f91f8ab0740_1&reconnect=auto" class="pyvi…

Interesting experiment comparing attributions from two permutations of the same cloud

In [9]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_18-03-2025_15-35-05/run_18-03-2025_15-35-05_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [10]:
cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))
    

In [11]:
attributions_orig, pred_research_group_orig = pointnet_ig(model, cloud, device)

shuffler = np.random.permutation(cloud.shape[0])

unshuffler = np.argsort(shuffler)

cloud_shuffled = np.array([cloud[i] for i in shuffler])

attributions_shuffled, pred_research_group_shuffle = pointnet_ig(model, cloud_shuffled, device)

cloud_unshuffled = np.array([cloud_shuffled[i] for i in unshuffler])

attributions_unshuffled = np.array([attributions_shuffled[i] for i in unshuffler])

attributions_diff = attributions_orig - attributions_unshuffled

# NB can't really visualise attributions as they will be normalsied and look large
print(attributions_diff)

if pred_research_group_orig != pred_research_group_shuffle:
    
    print("Research groups are different after shuffle")

print(attributions_diff.shape)
    

[[-3.08535995e-06  4.65440890e-06  4.48645434e-06]
 [-3.32577163e-07  5.11646334e-07 -1.04381377e-07]
 [ 8.90299161e-04 -1.96602151e-03 -1.65795947e-03]
 [-3.72678268e-06  1.01337329e-05  8.86744248e-06]
 [-4.31598988e-03  1.06674494e-02  6.17714964e-03]
 [ 2.69527985e-10  9.46259707e-11 -1.20724041e-10]
 [-1.31414642e-08  3.43966650e-09 -1.86445830e-08]
 [ 8.70255965e-09 -7.83311007e-09 -5.14485339e-09]
 [-3.76252261e-07  3.30626318e-07 -4.06732317e-07]
 [-4.54271058e-03  1.29642929e-02  2.20731065e-02]
 [ 8.42385184e-05 -1.76862618e-04 -3.20372959e-05]
 [ 6.87566490e-03 -1.67769541e-02 -1.31990508e-02]
 [-1.21963034e-03  3.50660797e-03  1.65260744e-03]
 [ 2.43853284e-09  5.50623607e-10 -3.65264523e-09]
 [-3.86577375e-08 -1.00897545e-07  9.55003310e-08]
 [-1.76114534e-09 -5.02678577e-09 -7.89636407e-10]
 [-4.40469163e-10 -1.55040061e-09  3.10643590e-10]
 [ 3.07192131e-09  3.28348955e-10  3.88158269e-09]
 [ 1.38619896e-09  7.98220056e-10 -1.26833087e-08]
 [ 6.10579950e-07 -6.00135238e-

In [12]:
vis_attributions(attributions_orig, subject, cloud, pred_research_group_orig)

# These two should look identical if the method was correct
vis_attributions(attributions_shuffled, subject, cloud_shuffled, pred_research_group_shuffle)

vis_attributions(attributions_unshuffled, subject, cloud_unshuffled, pred_research_group_shuffle)

Widget(value='<iframe src="http://localhost:33519/index.html?ui=P_0x7f91f8af76e0_2&reconnect=auto" class="pyvi…

Widget(value='<iframe src="http://localhost:33519/index.html?ui=P_0x7f91f83492b0_3&reconnect=auto" class="pyvi…

Widget(value='<iframe src="http://localhost:33519/index.html?ui=P_0x7f91f83497f0_4&reconnect=auto" class="pyvi…

Trying explainability with SHAP

In [13]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_18-03-2025_18-04-04/run_18-03-2025_18-04-04_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [16]:
def predict(input):

        tensor = torch.from_numpy(input).type(torch.float32).to('cuda')

        tensor = tensor.unsqueeze(0).transpose(2, 1)

        with torch.no_grad():

            prediction = torch.nn.functional.softmax(model(tensor)[0], dim=1)[0]

            # Return the value of the largest output
            # prediction = prediction[torch.argmax(prediction)]

        output = prediction.cpu().numpy()

        return output

def pointnet_shap(model, cloud, device):

    model.to(device)
    
    model.eval()

    explainer = shap.KernelExplainer(predict, cloud, output_names = ['CN', 'MCI'])

    shap_values = explainer.shap_values(cloud, check_additivity=False)

    print(shap_values)

    print(shap_values.shape)
    
    return shap_values, None

In [17]:
cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))

print(cloud.shape)

print(predict(cloud))

shap_values, pred_research_group = pointnet_shap(model, cloud, device)

vis_attributions(shap_values, subject, cloud, pred_research_group)

Using 1024 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


(1024, 3)
[0.57160383 0.42839617]


ValueError: operands could not be broadcast together with shapes (2,) (1024,) 