In [3]:
# Torch
import torch
import torch.optim as optim
from torcheval.metrics import *

import pickle
from captum.attr import *
import random
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

np.set_printoptions(threshold=np.inf)

# Custom modules
from preprocessing_post_fastsurfer.subject import *
from preprocessing_post_fastsurfer.vis import *
from ozzy_torch_utils.split_dataset import *
from ozzy_torch_utils.subject_dataset import *
from ozzy_torch_utils.plot import *
from ozzy_torch_utils.train_nn import *
from ozzy_torch_utils.model_parameters import *
from ozzy_torch_utils.init_dataloaders import *
from explain_pointnet import *

In [4]:
# Load dataset
data_path = "/uolstore/home/users/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/scratch-disk/full-datasets/hcampus-1.5T-cohort"

subject_list = find_subjects_parallel(data_path)

Csv files: ['/uolstore/home/users/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/scratch-disk/full-datasets/hcampus-1.5T-cohort/idaSearch_3_19_2025.csv']


In [5]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"

Checking the significance of left and right hippocampi on models run with both

In [6]:
subject = sample(subject_list, 1)[0]

In [7]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_18-03-2025_18-04-04/run_18-03-2025_18-04-04_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [8]:
lr_cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_Right-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))

lr_attributions, lr_pred_research_group = pointnet_ig(model, lr_cloud, device)

vis_attributions(lr_attributions, subject, lr_cloud, lr_pred_research_group)

Widget(value='<iframe src="http://localhost:38999/index.html?ui=P_0x7f2e9915e0f0_0&reconnect=auto" class="pyvi…

In [9]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_19-03-2025_16-12-19/run_19-03-2025_16-12-19_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [10]:
l_cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))

l_attributions, l_pred_research_group = pointnet_ig(model, l_cloud, device)

vis_attributions(l_attributions, subject, l_cloud, l_pred_research_group)

Widget(value='<iframe src="http://localhost:38999/index.html?ui=P_0x7f2d30383170_1&reconnect=auto" class="pyvi…

Interesting experiment comparing attributions from two permutations of the same cloud

In [11]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_18-03-2025_15-35-05/run_18-03-2025_15-35-05_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [12]:
cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))
    

In [13]:
attributions_orig, pred_research_group_orig = pointnet_ig(model, cloud, device)

shuffler = np.random.permutation(cloud.shape[0])

unshuffler = np.argsort(shuffler)

cloud_shuffled = np.array([cloud[i] for i in shuffler])

attributions_shuffled, pred_research_group_shuffle = pointnet_ig(model, cloud_shuffled, device)

cloud_unshuffled = np.array([cloud_shuffled[i] for i in unshuffler])

attributions_unshuffled = np.array([attributions_shuffled[i] for i in unshuffler])

attributions_diff = attributions_orig - attributions_unshuffled

# NB can't really visualise attributions as they will be normalsied and look large
print(attributions_diff)

if pred_research_group_orig != pred_research_group_shuffle:
    
    print("Research groups are different after shuffle")

print(attributions_diff.shape)
    

[[-1.51861547e-08 -6.99037517e-09  3.40384626e-08]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-7.54988745e-08 -1.13029587e-08 -2.19722191e-08]
 [ 3.64280376e-16  1.31432718e-16 -8.56526298e-17]
 [ 8.51122787e-10  4.94971624e-10 -2.27854515e-09]
 [-7.57662245e-08 -4.42031966e-09 -2.92418496e-08]
 [ 1.41721436e-07 -3.68942876e-08  1.02961342e-07]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 3.14342134e-11  1.52495990e-11 -4.54323986e-11]
 [-3.33693257e-07 -1.10187967e-07  3.43389549e-08]
 [-9.18143055e-14 -2.68178576e-14  5.32832242e-15]
 [-3.37704633e-07  1.00961511e-06 -8.49785422e-07]
 [ 3.26287684e-11  5.49803302e-11 -1.09774161e-10]
 [ 1.54177870e-09 -1.85715892e-09  2.89919097e-09]
 [ 1.27182169e-07 -5.70871307e-08  9.53287214e-08]
 [ 1.81373078e-12  1.36864082e-12 -4.68211843e-12]
 [-2.87945172e-16  1.66175117e-16 -1.18294101e-16]
 [ 2.84056850e-10  3.01735543e-10 -8.72266402e-10]
 [-4.13218288e-11 -9.62564302e-12  1.51738638e-12]
 [ 2.15057902e-08 -3.09163238e-

In [14]:
vis_attributions(attributions_orig, subject, cloud, pred_research_group_orig)

# These two should look identical if the method was correct
vis_attributions(attributions_shuffled, subject, cloud_shuffled, pred_research_group_shuffle)

vis_attributions(attributions_unshuffled, subject, cloud_unshuffled, pred_research_group_shuffle)

Widget(value='<iframe src="http://localhost:38999/index.html?ui=P_0x7f2d335bf710_2&reconnect=auto" class="pyvi…

Widget(value='<iframe src="http://localhost:38999/index.html?ui=P_0x7f2cc54c45f0_3&reconnect=auto" class="pyvi…

Widget(value='<iframe src="http://localhost:38999/index.html?ui=P_0x7f2cc54c7050_4&reconnect=auto" class="pyvi…

Trying explainability with SHAP

In [15]:
# Load model
pickle_pathname = "/uolstore/home/student_lnxhome01/sc22olj/Compsci/year3/individual-project-COMP3931/individual-project-sc22olj/runs/run_18-03-2025_18-04-04/run_18-03-2025_18-04-04_params.pkl"

with open(pickle_pathname, 'rb') as file:
    
    model_parameters = pickle.load(file)
    
model = model_parameters.model

In [111]:
def predict(input):

        tensor = torch.from_numpy(input).type(torch.float32).to('cuda')

        tensor = tensor.unsqueeze(0).transpose(2, 1)

        with torch.no_grad():

            prediction = torch.nn.functional.softmax(model(tensor)[0], dim=1)[0]

            # Return the value of the largest output
            # prediction = prediction[torch.argmax(prediction)]

        output = prediction.cpu().numpy()

        return output

def pointnet_shap(model, cloud, device):

    model.to(device)
    
    model.eval()

    explainer = shap.KernelExplainer(predict, cloud, output_names = ['CN', 'MCI'])

    shap_values = explainer.shap_values(cloud, check_additivity=False)

    print(shap_values)

    print(shap_values.shape)
    
    return shap_values, None

In [112]:
cloud = np.load(os.path.join(subject.path, "Left-Hippocampus_aligned_cropped_mesh_downsampledcloud.npy"))

print(cloud.shape)

print(predict(cloud))

shap_values, pred_research_group = pointnet_shap(model, cloud, device)

vis_attributions(shap_values, subject, cloud, pred_research_group)

Using 1024 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


(1024, 3)
[0.58881456 0.41118544]


ValueError: operands could not be broadcast together with shapes (2,) (1024,) 