In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
"""
Show the general pipeline used for the GNN models used for cell typing
in the neuron and limb based models referenced in the NEURD paper. This 
tutorial shows how to use the models for inference runs. 
"""

'\nShow the general pipeline used for the GNN models used for cell typing\nin the neuron and limb based models referenced in the NEURD paper. This \ntutorial shows how to use the models for inference runs. \n'

# Installing Dependencies

In [3]:
# !pip3 install torch-scatter -f https://data.pyg.org/whl/torch-1.10.2+cpu.html
# !pip3 install torch-sparse -f https://data.pyg.org/whl/torch-1.10.2+cpu.html
# !pip3 install torch-geometric

# Model Training Background Information

In [4]:
"""
Standard training 
of GNN models using pytorch geometric was used with the following hyper parameters

Neuron Model
------------
1) lr = 0.0001
2) label_weights = {
     '23P': 0.8,
     '4P': 0.5,
     '5P-IT': 1,
     '5P-NP': 1,
     '5P-PT': 1,
     '6P-CT': 0.8,
     '6P-IT': 1,
     'BC': 1,
     'BPC': 1,
     'MC': 1,
     'NGC': 1
    }
3) Dropout (0.5)
4) torch.optim.Adam optimizer
5) batch_size = 64
6) 60/20/20 train/validation/test split

Limb Model
----------
1) lr = 0.001 ** only learning hyper parameter that is different **
2) label_weights = {
     '23P': 0.8,
     '4P': 0.5,
     '5P-IT': 1,
     '5P-NP': 1,
     '5P-PT': 1,
     '6P-CT': 0.8,
     '6P-IT': 1,
     'BC': 1,
     'BPC': 1,
     'MC': 1,
     'NGC': 1
    }
3) Dropout (0.5)
4) torch.optim.Adam optimizer
5) batch_size = 64
6) 60/20/20 train/validation/test split

Note: training 
"""
""

''

# Step 0: Accessing ground truth data

In [5]:
"""
Can get ground truth cell typing labels from the caveclient
"""

'\nCan get ground truth cell typing labels from the caveclient\n'

In [6]:
from neurd.vdi_microns_cave import volume_data_interface as vdi



In [7]:
"""
The minnie public api wasn't working at the time of development so we
pulled down the public cell typing table ahead of time and now
just need to load it locally
"""

"\nThe minnie public api wasn't working at the time of development so we\npulled down the public cell typing table ahead of time and now\njust need to load it locally\n"

In [8]:
from datasci_tools import pandas_utils as pu
# Load the cell type label table from a local CSV via the pandas helper.
# NOTE(review): the recorded output of this cell is a FileNotFoundError, so the
# CSV has to be placed at this relative path before df_labels (used by the
# label lookup further down) can exist.
df_labels = pu.csv_to_df("./training_data/public_cave_ground_truth_cell_types_with_nucleus.csv")
df_labels

FileNotFoundError: [Errno 2] No such file or directory: './training_data/public_cave_ground_truth_cell_types_with_nucleus.csv'

## how to pull down the mesh for a segment id

In [None]:
# Example segment id; used below to fetch its mesh and to look up its label
segment_id = 864691135694415551

In [None]:
# Fetch the mesh for this segment id through the volume data interface
mesh = vdi.fetch_segment_id_mesh(segment_id)
mesh

In [None]:
# Cell type label for this segment: select the rows whose segment_id matches
# with a boolean mask and keep the first cell_type entry
y = (
    df_labels
    .loc[df_labels["segment_id"] == segment_id, "cell_type"]
    .to_list()[0]
)
y

In [None]:
from datasci_tools import ipyvolume_utils as ipvu

ipvu.plot_objects(mesh)

## Decomposition

In [None]:
"""
The mesh would then be decomposed by following the given tutorials to get the 
neuron objects to be used in the GNN classification

Example: NEURD/Applications/Tutorials/Auto_Proof_Pipeline/Single_Soma_Exc
"""

## ** If you want the output to completely match what the pretrained models were developed with, then the spine volume needs to be computed with a convex hull

In [None]:
from mesh_tools import trimesh_utils as tu

# Route every call made through tu.mesh_volume to the convex-hull watertight
# method so volumes match the data the pretrained models were developed with.
#
# Capture the original function only once: when this cell is re-run,
# tu.mesh_volume is already the wrapper, and saving it as mesh_volume_old
# would make the wrapper call itself until a RecursionError.
if not hasattr(tu, "mesh_volume_old"):
    tu.mesh_volume_old = tu.mesh_volume


def new_mesh_volume_func(*args, **kwargs):
    """Call the original ``tu.mesh_volume`` with ``watertight_method="convex_hull"``.

    All positional and keyword arguments are forwarded unchanged, except that
    any caller-supplied ``watertight_method`` is overridden.

    Returns whatever the original ``tu.mesh_volume`` returns.
    """
    kwargs["watertight_method"] = "convex_hull"
    return tu.mesh_volume_old(*args, **kwargs)


tu.mesh_volume = new_mesh_volume_func

# Step 0: Pulling Data Previously Computed

In [None]:
from neurd import neuron_utils as nru
from neuron_morphology_tools import neuron_nx_utils as nxu


In [None]:
# File paths for the first example neuron. Note that exc_neuron_obj and
# exc_neuron_mesh are path strings; the loaded object is neuron_obj_exc.
exc_neuron_obj = "../Auto_Proof_Pipeline/Single_Soma_Exc/864691134917511946_auto_proof.pbz2"
exc_neuron_mesh = "../Auto_Proof_Pipeline/Single_Soma_Exc/864691134917511946.off"
# cell type label that is paired with this neuron's graph when building Gs
exc_cell_type = "5P-IT"

# Load the saved neuron object, supplying the mesh file as original_mesh
neuron_obj_exc = nru.decompress_neuron(
    exc_neuron_obj,
    original_mesh=exc_neuron_mesh
)
neuron_obj_exc

In [None]:
# File paths for the second example neuron. Note that inh_neuron_obj and
# inh_neuron_mesh are path strings; the loaded object is neuron_obj_inh.
inh_neuron_obj = "../Auto_Proof_Pipeline/Single_Soma_Inh/864691135567721964_auto_proof.pbz2"
inh_neuron_mesh = "../Auto_Proof_Pipeline/Single_Soma_Inh/864691135567721964.off"
# cell type label that is paired with this neuron's graph when building Gs
inh_cell_type = "MC"

# Load the saved neuron object, supplying the mesh file as original_mesh
neuron_obj_inh = nru.decompress_neuron(
    inh_neuron_obj,
    original_mesh=inh_neuron_mesh
)
neuron_obj_inh

In [None]:
# Graph attribute of the loaded neuron object; it is paired with its cell type
# label in Gs and converted to a dataset object in Step 1
G_exc = neuron_obj_exc.neuron_graph_after_proof
G_exc

In [None]:
# Graph attribute of the loaded neuron object; it is paired with its cell type
# label in Gs and converted to a dataset object in Step 1
G_inh = neuron_obj_inh.neuron_graph_after_proof
G_inh

In [None]:
nxu.draw_tree(G_exc)

# Step 1: Transforming networkx graphs into dataset objects (that can be easily converted to pytorch geometric dataset objects)

In [None]:
from neurd.gnn_cell_typing_utils import NeuronGraphData,OutputClassConfig
from neurd import gnn_cell_typing_utils as gnnu

In [None]:
# (graph, cell type label) pairs, one per example neuron
Gs = [
    (G_exc,exc_cell_type),
    (G_inh,inh_cell_type),
]

In [None]:
# converting networkx graphs into dataset objects
G_data = [NeuronGraphData(k,label=v) for k,v in Gs]
Gd = G_data[0]
Gd

In [None]:
# Graph instances can be either full neurons or the individual limbs of neurons

# 1) full neurons: every NeuronGraphData object is one instance
graph_data_list_neuron = G_data

# 2) limbs: flatten the limb_data_objs of every neuron into a single list
graph_data_list_limbs = [
    limb_data
    for neuron_data in G_data
    for limb_data in neuron_data.limb_data_objs
]

# Step 2: Converting dataset objects to pytorch dataset objects

## a) Loading the normalization mean/std

In [None]:
from datasci_tools import pandas_utils as pu
# Load the feature normalization table (mean/std) stored next to the model weights
df_norm_filepath = "./models/neuron_feature_normalization.csv"
df_norm = pu.csv_to_df(df_norm_filepath)
df_norm

## b) Creating the class mapping

In [None]:
# Output class configuration built from the cell type map provided by
# gnn_cell_typing_utils; reused by both datasets and both models below
class_config = OutputClassConfig(gnnu.microns_cell_type_map)
class_config

## c) Creating Pytorch Datasets

In [None]:
from neurd.gnn_cell_typing_utils import NeuronDataset

# Dataset wrapper over the full-neuron graph data, given the normalization
# table and the class configuration
torch_data_obj_neuron = NeuronDataset(
    graph_data_list_neuron,
    normalization_df=df_norm,
    class_config = class_config,
)
# the pytorch geometric dataset
torch_data_obj_neuron.dataset

In [None]:
# Dataset wrapper over the per-limb graph data.
# NOTE(review): df_norm was loaded from neuron_feature_normalization.csv and is
# reused here for limbs — confirm the limb model expects the same normalization.
torch_data_obj_limb = NeuronDataset(
    graph_data_list_limbs,
    normalization_df=df_norm,
    class_config = class_config,
)

# the pytorch geometric dataset
torch_data_obj_limb.dataset

## d) Creating a dataloader

# Step 3: Loading the Model and running inference

In [None]:
# NOTE(review): batch_size is not referenced by any later cell visible in this
# notebook — confirm whether it should be passed to GnnInput / InferenceRunner.
batch_size = 20

## Option 1: Neuron Based Model

### a) Instantiating model

In [None]:
# Architecture settings for the neuron-based GNN
architecture_kwargs = {
    # taken from x.shape[1] of the first element of the neuron dataset
    "num_node_features": torch_data_obj_neuron.dataset[0].x.shape[1],
    "num_classes": class_config.num_classes,
    "activation_function": "relu",
    "global_pool_type": "mean_weighted",
    "global_pool_weight": "node_weight",

    "n_hidden_channels": 128,
    "n_layers": 2,

    # batch norm specifics
    "use_bn": True,
    "track_running_stats": True,
}
# second name for the same dict, as defined by the original cell
model_architecture_kwargs = architecture_kwargs

# weights file for the neuron model and the model instance itself
model_weights = "./models/neuron_gnn_2_layer"
model = gnnu.NeuronGCN(**architecture_kwargs)

# Bundle dataset, model, weights path and class config for the inference runner
neuron_input_obj = gnnu.GnnInput(
    dataset_obj=torch_data_obj_neuron,
    model=model,
    model_weights_filepath=model_weights,
    class_config=class_config,
)

neuron_input_obj.model

### b) Inference Run

In [None]:
# Run inference for the neuron-based model input; the predictions are read
# from inf_obj.prediction_df afterwards
inf_obj = gnnu.InferenceRunner(neuron_input_obj)
inf_obj.run()

In [None]:
inf_obj.prediction_df

## Option 2: Limb Based Model

### a) Instantiating model

In [None]:
# Architecture settings for the limb-based GNN
architecture_kwargs_limb = {
    # taken from x.shape[1] of the first element of the limb dataset
    "num_node_features": torch_data_obj_limb.dataset[0].x.shape[1],
    "num_classes": class_config.num_classes,
    "activation_function": "relu",
    "global_pool_type": "mean_weighted",
    "global_pool_weight": "node_weight",

    "n_hidden_channels": 128,
    "n_layers": 2,

    # batch norm specifics
    "use_bn": True,
    "track_running_stats": True,
}

# weights file for the limb model and the model instance itself
model_weights_limb = "./models/limb_gnn_2_layer"
model_limb = gnnu.LimbGCN(**architecture_kwargs_limb)

# Bundle dataset, model, weights path and class config for the inference runner
limb_input_obj = gnnu.GnnInput(
    dataset_obj=torch_data_obj_limb,
    model=model_limb,
    model_weights_filepath=model_weights_limb,
    class_config=class_config,
)

limb_input_obj.model

### b) Inference Run

In [None]:
# Run inference for the limb-based model input; the predictions are read
# from inf_obj_limb.prediction_df afterwards
inf_obj_limb = gnnu.InferenceRunner(limb_input_obj)
inf_obj_limb.run()

In [None]:
"""
Note: if neuron objects were not run with 
spine volume computed using a convex hull then some limbs
may not be as accurate
"""

inf_obj_limb.prediction_df

In [None]:
"""
Note: the limb based model is lower performing because the 
newer neuron graph objects were generated using mesh volume 
computed using a higher fidelity method of mesh repair
whereas the limb model was based on data where the mesh
volume was computed using a convex hull. This fact along
with the usage of max pooling in the limb based model
is what makes this feature more likely to degrade the 
classification accuracy. To avoid this you could train 
your own limb based model or when computing the neuron
object set the  mesh volume function to use a convex
hull as mentioned earlier
"""