In [2]:
import pygsp
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from caveclient import CAVEclient
import torch
from torch_geometric import utils
from torch_geometric.data import Data
from tqdm import tqdm 
from pygod.models import DONE

In [6]:
from MinnieData_Table_Extraction import extract_full_nodes, get_nodes_table, set_node_attributes
from Node_Attributes import allens_prediction,gac_prediction, in_out_degs, cell_type

In [4]:
def filter_by_layer_connected(nodes, edges, layer):
    '''
    Extract subgraphs from June connectome data by layer for analysis
    Parameters: 
        nodes (Pandas dataframe) final june node dataframe
        edges (Pandas dataframe) final june edge dataframe 
        layer (string) layer of interest to extract
    Return: 
        node table subgraph (Pandas dataframe) by layer without orphans
        edge table subgraph (Pandas dataframe) by layer
        Subgraph G (NetworkX graph object) without orphans
    '''

    # Filter entire node table to only those in layer 
    rslt_synapse_df = nodes[nodes['layer'] == layer] 

    # Get list of unique neurons
    list_of_neurons = set(rslt_synapse_df['pt_root_id'].to_list())
    # make df with list of neurons FIX IT

    # Filter the synapse tables  extract all unique connections between nodes
    filtered = edges[edges['Source'].isin(list_of_neurons)]
    filtered2 = filtered[filtered['Target'].isin(list_of_neurons)]

    # create fully connected graph object
    G = nx.from_pandas_edgelist(filtered2, source="Source", target="Target", create_using=nx.DiGraph, edge_attr=["Euclidean_Distance"])
    list_of_current_nodes = set(list(G.nodes))

    # return node_df for fully connected graph object (does not include orphans)
    connected_node_list = list(G.nodes())
    rslt_synapse_df = rslt_synapse_df[rslt_synapse_df['pt_root_id'].isin(connected_node_list)]

    #reformatting node_df and dropping duplicates
    rslt_synapse_df = rslt_synapse_df.reset_index()
    rslt_synapse_df = rslt_synapse_df.drop(columns=['index','Unnamed: 0'], axis=1)
    rslt_synapse_df = rslt_synapse_df.drop_duplicates(subset=['pt_root_id'])
    
    return rslt_synapse_df, filtered2, G


In [5]:
#load in node and edge list (from extract_full_nodes)
nodes = pd.read_csv("final_connectome_node_table.csv") 
edges = pd.read_csv("final_connectome_edge_table.csv")

## Layer 1

In [7]:
#Extract node table, edge table, and graph object (No orphans in node table or graph)
node_df_L1, edge_df_L1, G_L1 = filter_by_layer_connected(nodes, edges, 'L1')  

In [6]:
node_df_L1

Unnamed: 0,id,pt_root_id,pt_position,timestamp,layer,cell_polarity,subclass,pre_syn_count,post_syn_count,unique_pre_syn_target_count,unique_post_syn_target_count
0,485509,864691136740606812,[282608 103808 20318],2022-06-09 14:30:32.002707,L1,excitatory,23P,93.0,6802.0,77.0,6133.0
1,420548,864691135463500869,[261040 103568 19671],2022-06-09 14:30:32.002707,L1,excitatory,23P,602.0,4492.0,480.0,3801.0
2,484868,864691136199051157,[290608 84320 24817],2022-06-09 14:30:32.002707,L1,excitatory,23P,235.0,2105.0,216.0,1984.0
3,104850,864691136311834173,[105328 109056 20813],2022-06-09 14:30:32.002707,L1,excitatory,23P,42.0,3350.0,31.0,3046.0
4,420304,864691135567738604,[251200 102352 18621],2022-06-09 14:30:32.002707,L1,excitatory,23P,80.0,4588.0,65.0,4192.0
...,...,...,...,...,...,...,...,...,...,...,...
1284,420328,864691135501873474,[256880 101424 18112],2022-06-09 14:30:32.002707,L1,excitatory,23P,129.0,4810.0,110.0,4445.0
1285,607791,864691135771586555,[348368 97248 15497],2022-06-09 14:30:32.002707,L1,inhibitory,BPC,13.0,894.0,10.0,778.0
1286,290914,864691135194551082,[193296 106240 18412],2022-06-09 14:30:32.002707,L1,inhibitory,NGC,2024.0,3424.0,1626.0,3190.0
1287,607592,864691135354901967,[360224 86496 25046],2022-06-09 14:30:32.002707,L1,excitatory,23P,21.0,2760.0,15.0,2630.0


In [8]:
#one hot encode cell_polarity (excitatory vs inhibitory) and subclass (cell type)
node_df_hot_L1 = pd.get_dummies(node_df_L1, columns=["subclass","cell_polarity"], prefix={"Cell Type","Cell Classification"})
node_df_hot_L1 = node_df_hot_L1.drop(['id','timestamp','unique_pre_syn_target_count','unique_post_syn_target_count'],axis=1)
#Relabel pre_synaptic_count as out degree and vice versa
node_df_hot_L1 = node_df_hot_L1.rename(columns={'pre_syn_count':'out degree','post_syn_count':'in degree'}) #Relabel pre_synaptic_count as out degree and vice versa

In [8]:
#checking for unique cell types to set node attributes 
node_df_L1['subclass'].unique()

array(['23P', 'NGC', 'MC', 'BPC', 'BC', '5P-PT', '4P'], dtype=object)

In [6]:
node_attributes_L1=['in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC','Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory']

In [9]:
#set node attributes 
for index, row in node_df_hot_L1.iterrows():
     node_attr_dict_L1={k: float(row.to_dict()[k]) for k in node_attributes_L1}
     G_L1.nodes[row['pt_root_id']].update(node_attr_dict_L1)

In [10]:
#creating py geometric data object
Data_L1=utils.from_networkx(G_L1, group_node_attrs=('in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC','Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory'
        ))

In [11]:
#running DONE

model_DONE_L1 = DONE()
model_DONE_L1.fit(Data_L1)

labels_DONE_L1 = model_DONE_L1.predict(Data_L1)
print('Labels:')
print(labels_DONE_L1)

outlier_scores_DONE_L1 = model_DONE_L1.decision_function(Data_L1)
print('Raw scores:')
print(outlier_scores_DONE_L1)

prob_DONE_L1 = model_DONE_L1.predict_proba(Data_L1)
print('Probability:')
print(prob_DONE_L1)

labels_DONE_L1, confidence_DONE_L1 = model_DONE_L1.predict(Data_L1, return_confidence=True)
print('Labels:')
print(labels_DONE_L1)
print('Confidence:')
print(confidence_DONE_L1)



Labels:
[0 0 0 ... 0 0 0]
Raw scores:
[0.00026097 0.00022262 0.00041682 ... 0.0001956  0.00021608 0.00019017]
Probability:
[[9.98492317e-01 1.50768290e-03]
 [9.99595916e-01 4.04084310e-04]
 [9.94007677e-01 5.99232316e-03]
 ...
 [1.00000000e+00 0.00000000e+00]
 [9.99784246e-01 2.15753864e-04]
 [1.00000000e+00 0.00000000e+00]]
Labels:
[0 0 0 ... 0 0 0]
Confidence:
[1. 1. 1. ... 1. 1. 1.]


In [15]:
#number of anomalous nodes labeled by DONE
np.sum(labels_DONE_L1)

169

In [16]:
#Grabbing seg_ids of anomalous nodes labeled by DONE
DONE_L1_AD = node_df_L1.copy()
DONE_L1_AD['DONE_labels'] = labels_DONE_L1
DONE_L1_AD = DONE_L1_AD[DONE_L1_AD['DONE_labels'] == 1]

In [12]:
#finding 80th percentile (top 20% of anomalous nodes)
percentile80 = np.percentile(prob_DONE_L1[:,1], 80)
L1_AD_indarray = prob_DONE_L1[:,1] > percentile80
DONE_L1_AD_80 = node_df_L1.iloc[L1_AD_indarray]

In [13]:
DONE_L1_AD_80.to_csv('DONE_L1_AD_80.csv')

In [22]:
#DONE_L1_AD.to_csv('DONE_L1_AD.csv')

## Layer 23

In [23]:
#Extract node table, edge table, and graph object (No orphans in node table or graph)
node_df_L23, edge_df_L23, G_L23 = filter_by_layer_connected(nodes, edges, 'L23')  

In [26]:
#one hot encode cell_polarity (excitatory vs inhibitory) and subclass (cell type)
node_df_hot_L23 = pd.get_dummies(node_df_L23, columns=["cell_polarity","subclass"], prefix={"Cell Type","Cell Classification"})
node_df_hot_L23 = node_df_hot_L23.drop(['id','timestamp','unique_pre_syn_target_count','unique_post_syn_target_count'],axis=1)
#Relabel pre_synaptic_count as out degree and vice versa
node_df_hot_L23 = node_df_hot_L23.rename(columns={'pre_syn_count':'out degree','post_syn_count':'in degree'}) #Relabel pre_synaptic_count as out degree and vice versa
#checking for unique cell types to set node attributes 
node_df_L23['subclass'].unique()

array(['23P', 'BPC', 'BC', 'NGC', 'MC', '4P', '5P-PT'], dtype=object)

In [27]:
node_attributes_L23=['in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC', 'Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory']

#set node attributes 
for index, row in node_df_hot_L23.iterrows():
     node_attr_dict_L23={k: float(row.to_dict()[k]) for k in node_attributes_L23}
     G_L23.nodes[row['pt_root_id']].update(node_attr_dict_L23)

In [28]:
#creating py geometric data object
Data_L23=utils.from_networkx(G_L23, group_node_attrs=('in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC','Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory'
        ))

In [29]:
#running DONE

model_DONE_L23 = DONE()
model_DONE_L23.fit(Data_L23)

labels_DONE_L23 = model_DONE_L23.predict(Data_L23)
print('Labels:')
print(labels_DONE_L23)

outlier_scores_DONE_L23 = model_DONE_L23.decision_function(Data_L23)
print('Raw scores:')
print(outlier_scores_DONE_L23)

prob_DONE_L23 = model_DONE_L23.predict_proba(Data_L23)
print('Probability:')
print(prob_DONE_L23)

labels_DONE_L23, confidence_DONE_L23 = model_DONE_L23.predict(Data_L23, return_confidence=True)
print('Labels:')
print(labels_DONE_L23)
print('Confidence:')
print(confidence_DONE_L23)



Labels:
[0 0 1 ... 0 0 0]
Raw scores:
[1.01505786e-04 9.92949572e-05 1.37910841e-03 ... 1.88745835e-05
 2.14031043e-05 2.08257916e-05]
Probability:
[[0.9877419  0.0122581 ]
 [0.98808485 0.01191515]
 [0.78955775 0.21044225]
 ...
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]]
Labels:
[0 0 1 ... 0 0 0]
Confidence:
[0.99999837 1.         1.         ... 1.         1.         1.        ]


In [30]:
#number of anomalous nodes labeled by DONE
np.sum(labels_DONE_L23)

1854

In [32]:
#Grabbing seg_ids of anomalous nodes labeled by DONE
DONE_L23_AD = node_df_L23.copy()
DONE_L23_AD['DONE_labels'] = labels_DONE_L23
DONE_L23_AD = DONE_L23_AD[DONE_L23_AD['DONE_labels'] == 1]

In [33]:
#finding 80th percentile (top 20% of anomalous nodes)
percentile80_L23 = np.percentile(prob_DONE_L23[:,1], 80)
L23_AD_indarray = prob_DONE_L23[:,1] > percentile80_L23
DONE_L23_AD_80 = node_df_L23.iloc[L23_AD_indarray]
DONE_L23_AD_80

Unnamed: 0,id,pt_root_id,pt_position,timestamp,layer,cell_polarity,subclass,pre_syn_count,post_syn_count,unique_pre_syn_target_count,unique_post_syn_target_count
0,223131,864691135695974042,[150880 111632 24610],2022-06-09 14:30:32.002707,L23,excitatory,23P,260.0,4928.0,236.0,4339.0
1,361213,864691135416006330,[217072 132656 19736],2022-06-09 14:30:32.002707,L23,excitatory,23P,222.0,3591.0,183.0,3211.0
2,518339,864691135733291156,[305536 121200 20212],2022-06-09 14:30:32.002707,L23,excitatory,23P,41.0,3921.0,31.0,3624.0
4,358680,864691135478435270,[217840 122528 16234],2022-06-09 14:30:32.002707,L23,excitatory,23P,309.0,3863.0,261.0,3534.0
6,610723,864691135544615976,[345568 110960 21799],2022-06-09 14:30:32.002707,L23,excitatory,23P,52.0,4023.0,41.0,3652.0
...,...,...,...,...,...,...,...,...,...,...,...
13390,422045,864691135737495409,[255104 112112 20252],2022-06-09 14:30:32.002707,L23,excitatory,23P,302.0,3951.0,257.0,3629.0
13516,583754,864691135564755031,[328064 129968 19433],2022-06-09 14:30:32.002707,L23,excitatory,23P,186.0,6069.0,165.0,5513.0
13582,222085,864691135181819394,[155504 111952 16400],2022-06-09 14:30:32.002707,L23,excitatory,23P,78.0,3424.0,70.0,3203.0
13622,224395,864691135194819882,[149840 133152 22592],2022-06-09 14:30:32.002707,L23,excitatory,23P,775.0,5583.0,683.0,4970.0


In [34]:
#DONE_L23_AD.to_csv('DONE_L23_AD.csv')

## Layer 4

In [38]:
#Extract node table, edge table, and graph object (No orphans in node table or graph)
node_df_L4, edge_df_L4, G_L4 = filter_by_layer_connected(nodes, edges, 'L4')  # sample run for subgraph L23

#one hot encode cell_polarity (excitatory vs inhibitory) and subclass (cell type)
node_df_hot_L4 = pd.get_dummies(node_df_L4, columns=["cell_polarity","subclass"], prefix={"Cell Type","Cell Classification"})
node_df_hot_L4 = node_df_hot_L4.drop(['id','timestamp','unique_pre_syn_target_count','unique_post_syn_target_count'],axis=1)
#Relabel pre_synaptic_count as out degree and vice versa
node_df_hot_L4 = node_df_hot_L4.rename(columns={'pre_syn_count':'out degree','post_syn_count':'in degree'}) #Relabel pre_synaptic_count as out degree and vice versa
#checking for unique cell types to set node attributes 
node_df_L4['subclass'].unique()

array(['4P', '23P', 'MC', 'BC', '5P-PT', '5P-IT', 'BPC', 'NGC'],
      dtype=object)

In [39]:
node_attributes_L4=['in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT','Cell Type_5P-IT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC', 'Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory']

#set node attributes 
for index, row in node_df_hot_L4.iterrows():
     node_attr_dict_L4={k: float(row.to_dict()[k]) for k in node_attributes_L4}
     G_L4.nodes[row['pt_root_id']].update(node_attr_dict_L4)

#creating py geometric data object
Data_L4=utils.from_networkx(G_L4, group_node_attrs=('in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT', 'Cell Type_5P-IT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC','Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory'
        ))

In [40]:
#running DONE

model_DONE_L4 = DONE()
model_DONE_L4.fit(Data_L4)

labels_DONE_L4 = model_DONE_L4.predict(Data_L4)
print('Labels:')
print(labels_DONE_L4)

outlier_scores_DONE_L4 = model_DONE_L4.decision_function(Data_L4)
print('Raw scores:')
print(outlier_scores_DONE_L4)

prob_DONE_L4 = model_DONE_L4.predict_proba(Data_L4)
print('Probability:')
print(prob_DONE_L4)

labels_DONE_L4, confidence_DONE_L4 = model_DONE_L4.predict(Data_L4, return_confidence=True)
print('Labels:')
print(labels_DONE_L4)
print('Confidence:')
print(confidence_DONE_L4)

Labels:
[0 0 0 ... 0 0 0]
Raw scores:
[1.28867296e-05 2.12281539e-05 1.68157549e-05 ... 1.10101246e-05
 1.25852212e-05 1.12638727e-05]
Probability:
[[1.00000000e+00 0.00000000e+00]
 [9.99959814e-01 4.01855083e-05]
 [9.99983673e-01 1.63268974e-05]
 ...
 [1.00000000e+00 0.00000000e+00]
 [1.00000000e+00 0.00000000e+00]
 [1.00000000e+00 0.00000000e+00]]
Labels:
[0 0 0 ... 0 0 0]
Confidence:
[1. 1. 1. ... 1. 1. 1.]


In [41]:
#number of anomalous nodes labeled by DONE
np.sum(labels_DONE_L4)

2158

In [42]:
#Grabbing seg_ids of anomalous nodes labeled by DONE
DONE_L4_AD = node_df_L4.copy()
DONE_L4_AD['DONE_labels'] = labels_DONE_L4
DONE_L4_AD = DONE_L4_AD[DONE_L4_AD['DONE_labels'] == 1]

In [43]:
#finding 80th percentile (top 20% of anomalous nodes)
percentile80_L4 = np.percentile(prob_DONE_L4[:,1], 80)
L4_AD_indarray = prob_DONE_L4[:,1] > percentile80_L4
DONE_L4_AD_80 = node_df_L4.iloc[L4_AD_indarray]
DONE_L4_AD_80

Unnamed: 0,id,pt_root_id,pt_position,timestamp,layer,cell_polarity,subclass,pre_syn_count,post_syn_count,unique_pre_syn_target_count,unique_post_syn_target_count
4,262975,864691135345049330,[164144 165872 22646],2022-06-09 14:30:32.002707,L4,excitatory,4P,489.0,3032.0,431.0,2510.0
11,261392,864691135883891312,[178128 157872 27288],2022-06-09 14:30:32.002707,L4,excitatory,4P,109.0,2501.0,88.0,2149.0
14,113155,864691135809476044,[101552 149616 21520],2022-06-09 14:30:32.002707,L4,excitatory,23P,48.0,561.0,44.0,505.0
15,361614,864691135212611456,[219984 139024 23200],2022-06-09 14:30:32.002707,L4,excitatory,4P,581.0,1778.0,507.0,1474.0
30,613811,864691136672229117,[350736 148192 16572],2022-06-09 14:30:32.002707,L4,excitatory,4P,84.0,1395.0,80.0,1272.0
...,...,...,...,...,...,...,...,...,...,...,...
16468,488178,864691135980833994,[292640 131488 15233],2022-06-09 14:30:32.002707,L4,excitatory,23P,170.0,1654.0,143.0,1451.0
16616,394838,864691135181766146,[231056 157584 22404],2022-06-09 14:30:32.002707,L4,excitatory,4P,95.0,1595.0,86.0,1361.0
16626,488489,864691135888492681,[290320 130144 17797],2022-06-09 14:30:32.002707,L4,excitatory,23P,95.0,5088.0,79.0,4567.0
16754,424424,864691135256493999,[255696 134624 24754],2022-06-09 14:30:32.002707,L4,excitatory,23P,132.0,2914.0,120.0,2476.0


In [44]:
#DONE_L4_AD.to_csv('DONE_L4_AD.csv')

## Layer 5

In [46]:
#Extract node table, edge table, and graph object (No orphans in node table or graph)
node_df_L5, edge_df_L5, G_L5 = filter_by_layer_connected(nodes, edges, 'L5')  # sample run for subgraph L23

#one hot encode cell_polarity (excitatory vs inhibitory) and subclass (cell type)
node_df_hot_L5 = pd.get_dummies(node_df_L5, columns=["cell_polarity","subclass"], prefix={"Cell Type","Cell Classification"})
node_df_hot_L5 = node_df_hot_L5.drop(['id','timestamp','unique_pre_syn_target_count','unique_post_syn_target_count'],axis=1)
#Relabel pre_synaptic_count as out degree and vice versa
node_df_hot_L5 = node_df_hot_L5.rename(columns={'pre_syn_count':'out degree','post_syn_count':'in degree'}) #Relabel pre_synaptic_count as out degree and vice versa
#checking for unique cell types to set node attributes 
node_df_L5['subclass'].unique()

array(['5P-IT', '4P', '5P-PT', 'BC', '6P-IT', 'MC', '5P-NP', '6P-CT',
       'NGC', '23P', 'BPC'], dtype=object)

In [47]:
node_attributes_L5=['in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT','Cell Type_5P-IT', 'Cell Type_6P-IT', 'Cell Type_5P-NP','Cell Type_6P-CT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC', 'Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory']

#set node attributes 
for index, row in node_df_hot_L5.iterrows():
     node_attr_dict_L5={k: float(row.to_dict()[k]) for k in node_attributes_L5}
     G_L5.nodes[row['pt_root_id']].update(node_attr_dict_L5)

#creating py geometric data object
Data_L5=utils.from_networkx(G_L5, group_node_attrs=('in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT', 'Cell Type_5P-IT', 'Cell Type_6P-IT', 'Cell Type_5P-NP','Cell Type_6P-CT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC','Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory'
        ))

In [48]:
#running DONE

model_DONE_L5 = DONE()
model_DONE_L5.fit(Data_L5)

labels_DONE_L5 = model_DONE_L5.predict(Data_L5)
print('Labels:')
print(labels_DONE_L5)

outlier_scores_DONE_L5 = model_DONE_L5.decision_function(Data_L5)
print('Raw scores:')
print(outlier_scores_DONE_L5)

prob_DONE_L5 = model_DONE_L5.predict_proba(Data_L5)
print('Probability:')
print(prob_DONE_L5)

labels_DONE_L5, confidence_DONE_L5 = model_DONE_L5.predict(Data_L5, return_confidence=True)
print('Labels:')
print(labels_DONE_L5)
print('Confidence:')
print(confidence_DONE_L5)



Labels:
[0 0 1 ... 0 0 0]
Raw scores:
[5.70137890e-05 2.67360156e-05 2.69707816e-04 ... 1.14318327e-05
 1.19045362e-05 1.15444136e-05]
Probability:
[[9.99643090e-01 3.56910493e-04]
 [9.99914042e-01 8.59577117e-05]
 [9.97739712e-01 2.26028817e-03]
 ...
 [1.00000000e+00 0.00000000e+00]
 [1.00000000e+00 0.00000000e+00]
 [1.00000000e+00 0.00000000e+00]]
Labels:
[0 0 1 ... 0 0 0]
Confidence:
[1. 1. 1. ... 1. 1. 1.]


In [49]:
#number of anomalous nodes labeled by DONE
np.sum(labels_DONE_L5)

1516

In [50]:
#Grabbing seg_ids of anomalous nodes labeled by DONE
DONE_L5_AD = node_df_L5.copy()
DONE_L5_AD['DONE_labels'] = labels_DONE_L5
DONE_L5_AD = DONE_L5_AD[DONE_L5_AD['DONE_labels'] == 1]

In [51]:
#finding 80th percentile (top 20% of anomalous nodes)
percentile80_L5 = np.percentile(prob_DONE_L5[:,1], 80)
L5_AD_indarray = prob_DONE_L5[:,1] > percentile80_L5
DONE_L5_AD_80 = node_df_L5.iloc[L5_AD_indarray]
DONE_L5_AD_80

Unnamed: 0,id,pt_root_id,pt_position,timestamp,layer,cell_polarity,subclass,pre_syn_count,post_syn_count,unique_pre_syn_target_count,unique_post_syn_target_count
2,161428,864691136031647163,[127216 177904 17254],2022-06-09 14:30:32.002707,L5,excitatory,4P,397.0,2813.0,345.0,2490.0
4,495262,864691135991244746,[284848 187728 21257],2022-06-09 14:30:32.002707,L5,excitatory,5P-IT,225.0,3145.0,193.0,2829.0
5,587355,864691135799761762,[339056 176048 19243],2022-06-09 14:30:32.002707,L5,excitatory,4P,74.0,3033.0,59.0,2712.0
7,335341,864691135407314633,[204800 183936 20737],2022-06-09 14:30:32.002707,L5,excitatory,5P-PT,83.0,1860.0,77.0,1679.0
10,493008,864691135476436136,[288320 174128 19656],2022-06-09 14:30:32.002707,L5,excitatory,5P-IT,182.0,3486.0,147.0,3091.0
...,...,...,...,...,...,...,...,...,...,...,...
12616,614899,864691135771683659,[349808 159536 24361],2022-06-09 14:30:32.002707,L5,excitatory,MC,129.0,2712.0,114.0,2406.0
12644,494299,864691135396392993,[282240 185040 16308],2022-06-09 14:30:32.002707,L5,excitatory,5P-IT,36.0,2403.0,26.0,2165.0
12741,335071,864691136065210264,[200480 184032 19506],2022-06-09 14:30:32.002707,L5,excitatory,5P-IT,357.0,3270.0,312.0,2913.0
12792,196080,864691135910064297,[135504 179584 16062],2022-06-09 14:30:32.002707,L5,excitatory,5P-IT,296.0,1878.0,255.0,1691.0


In [52]:
#DONE_L5_AD.to_csv('DONE_L5_AD.csv')

## Layer 6

In [55]:
#Extract node table, edge table, and graph object (No orphans in node table or graph)
node_df_L6, edge_df_L6, G_L6 = filter_by_layer_connected(nodes, edges, 'L6')  # sample run for subgraph L23

#one hot encode cell_polarity (excitatory vs inhibitory) and subclass (cell type)
node_df_hot_L6 = pd.get_dummies(node_df_L6, columns=["cell_polarity","subclass"], prefix={"Cell Type","Cell Classification"})
node_df_hot_L6 = node_df_hot_L6.drop(['id','timestamp','unique_pre_syn_target_count','unique_post_syn_target_count'],axis=1)
#Relabel pre_synaptic_count as out degree and vice versa
node_df_hot_L6 = node_df_hot_L6.rename(columns={'pre_syn_count':'out degree','post_syn_count':'in degree'}) #Relabel pre_synaptic_count as out degree and vice versa
#checking for unique cell types to set node attributes 
node_df_L6['subclass'].unique()

array(['6P-IT', '5P-IT', 'MC', '6P-CT', '5P-NP', 'BC', 'NGC', 'BPC',
       '5P-PT', '23P', '4P'], dtype=object)

In [57]:
node_attributes_L6=['in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT','Cell Type_5P-IT', 'Cell Type_6P-IT', 'Cell Type_5P-NP','Cell Type_6P-CT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC', 'Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory']

#set node attributes 
for index, row in node_df_hot_L6.iterrows():
     node_attr_dict_L6={k: float(row.to_dict()[k]) for k in node_attributes_L6}
     G_L6.nodes[row['pt_root_id']].update(node_attr_dict_L6)

#creating py geometric data object
Data_L6=utils.from_networkx(G_L6, group_node_attrs=('in degree','out degree',
        'Cell Type_23P', 'Cell Type_4P','Cell Type_5P-PT', 'Cell Type_5P-IT', 'Cell Type_6P-IT', 'Cell Type_5P-NP','Cell Type_6P-CT',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC','Cell Type_NGC',
        'Cell Classification_excitatory','Cell Classification_inhibitory'
        ))

In [58]:
#running DONE

model_DONE_L6 = DONE()
model_DONE_L6.fit(Data_L6)

labels_DONE_L6 = model_DONE_L6.predict(Data_L6)
print('Labels:')
print(labels_DONE_L6)

outlier_scores_DONE_L6 = model_DONE_L6.decision_function(Data_L6)
print('Raw scores:')
print(outlier_scores_DONE_L6)

prob_DONE_L6 = model_DONE_L6.predict_proba(Data_L6)
print('Probability:')
print(prob_DONE_L6)

labels_DONE_L6, confidence_DONE_L6 = model_DONE_L6.predict(Data_L6, return_confidence=True)
print('Labels:')
print(labels_DONE_L6)
print('Confidence:')
print(confidence_DONE_L6)



Labels:
[1 0 0 ... 0 0 0]
Raw scores:
[1.83547707e-03 1.50032865e-05 2.15295750e-05 ... 1.31555125e-05
 1.22527963e-05 1.31823153e-05]
Probability:
[[9.93765915e-01 6.23408540e-03]
 [9.99983537e-01 1.64630934e-05]
 [9.99961247e-01 3.87528933e-05]
 ...
 [9.99989848e-01 1.01522303e-05]
 [9.99992931e-01 7.06910514e-06]
 [9.99989756e-01 1.02437723e-05]]
Labels:
[1 0 0 ... 0 0 0]
Confidence:
[1. 1. 1. ... 1. 1. 1.]


In [59]:
#number of anomalous nodes labeled by DONE
np.sum(labels_DONE_L6)

2939

In [60]:
#Grabbing seg_ids of anomalous nodes labeled by DONE
DONE_L6_AD = node_df_L6.copy()
DONE_L6_AD['DONE_labels'] = labels_DONE_L6
DONE_L6_AD = DONE_L6_AD[DONE_L6_AD['DONE_labels'] == 1]

In [61]:
#finding 80th percentile (top 20% of anomalous nodes)
percentile80_L6 = np.percentile(prob_DONE_L6[:,1], 80)
L6_AD_indarray = prob_DONE_L6[:,1] > percentile80_L6
DONE_L6_AD_80 = node_df_L6.iloc[L6_AD_indarray]
DONE_L6_AD_80

Unnamed: 0,id,pt_root_id,pt_position,timestamp,layer,cell_polarity,subclass,pre_syn_count,post_syn_count,unique_pre_syn_target_count,unique_post_syn_target_count
0,204710,864691135497604883,[134208 247184 18598],2022-06-09 14:30:32.002707,L6,excitatory,6P-IT,111.0,2125.0,95.0,1924.0
3,302532,864691134917529098,[187504 208768 18182],2022-06-09 14:30:32.002707,L6,excitatory,MC,158.0,6564.0,128.0,5772.0
5,674643,864691135382500058,[368624 233456 19043],2022-06-09 14:30:32.002707,L6,excitatory,6P-CT,15.0,866.0,8.0,773.0
10,273184,864691135454040938,[163888 244368 18219],2022-06-09 14:30:32.002707,L6,excitatory,6P-IT,40.0,2106.0,28.0,1936.0
12,719779,864691136116277924,[378256 240304 19202],2022-06-09 14:30:32.002707,L6,excitatory,6P-IT,15.0,118.0,9.0,89.0
...,...,...,...,...,...,...,...,...,...,...,...
17222,339169,864691135272337169,[197184 212208 17814],2022-06-09 14:30:32.002707,L6,excitatory,6P-IT,191.0,3021.0,166.0,2679.0
17339,436307,864691135272381969,[245840 229120 23213],2022-06-09 14:30:32.002707,L6,excitatory,6P-CT,48.0,2516.0,36.0,2254.0
17370,87718,864691135657803394,[ 92672 242960 20599],2022-06-09 14:30:32.002707,L6,excitatory,6P-CT,3.0,74.0,1.0,58.0
17449,500581,864691135113150105,[291152 225744 18733],2022-06-09 14:30:32.002707,L6,excitatory,6P-IT,66.0,2212.0,54.0,2040.0


In [62]:
#DONE_L6_AD.to_csv('DONE_L6_AD.csv')

## White Matter

In [63]:
#Extract node table, edge table, and graph object (No orphans in node table or graph)
node_df_WM, edge_df_WM, G_WM = filter_by_layer_connected(nodes, edges, 'WM')  # sample run for subgraph L23

#one hot encode cell_polarity (excitatory vs inhibitory) and subclass (cell type)
node_df_hot_WM = pd.get_dummies(node_df_WM, columns=["cell_polarity","subclass"], prefix={"Cell Type","Cell Classification"})
node_df_hot_WM = node_df_hot_WM.drop(['id','timestamp','unique_pre_syn_target_count','unique_post_syn_target_count'],axis=1)
#Relabel pre_synaptic_count as out degree and vice versa
node_df_hot_WM = node_df_hot_WM.rename(columns={'pre_syn_count':'out degree','post_syn_count':'in degree'}) #Relabel pre_synaptic_count as out degree and vice versa
#checking for unique cell types to set node attributes 
node_df_WM['subclass'].unique()

array(['5P-PT', '6P-CT', '6P-IT', 'MC', '5P-NP', 'BPC', 'BC'],
      dtype=object)

In [64]:
node_attributes_WM=['in degree','out degree',
        'Cell Type_5P-PT', 'Cell Type_6P-CT', 'Cell Type_6P-IT','Cell Type_5P-NP',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC',
        'Cell Classification_excitatory','Cell Classification_inhibitory']

#set node attributes 
for index, row in node_df_hot_WM.iterrows():
     node_attr_dict_WM={k: float(row.to_dict()[k]) for k in node_attributes_WM}
     G_WM.nodes[row['pt_root_id']].update(node_attr_dict_WM)

#creating py geometric data object
Data_WM=utils.from_networkx(G_WM, group_node_attrs=('in degree','out degree',
        'Cell Type_5P-PT', 'Cell Type_6P-CT', 'Cell Type_6P-IT','Cell Type_5P-NP',
        'Cell Type_BC', 'Cell Type_BPC','Cell Type_MC',
        'Cell Classification_excitatory','Cell Classification_inhibitory'
        ))

In [65]:
#running DONE

model_DONE_WM = DONE()
model_DONE_WM.fit(Data_WM)

labels_DONE_WM = model_DONE_WM.predict(Data_WM)
print('Labels:')
print(labels_DONE_WM)

outlier_scores_DONE_WM = model_DONE_WM.decision_function(Data_WM)
print('Raw scores:')
print(outlier_scores_DONE_WM)

prob_DONE_WM = model_DONE_WM.predict_proba(Data_WM)
print('Probability:')
print(prob_DONE_WM)

labels_DONE_WM, confidence_DONE_WM = model_DONE_WM.predict(Data_WM, return_confidence=True)
print('Labels:')
print(labels_DONE_WM)
print('Confidence:')
print(confidence_DONE_WM)

Labels:
[0 0 0 0 0 0 1 0 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 0
 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
Raw scores:
[0.00312947 0.00353487 0.00719124 0.00508325 0.00509334 0.00580932
 0.02665239 0.00432963 0.00312666 0.00364128 0.01495589 0.0083654
 0.00415436 0.01461536 0.0075867  0.00258554 0.00991657 0.00406791
 0.00432683 0.0038073  0.0023117  0.00496632 0.00605575 0.00538175
 0.00129904 0.01090381 0.00345479 0.00359801 0.00312035 0.01171429
 0.00717447 0.00532101 0.00587297 0.01423551 0.00266228 0.07680725
 0.00401554 0.00676441 0.0106586  0.00774225 0.0065367  0.00908428
 0.00200922 0.00208934 0.0119444  0.00185817 0.00350617 0.00460245
 0.0068362  0.00745934 0.00170008 0.00192699 0.00395305 0.01427027
 0.00948308 



In [66]:
#number of anomalous nodes labeled by DONE
np.sum(labels_DONE_WM)

25

In [67]:
#Grabbing seg_ids of anomalous nodes labeled by DONE
DONE_WM_AD = node_df_WM.copy()
DONE_WM_AD['DONE_labels'] = labels_DONE_WM
DONE_WM_AD = DONE_WM_AD[DONE_WM_AD['DONE_labels'] == 1]

In [68]:
#finding 80th percentile (top 20% of anomalous nodes)
percentile80_WM = np.percentile(prob_DONE_WM[:,1], 80)
WM_AD_indarray = prob_DONE_WM[:,1] > percentile80_WM
DONE_WM_AD_80 = node_df_WM.iloc[WM_AD_indarray]
DONE_WM_AD_80

Unnamed: 0,id,pt_root_id,pt_position,timestamp,layer,cell_polarity,subclass,pre_syn_count,post_syn_count,unique_pre_syn_target_count,unique_post_syn_target_count
2,597214,864691135700041620,[340480 253104 25152],2022-06-09 14:30:32.002707,WM,excitatory,6P-CT,332.0,2168.0,251.0,1895.0
6,407442,864691135645051375,[234672 253728 18581],2022-06-09 14:30:32.002707,WM,excitatory,6P-IT,126.0,2551.0,92.0,2205.0
10,343716,864691135654019266,[201648 257392 18961],2022-06-09 14:30:32.002707,WM,excitatory,6P-CT,197.0,1747.0,154.0,1569.0
11,375867,864691136266410484,[220736 257136 19583],2022-06-09 14:30:32.002707,WM,excitatory,MC,113.0,1887.0,92.0,1549.0
13,407861,864691135323342236,[237488 256944 20989],2022-06-09 14:30:32.002707,WM,excitatory,6P-CT,362.0,3202.0,283.0,2844.0
14,132169,864691135181821954,[112208 257792 16298],2022-06-09 14:30:32.002707,WM,excitatory,6P-CT,244.0,1051.0,199.0,870.0
16,275443,864691135926663380,[166992 261696 18437],2022-06-09 14:30:32.002707,WM,excitatory,6P-CT,489.0,2181.0,409.0,1950.0
25,471029,864691135517733002,[262864 251984 26171],2022-06-09 14:30:32.002707,WM,excitatory,6P-IT,56.0,1970.0,44.0,1778.0
29,136093,864691135815483779,[109520 264176 18869],2022-06-09 14:30:32.002707,WM,excitatory,6P-IT,106.0,1747.0,92.0,1483.0
30,596258,864691135462824349,[332880 246144 19449],2022-06-09 14:30:32.002707,WM,excitatory,6P-CT,95.0,1730.0,73.0,1558.0


In [69]:
#DONE_WM_AD.to_csv('DONE_WM_AD.csv')