In [220]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import plotly.graph_objs as go
import plotly
import plotly.subplots as sp
import plotly.io as pio
from cloudvolume import CloudVolume as cv
from caveclient import CAVEclient
import meshparty
from meshparty import trimesh_vtk, trimesh_io 
from scipy.spatial import KDTree
import pygsheets
from collections import Counter
from sklearn.cluster import KMeans
import time
import pickle
import copy 
from scipy.spatial import cKDTree

# Open the segmentation layer `seg_m153` through CloudVolume (`cv` is the CloudVolume class
# imported above). Later cells refer to this layer as the "flat" segmentation.
vol =cv('gs://zheng_mouse_hippocampus_production/v2/seg_m153',parallel=True, progress=False, use_https=True)
# CAVE client for the `zheng_ca3` datastack; used below for chunkedgraph root lookups and
# synapse-table queries.
client = CAVEclient('zheng_ca3')


In [2]:
# Load the MF-PyC ground-truth table: worksheet 'MF-pyr' of the Google Sheet
# 'all_pyramidal_cells', read into a DataFrame starting from cell A4.
# NOTE(review): authorisation reads a service-account key file from the working directory;
# make sure that JSON file is never committed or shared together with this notebook.
gc = pygsheets.authorize(service_file = "./hippca3-8126bea0d603.json")
sheet = gc.open('all_pyramidal_cells')
worksheet = sheet.worksheet('title','MF-pyr')
mfpc_df = worksheet.get_as_df(start='A4')


In [3]:
# Get the latest root ID for every pyramidal cell (PC) listed in the ground-truth sheet.
#
# Each row's (x, y, z) nucleus coordinate is looked up in `vol`. When the lookup yields
# segment ID 0, up to three neighbouring positions are probed, and the resulting ID is
# then checked against / updated through the chunkedgraph.
#
# NOTE(review): rows for which every probe returns 0 append nothing, so `latest_roots_PC`
# can be shorter than `mfpc_df` and positions in the two are then no longer aligned.

nuclei_coord_in_vol = mfpc_df[['x','y','z']].to_numpy()
latest_roots_PC = []
print('Getting latest roots for PC...')  
# One batched lookup for all coordinates; the result is queried below with the coordinate
# tuple as key.
cell_ids_scatter = vol.scattered_points(nuclei_coord_in_vol)
for i in range(0, len(nuclei_coord_in_vol)):
    if i % 100 == 0:
        print(i)
    # NOTE(review): `.get` returns None when the key is missing; None != 0, so such a row
    # skips every fallback below and None is passed on to the chunkedgraph calls.
    cell_ID = [cell_ids_scatter.get(tuple(nuclei_coord_in_vol[i,:]))]
    # Fallback probes, tried in order: x-150, y+150, y-150.
    # NOTE(review): there is no x+150 probe -- confirm whether that is intentional.
    if cell_ID[0] == 0:
        cell_ID = np.array([vol[nuclei_coord_in_vol[i,0]-150, nuclei_coord_in_vol[i,1], nuclei_coord_in_vol[i,2]].squeeze()])
    if cell_ID[0] == 0:
        cell_ID = np.array([vol[nuclei_coord_in_vol[i,0], nuclei_coord_in_vol[i,1]+150, nuclei_coord_in_vol[i,2]].squeeze()])
    if cell_ID[0] == 0:
        cell_ID = np.array([vol[nuclei_coord_in_vol[i,0], nuclei_coord_in_vol[i,1]-150, nuclei_coord_in_vol[i,2]].squeeze()])
    if cell_ID[0] == 0:
        print("Failed to find neuron segment ID using nuclei center")
    else:
        # Keep the ID when it is already a latest root, otherwise store the suggested
        # replacement returned by the chunkedgraph.
        if client.chunkedgraph.is_latest_roots(cell_ID[0]):
            latest_roots_PC.append(cell_ID[0])
        else:
            latest_roots_PC.append(client.chunkedgraph.suggest_latest_roots(cell_ID[0]))
#latest_roots_PC_str = [str(num) for num in latest_roots_PC]

Getting latest roots for PC...
0
100
200
300
400
500
600


In [193]:
from scipy.spatial import ConvexHull, Delaunay
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from scipy.spatial import distance_matrix
from scipy.ndimage import label

def cluster_center(points):
    """Return the centroid of a point set, wrapped in a one-element list.

    Parameters
    ----------
    points : numpy.ndarray
        Array with one point per row.

    Returns
    -------
    list
        ``[center]`` where ``center`` is the per-column mean of ``points``.
        Callers index the result with ``[0]``.
    """
    # The per-point distance to the centroid was previously computed here but never
    # used; it has been dropped.
    return [points.mean(axis=0)]

def get_mesh_in_bbox(vertices, faces, bbox):
    """Crop a mesh to an axis-aligned bounding box.

    Parameters
    ----------
    vertices : numpy.ndarray
        ``(V, 3)`` vertex coordinates.
    faces : array-like of int
        ``(F, k)`` vertex indices per face.
    bbox : sequence of 6 numbers
        ``x_min, x_max, y_min, y_max, z_min, z_max`` (bounds inclusive), in the same
        units as ``vertices``.

    Returns
    -------
    vertices_in_bbox : numpy.ndarray
        The vertices lying inside the box, in their original order.
    new_faces : numpy.ndarray
        The faces whose vertices are *all* inside the box, re-indexed so that they
        refer to rows of ``vertices_in_bbox``. Has zero rows when no face survives.
    """
    x_min, x_max, y_min, y_max, z_min, z_max = bbox
    mask = ((vertices[:, 0] >= x_min) & (vertices[:, 0] <= x_max) &
            (vertices[:, 1] >= y_min) & (vertices[:, 1] <= y_max) &
            (vertices[:, 2] >= z_min) & (vertices[:, 2] <= z_max))
    vertices_in_bbox = vertices[mask]

    faces = np.asarray(faces)
    if faces.size == 0:
        # Nothing to filter; also avoids indexing with an empty float array.
        return vertices_in_bbox, np.empty((0, 3), dtype=np.int64)

    # A face is kept only when every one of its vertices is inside the box. Looking the
    # vertices up in the boolean mask replaces the former per-vertex linear search.
    keep_face = mask[faces].all(axis=1)

    # Position of each kept vertex in `vertices_in_bbox`: the running count of True
    # entries, minus one. Entries for dropped vertices are never read.
    global_to_local = np.cumsum(mask) - 1
    new_faces = global_to_local[faces[keep_face]]

    return vertices_in_bbox, new_faces


def cluster_vertices_kmeans(vertices, n_clusters):
    """Run k-means on ``vertices`` with a fixed seed.

    Returns a 3-tuple: the per-row cluster label, the fitted cluster centres and the
    fitted model's inertia.
    """
    model = KMeans(n_clusters=n_clusters, random_state=0)
    assignments = model.fit_predict(vertices)
    return assignments, model.cluster_centers_, model.inertia_

    
def get_cluster_scores(points, n_cluster_max):
    """Collect the k-means inertia of ``points`` for k = 1 .. ``n_cluster_max``.

    Returns an ``(n_cluster_max, 1)`` array whose row ``k - 1`` holds the inertia
    obtained with ``k`` clusters.
    """
    kmeans_inertia = np.zeros((n_cluster_max, 1))
    for k in range(1, n_cluster_max + 1):
        # Only the third element (the inertia) of the clustering result is needed.
        kmeans_inertia[k - 1] = cluster_vertices_kmeans(points, k)[2]
    return kmeans_inertia
        

def decide_number_of_clusters(kmeans_inertia, min_drop_rate=1.8):
    """Pick a cluster count from a sequence of k-means inertias.

    Parameters
    ----------
    kmeans_inertia : array-like
        Inertia for k = 1, 2, ... in that order (e.g. the ``(n, 1)`` array produced by
        ``get_cluster_scores``).
    min_drop_rate : float, optional
        Minimum ratio ``inertia[k] / inertia[k + 1]`` that counts as a worthwhile gain.
        When every ratio is below it, a single cluster is chosen. Defaults to 1.8.

    Returns
    -------
    n_best_cluster : int
        The k reached by the step with the largest inertia ratio, or 1 when no step
        reaches ``min_drop_rate`` (or fewer than two inertias were supplied).
    inertia_drop_rate : numpy.ndarray
        The ratio of each inertia to the next one; empty when fewer than two inertias
        were supplied.
    """
    scores = np.atleast_1d(np.asarray(kmeans_inertia, dtype=float))
    if scores.shape[0] < 2:
        # No consecutive pair to compare; argmax on an empty array would raise.
        return 1, np.empty((0,) + scores.shape[1:])

    with np.errstate(divide='ignore', invalid='ignore'):
        inertia_drop_rate = np.divide(scores[:-1], scores[1:])
    # 0 / 0 means the fit was already perfect before the extra cluster was added: treat
    # it as "no gain" instead of letting NaN win the argmax below.
    inertia_drop_rate = np.where(np.isnan(inertia_drop_rate), 1.0, inertia_drop_rate)

    if np.all(inertia_drop_rate < min_drop_rate):
        return 1, inertia_drop_rate
    # Ratio index i compares k = i + 1 against k = i + 2, hence the offset of 2.
    n_best_cluster = int(np.argmax(inertia_drop_rate)) + 2
    return n_best_cluster, inertia_drop_rate


def split_mesh(vertices, faces, labels):
    """Split a mesh into one sub-mesh per vertex label.

    Parameters
    ----------
    vertices : numpy.ndarray
        ``(V, 3)`` vertex coordinates.
    faces : array-like of int
        ``(F, k)`` vertex indices per face.
    labels : array-like
        One label per vertex.

    Returns
    -------
    list of (numpy.ndarray, numpy.ndarray)
        One ``(cluster_vertices, cluster_faces)`` pair per unique label, in sorted label
        order. ``cluster_faces`` contains only faces whose vertices all carry that label,
        re-indexed into ``cluster_vertices``; it is an empty array (``np.array([])``)
        when the label owns no complete face.
    """
    labels = np.asarray(labels)
    faces = np.asarray(faces)
    sub_meshes = []
    for cluster_id in np.unique(labels):
        cluster_mask = labels == cluster_id
        cluster_vertices = vertices[cluster_mask]

        # Boolean lookup per face vertex replaces the former linear membership search.
        if faces.size:
            keep_face = cluster_mask[faces].all(axis=1)
        else:
            keep_face = np.zeros(0, dtype=bool)

        if not keep_face.any():
            sub_meshes.append((cluster_vertices, np.array([])))
            continue

        # Row of each vertex inside `cluster_vertices`: running count of True, minus one.
        global_to_local = np.cumsum(cluster_mask) - 1
        new_faces = global_to_local[faces[keep_face]]

        sub_meshes.append((cluster_vertices, new_faces))
    return sub_meshes


def get_foreground_within_convhull(vertices, bw, bbx):
    """Keep only the foreground voxels of ``bw`` that lie inside the convex hull of ``vertices``.

    Parameters
    ----------
    vertices : numpy.ndarray
        ``(V, 3)`` points whose convex hull defines the region to keep. They must be in
        the same coordinate frame as the voxel indices of ``bw`` shifted by ``bbx.minpt``.
    bw : numpy.ndarray
        3-D mask; non-zero entries are foreground.
    bbx : object
        Must expose ``minpt.x``, ``minpt.y`` and ``minpt.z``: the offset added to the
        local voxel indices of ``bw`` before the hull test.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array shaped like ``bw`` with 1 at foreground voxels inside the hull.
    """
    offset = np.array([bbx.minpt.x, bbx.minpt.y, bbx.minpt.z])
    # Integer indices into `bw`; kept separately so the result can be written back
    # without subtracting the (possibly float) offset again.
    local_coords = np.argwhere(bw)
    foreground_coords = local_coords + offset

    hull = ConvexHull(vertices)
    # Triangulating only the hull vertices is enough for a point-in-hull test.
    delaunay = Delaunay(vertices[hull.vertices])

    foreground_inside = np.zeros_like(bw, dtype=np.uint8)
    if local_coords.shape[0] == 0:
        return foreground_inside

    inside_hull = delaunay.find_simplex(foreground_coords) >= 0
    # Single vectorised scatter instead of a Python loop over every foreground voxel.
    foreground_inside[tuple(local_coords[inside_hull].T)] = 1
    return foreground_inside


def get_bbox(bw):
    """Return the tight index bounds of the non-zero entries of a 3-D array.

    The result is the list ``[xmin, xmax, ymin, ymax, zmin, zmax]`` (inclusive indices).
    """
    occupied = np.argwhere(bw)
    (xmin, ymin, zmin), (xmax, ymax, zmax) = occupied.min(axis=0), occupied.max(axis=0)
    return [xmin, xmax, ymin, ymax, zmin, zmax]


def get_largest_cc(mask):
    """Return a mask of the largest connected component of ``mask``.

    When ``mask`` has no foreground, an all-zero array with the dtype of ``mask`` is
    returned; otherwise the result is a ``uint8`` array with 1 on the largest component.
    """
    components, n_found = label(mask)
    if n_found == 0:
        # Nothing labelled: hand back an empty mask.
        return np.zeros_like(mask)
    sizes = np.bincount(components.ravel())
    # Entry 0 counts the background, so search from label 1 and shift the index back.
    foreground_sizes = sizes[1:]
    winner = 1 + np.argmax(foreground_sizes)
    return (components == winner).astype(np.uint8)

def is_orphan_bouton(mask, margin=2):
    """Report whether ``mask`` has foreground within ``margin`` voxels of any face of the array.

    Parameters
    ----------
    mask : array-like
        Volume mask; non-zero entries are foreground.
    margin : int, optional
        Thickness, in voxels, of the boundary shell inspected on every face. Defaults
        to 2.

    Returns
    -------
    bool
        True when at least one foreground voxel lies in the boundary shell.

    Notes
    -----
    The "max" side of every axis used to be sliced as ``n-3:n-1``, which skipped the very
    last plane, so a mask touching only that plane was reported as not touching the
    boundary. The shell now covers the last ``margin`` planes, mirroring the first
    ``margin`` planes checked on the "min" side.
    """
    if margin <= 0:
        return False
    foreground = np.asarray(mask) != 0
    for axis in range(foreground.ndim):
        # Bring the inspected axis to the front so both faces are plain leading slices.
        along = np.moveaxis(foreground, axis, 0)
        if along[:margin].any() or along[-margin:].any():
            return True
    return False
    

def get_bbox_for_syn_cluster(syn_cluster, vol, ws):
    """Build a voxel-space bounding box around a cluster of synapse positions.

    Parameters
    ----------
    syn_cluster : numpy.ndarray
        ``(N, 3)`` positions in microns; they are converted to voxels by dividing by
        ``[18, 18, 45]`` and multiplying by 1000.
    vol : object
        Must expose ``bounds`` with ``minpt`` / ``maxpt`` carrying ``x``, ``y``, ``z``;
        the result is clamped to these bounds.
    ws : number
        Margin, in voxels, added on each side in x and y. The z margin is ``ws * 2 / 5``.

    Returns
    -------
    object
        A copy of ``vol.bounds`` whose ``minpt`` / ``maxpt`` have been tightened to the
        padded cluster extent.
    """
    # Work on a copy: the fields below are overwritten in place, and the object handed
    # out by `vol.bounds` must not be altered in case it is shared between calls.
    bbx = copy.deepcopy(vol.bounds)
    min_corner = np.min(syn_cluster, axis=0) / np.array([18,18,45]) *1000
    max_corner = np.max(syn_cluster, axis=0) / np.array([18,18,45]) *1000
    bbx.minpt.x = max(bbx.minpt.x, min_corner[0] - ws)
    bbx.maxpt.x = min(bbx.maxpt.x, max_corner[0] + ws)
    bbx.minpt.y = max(bbx.minpt.y, min_corner[1] - ws)
    bbx.maxpt.y = min(bbx.maxpt.y, max_corner[1] + ws)
    # 2/5 equals 18/45, so the z margin spans the same physical distance as `ws` in x/y.
    bbx.minpt.z = max(bbx.minpt.z, min_corner[2] - (ws*2/5))
    bbx.maxpt.z = min(bbx.maxpt.z, max_corner[2] + (ws*2/5))
    return bbx


##############################################
# TO DO: replace kmeans by decision boundary at global minimum of a histogram
# see Split_MF_boutons_test.ipynb
#############################################
def divide_vertices_by_count_neighboring_voxels(vertices_bbox_microns, radius):
    """Split vertices into dense and sparse groups by local neighbour count.

    For every vertex the number of vertices within ``radius`` is counted. A 2-cluster
    k-means on those counts gives two centres, and their mean is used as the threshold.

    Returns
    -------
    bouton_vertices : numpy.ndarray
        Vertices whose count is at or above the threshold.
    nonbouton_vertices : numpy.ndarray
        Vertices whose count is below the threshold.
    counts : numpy.ndarray
        Neighbour count per vertex.
    counts_thresholded : numpy.ndarray
        1 where the count is at or above the threshold, 0 elsewhere.
    """
    neighbor_tree = cKDTree(vertices_bbox_microns)
    counts = neighbor_tree.query_ball_point(vertices_bbox_microns, radius, return_length=True)

    # k-means expects a 2-D sample matrix, hence the single-column reshape.
    two_means = KMeans(n_clusters=2, random_state=42).fit(counts.reshape(-1, 1))
    cutoff = np.mean(two_means.cluster_centers_)

    is_dense = counts >= cutoff
    return (
        vertices_bbox_microns[is_dense],
        vertices_bbox_microns[~is_dense],
        counts,
        is_dense.astype(int),
    )


def save_bouton_split_result_3D_mesh(vertices_bbox_microns, color_weights, latest_mf, latest_root_PC):
    """Write an interactive 3-D scatter of mesh vertices to an HTML file.

    Vertices are coloured by ``color_weights``. The file is written under
    ``./result/bouton_split_ver3/`` and named ``<latest_mf>_<latest_root_PC>.html``.
    """
    marker_style = dict(size=1, color=color_weights, opacity=0.8, colorbar=dict(title="Neighbor Count"))
    vertex_trace = go.Scatter3d(
        x=vertices_bbox_microns[:, 0],
        y=vertices_bbox_microns[:, 1],
        z=vertices_bbox_microns[:, 2],
        mode='markers',
        marker=marker_style,
        name='vertices',
    )

    fig = go.Figure()
    fig.add_trace(vertex_trace)
    axis_titles = dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z')
    fig.update_layout(title="3D Scatter Plots", scene=axis_titles)

    out_path = './result/bouton_split_ver3/' + str(latest_mf) + '_' + str(latest_root_PC) + '.html'
    plotly.offline.plot(fig, filename=out_path)


def save_bouton_split_result_3D_seg(binary_mask, latest_mf, latest_root_PC):
    """Write an interactive 3-D scatter of the voxels equal to 1 in ``binary_mask`` to HTML.

    The z index is scaled by 5/2 before plotting. The file is written under
    ``./result/bouton_split_ver3/`` and named ``seg<latest_mf>_<latest_root_PC>.html``.
    """
    xc, yc, zc = np.where(binary_mask == 1)
    marker_style = dict(size=2, color='red', opacity=0.8, colorbar=dict(title="Neighbor Count"))
    voxel_trace = go.Scatter3d(
        x=xc,
        y=yc,
        z=zc*5/2,
        mode='markers',
        marker=marker_style,
        name='vertices',
    )

    fig = go.Figure()
    fig.add_trace(voxel_trace)
    axis_titles = dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z')
    fig.update_layout(title="3D Scatter Plots", scene=axis_titles)

    out_path = './result/bouton_split_ver3/seg' + str(latest_mf) + '_' + str(latest_root_PC) + '.html'
    plotly.offline.plot(fig, filename=out_path)
    

def get_flat_seg_id_from_coord(syn_center_pos, vol, latest_roots_PC):
    """Find the segment ID that most synapse positions fall into, ignoring known PC roots.

    Parameters
    ----------
    syn_center_pos : sequence
        One entry per synapse; element ``[0]`` of each entry is the coordinate that is
        looked up in ``vol``.
    vol : object
        Must provide ``scattered_points(list_of_points)`` returning a mapping whose
        values are segment IDs.
    latest_roots_PC : container
        Segment IDs to discard before voting.

    Returns
    -------
    The most frequent remaining segment ID, or ``-1`` when no ID remains (including when
    ``syn_center_pos`` is empty).
    """
    # The former `len(syn_center_pos) > 9` test selected between two identical code
    # paths; a single path is kept.
    seg_ids = []
    for pos in syn_center_pos:
        lookup = vol.scattered_points([pos[0]])
        seg_ids.append(list(lookup.values())[0])

    candidates = [seg_id for seg_id in seg_ids if seg_id not in latest_roots_PC]
    if len(candidates) == 0:
        return -1
    return Counter(candidates).most_common(1)[0][0]


def get_flat_seg_id_from_sv(sv_ids, vol, max_samples=5):
    """Vote for a root ID using the first few supervoxel IDs.

    Parameters
    ----------
    sv_ids : sequence
        One entry per synapse; element ``[0]`` of each entry is a supervoxel ID.
    vol : object
        Unused; kept so existing call sites keep working.
    max_samples : int, optional
        Number of leading entries of ``sv_ids`` to look up. Defaults to 5.

    Returns
    -------
    The most frequent root ID among the sampled supervoxels, or ``-1`` when ``sv_ids``
    is empty.

    Notes
    -----
    Root IDs are resolved through the module-level ``client``. The function used to read
    the global ``syn_sv_ids`` instead of its own ``sv_ids`` argument and always indexed
    five entries, which raised on shorter inputs.
    """
    root_ids = [client.chunkedgraph.get_root_id(sv[0]) for sv in sv_ids[:max_samples]]
    if not root_ids:
        return -1
    return Counter(root_ids).most_common(1)[0][0]


In [None]:
'''
with open('./variables/potential_mf_ids_250225.pkl','rb') as f: 
    mf_proof_ids_flat = pickle.load(f)

syn_thresh = 4
mf_group = []
for i in range(0, len(latest_roots_PC)):
    syn = client.materialize.synapse_query(pre_ids=mf_proof_ids_flat, post_ids=latest_roots_PC[i], bounding_box=None, bounding_box_column='post_pt_position', 
                    timestamp=None, remove_autapses=True, include_zeros=False, limit=None, offset=None, 
                    split_positions=False, desired_resolution=[18,18,45], materialization_version=None, 
                    synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True) 
    freq = syn['pre_pt_root_id'].value_counts()
    pre_mf_ids = freq[freq > syn_thresh ].index.tolist()
    mf_group.append([latest_roots_PC[i],pre_mf_ids])

    if i % 10 == 0:
        print(i)
'''

In [223]:
# Load the precomputed MF groups from disk instead of rebuilding them.
# NOTE(review): pickle.load executes arbitrary code from the file; only load pickles
# that were produced by this project.
with open('./variables/mf_group_th5_250302.pkl', 'rb') as f:  
    mf_group = pickle.load(f)
print(len(mf_group))


639


In [None]:
'''
# remove None
idx_none = [i for i, val in enumerate(mf_group) if val is None or len(val) == 0]
roots_pc_0114 = [a for i, a in enumerate(roots_pc_0114) if i not in idx_none]
mf_group = [a for i, a in enumerate(mf_group) if i not in idx_none]
'''

In [235]:
from scipy.spatial import distance
import time

# Colour names; not referenced by any of the cells visible here.
colors = ['green','blue','yellow','purple','cyan',"black"]
# Directory passed to MeshMeta as the on-disk mesh cache.
mesh_dir = '../mesh_data/'

def extract_bouton_by_voxel_density(latest_mf, latest_root_PC, vol, syn_coords, radius, syn_sv_id, mm):
    """Estimate the volume of the bouton(s) one presynaptic segment forms on one target cell.

    The synapse positions are used to decide between a single bouton and several
    boutons. For each bouton the segment mesh is cropped around the synapses, the densely
    sampled part of the mesh is taken as the bouton, and the segmentation inside that
    part's bounding box is downloaded to count voxels.

    Parameters
    ----------
    latest_mf : int
        Segment ID used both for the mesh request and as the ``label`` of the download.
    latest_root_PC : int
        ID of the postsynaptic cell; only copied into the output records and file names.
    vol : object
        Volume providing ``bounds`` and ``download(bbox=..., label=...)``.
    syn_coords : numpy.ndarray
        ``(N, 3)`` synapse positions in voxels; multiplied by ``[18, 18, 45] / 1000``
        to obtain microns.
    radius : float
        Neighbourhood radius (microns) for the vertex-density split.
    syn_sv_id : list
        Supervoxel IDs copied unchanged into the output records.
    mm : object
        Mesh source providing ``mesh(seg_id=..., remove_duplicate_vertices=True)``.

    Returns
    -------
    None
        When ``syn_coords`` has 3 rows or fewer (nothing is analysed).
    list
        Otherwise a list of records, possibly empty:

        * single bouton: ``[latest_mf, latest_root_PC, center_vx, n_syn, volume, bbox, syn_sv_id]``
        * multi bouton: ``['double_bouton', latest_mf, latest_root_PC, center_vx, n_syn, volume, bbox, syn_sv_id]``
        * failed split: ``['not MF', latest_mf, latest_root_PC, center_vx, n_syn, syn_sv_id]``

        If the mesh cannot be fetched, a single *flat* record
        ``['no mesh available', latest_mf, latest_root_PC, center_vx]`` is returned
        instead, and records collected so far are dropped.

    Notes
    -----
    NOTE(review): a later cell of this notebook defines a function with the same name
    and different thresholds; whichever cell ran last is the one in effect.
    """
    mf_pyr_syn_thresh = 3
    dsyn_outlier_thresh = 4 #[microns]
    result = []

    if syn_coords.shape[0] > mf_pyr_syn_thresh:
        syn_coords_microns = syn_coords * np.array([18,18,45]) / 1000
        syn_cluster_center_microns = cluster_center(syn_coords_microns)[0]

        # count number of syn too far away from the center of all syn
        # if there are such syn, then this neuron makes more than one bouton on target cell
        syn_cluster_center_vx = syn_cluster_center_microns / np.array([18,18,45]) * 1000
        n_outlier = 0
        for k in range(0, len(syn_coords_microns)):
            dist = distance.euclidean(syn_coords_microns[k], syn_cluster_center_microns)
            if dist > dsyn_outlier_thresh:
                n_outlier += 1

        if n_outlier > 4:  # at least two boutons
            # Let the inertia drop between k = 1..3 decide how many synapse clusters exist.
            score = get_cluster_scores(syn_coords_microns, 3)
            n_cluster_syn, _ = decide_number_of_clusters(score)
            syn_cluster_labels, syn_cluster_centers, _ = cluster_vertices_kmeans(syn_coords_microns, n_cluster_syn)
            unique_syn_labels, cluster_sizes = np.unique(syn_cluster_labels, return_counts=True)
            # Only clusters with more than `mf_pyr_syn_thresh` synapses are analysed.
            valid_syn_clusters = unique_syn_labels[cluster_sizes>mf_pyr_syn_thresh]
            print('at least two boutons')
            for i in range(0, n_cluster_syn):
                if i in valid_syn_clusters:
                    syn_cluster_points = syn_coords_microns[syn_cluster_labels == i]
                    this_syn_center_vx = syn_cluster_centers[i] / np.array([18,18,45]) * 1000
                    print('bouton', i)
                    # Clusters centred outside this z range are skipped without a record.
                    if (this_syn_center_vx[2] > 200) and (this_syn_center_vx[2] < 2040):
                        # More synapses -> larger crop margin (voxels).
                        if len(syn_cluster_points) < 24:
                            bbx=get_bbox_for_syn_cluster(syn_cluster_points, vol, 200)
                        elif len(syn_cluster_points) < 36:
                            bbx=get_bbox_for_syn_cluster(syn_cluster_points, vol, 250)
                        else:
                            bbx=get_bbox_for_syn_cluster(syn_cluster_points, vol, 300)
                        # Voxel box scaled by the voxel size; assumes mesh vertices are in nm -- TODO confirm.
                        bbox_nm = np.array([bbx.minpt.x, bbx.maxpt.x, bbx.minpt.y, bbx.maxpt.y, bbx.minpt.z, bbx.maxpt.z]) * np.array([18,18,18,18,45,45])
                        try:
                            mesh = mm.mesh(seg_id = latest_mf, remove_duplicate_vertices=True)
                        except Exception:
                            # `Exception` (not a bare except) so Ctrl-C still interrupts a long run.
                            print('no mesh')
                            return ['no mesh available', latest_mf, latest_root_PC, this_syn_center_vx]

                        vertices_bbox, _ = get_mesh_in_bbox(mesh.vertices, mesh.faces, bbox_nm)
                        vertices_bbox_microns = vertices_bbox/1000
                        try:
                            vertices_bouton, _, _, counts_thresholded = divide_vertices_by_count_neighboring_voxels(vertices_bbox_microns, radius)
                            vertices_bouton_vx = np.round(vertices_bouton / np.array([18,18,45]) * 1000, decimals=3)
                            # Shrink the download box to the extent of the dense vertices.
                            bouton_bbox = copy.deepcopy(bbx)
                            bouton_bbox.minpt.x, bouton_bbox.minpt.y, bouton_bbox.minpt.z = vertices_bouton_vx.min(axis=0)
                            bouton_bbox.maxpt.x, bouton_bbox.maxpt.y, bouton_bbox.maxpt.z = vertices_bouton_vx.max(axis=0)
                            seg_bouton = vol.download(bbox=bouton_bbox, label=latest_mf)
                            seg_bouton = np.squeeze(seg_bouton, axis=-1)
                            # Sum of the download times the voxel volume in cubic microns;
                            # presumably the download is a 0/1 mask for `label` -- TODO confirm.
                            bouton_vol = np.round(np.sum(seg_bouton) * 0.018 * 0.018 * 0.045, decimals=4)

                            result.append(['double_bouton',latest_mf, latest_root_PC, this_syn_center_vx, len(syn_coords), bouton_vol, bouton_bbox, syn_sv_id])

                            #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
                            save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)
                        except Exception:
                            # Any failure of the split/download is recorded as "not MF".
                            print('likely to be not MF')
                            result.append(['not MF',latest_mf, latest_root_PC, this_syn_center_vx, len(syn_coords), syn_sv_id])

        else:
            if (syn_cluster_center_vx[2] > 200) and (syn_cluster_center_vx[2] < 2040):
                print('single bouton')
                # More synapses -> larger crop margin (voxels).
                if len(syn_coords_microns) < 24:
                    bbx=get_bbox_for_syn_cluster(syn_coords_microns, vol, 400)
                elif len(syn_coords_microns) < 36:
                    bbx=get_bbox_for_syn_cluster(syn_coords_microns, vol, 500)
                else:
                    bbx=get_bbox_for_syn_cluster(syn_coords_microns, vol, 600)
                bbox_nm = np.array([bbx.minpt.x, bbx.maxpt.x, bbx.minpt.y, bbx.maxpt.y, bbx.minpt.z, bbx.maxpt.z]) * np.array([18,18,18,18,45,45])
                try:
                    mesh = mm.mesh(seg_id = latest_mf, remove_duplicate_vertices=True)
                except Exception:
                    print('no mesh')
                    return ['no mesh available', latest_mf, latest_root_PC, syn_cluster_center_vx]

                # Unlike the multi-bouton branch, errors below propagate to the caller.
                vertices_bbox, _ = get_mesh_in_bbox(mesh.vertices, mesh.faces, bbox_nm)
                vertices_bbox_microns = vertices_bbox/1000
                vertices_bouton, _, _, counts_thresholded = divide_vertices_by_count_neighboring_voxels(vertices_bbox_microns, radius)
                vertices_bouton_vx = np.round(vertices_bouton / np.array([18,18,45]) * 1000, decimals=3)
                bouton_bbox = copy.deepcopy(bbx)
                bouton_bbox.minpt.x, bouton_bbox.minpt.y, bouton_bbox.minpt.z = vertices_bouton_vx.min(axis=0)
                bouton_bbox.maxpt.x, bouton_bbox.maxpt.y, bouton_bbox.maxpt.z = vertices_bouton_vx.max(axis=0)
                seg_bouton = vol.download(bbox=bouton_bbox, label=latest_mf)
                seg_bouton = np.squeeze(seg_bouton, axis=-1)
                bouton_vol = np.round(np.sum(seg_bouton) * 0.018 * 0.018 * 0.045, decimals=4)

                result.append([latest_mf, latest_root_PC, syn_cluster_center_vx, len(syn_coords), bouton_vol, bouton_bbox, syn_sv_id])

                #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
                save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)

            else:
                print('truncated bouton')

        return result

# Driver: for each pyramidal cell in `mf_group`, query its synapses from the grouped
# presynaptic segments and run the bouton extraction once per presynaptic segment.
# Each entry of `mf_group` is read as [pc_root, list_of_presynaptic_roots].
stime = time.time()
radius = 3
results = []
print('last i = ', len(latest_roots_PC)//3*1)
# NOTE(review): the loop is currently limited to the first cell; the full range is kept
# in the commented line below.
for i in range(0,1):
#for i in range(183,len(latest_roots_PC)//3*1):

    latest_root_mfs = mf_group[i][1]
    #latest_root_mfs = [648518346444590140,648518346440318053]
    if latest_root_mfs is not None:
        latest_root_pc = client.chunkedgraph.suggest_latest_roots(mf_group[i][0])
        #latest_root_pc = 648518346451792508
        # One query for all presynaptic segments of this cell; filtered per segment below.
        syn = client.materialize.synapse_query(pre_ids=latest_root_mfs, post_ids=latest_root_pc, bounding_box=None, bounding_box_column='post_pt_position', 
                    timestamp=None, remove_autapses=True, include_zeros=False, limit=None, offset=None, 
                    split_positions=False, desired_resolution=[18,18,45], materialization_version=None, 
                    synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True) 
        # NOTE(review): j runs to 99 regardless of len(latest_root_mfs); indexing
        # latest_root_mfs[j] raises IndexError once j exceeds the list.
        for j in range(0,100):
#        for j in range(0,len(latest_root_mfs)):
#            if i==183 and j <= 46:   # to carry on from interrupted previous run
#                continue
#            time.sleep(1)
            
            # NOTE(review): a new MeshMeta is constructed on every iteration.
            mm = trimesh_io.MeshMeta(cv_path='gs://zheng_mouse_hippocampus_production/v2/seg_m153',
                                    disk_cache_path=mesh_dir, cache_size=20)
            syn_center_pos = syn.loc[syn['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_position']].values
            if len(syn_center_pos) == 0:
                # No synapse under this root: replace the root with the suggested latest
                # one (this also overwrites the entry inside `mf_group`) and query again.
                latest_root_mfs[j] = client.chunkedgraph.suggest_latest_roots(latest_root_mfs[j])
                syn2 = client.materialize.synapse_query(pre_ids=latest_root_mfs[j], post_ids=latest_root_pc, bounding_box=None, bounding_box_column='post_pt_position', 
                    timestamp=None, remove_autapses=True, include_zeros=False, limit=None, offset=None, 
                    split_positions=False, desired_resolution=[18,18,45], materialization_version=None, 
                    synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True) 
                if len(syn2) > 0:
                    syn_center_pos = syn2.loc[syn2['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_position']].values
                    syn_sv_ids = syn2.loc[syn2['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_supervoxel_id']].values
                    # NOTE(review): assumes at least three synapse rows; fewer raises IndexError.
                    syn_sv_id3 = [syn_sv_ids[0][0],syn_sv_ids[1][0],syn_sv_ids[2][0]]
                else:
                    continue
            else:
                syn_sv_ids = syn.loc[syn['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_supervoxel_id']].values
                # NOTE(review): assumes at least three synapse rows; fewer raises IndexError.
                syn_sv_id3 = [syn_sv_ids[0][0],syn_sv_ids[1][0],syn_sv_ids[2][0]]
                
            #mf_id_flat_seg_m153 = get_flat_seg_id_from_sv(syn_sv_ids, vol)
            # Majority vote over the synapse positions for the segment ID in `vol`.
            mf_id_flat_seg_m153 = get_flat_seg_id_from_coord(syn_center_pos, vol, latest_roots_PC)
            syn_coords = np.array([arr[0] for arr in syn_center_pos])            

            if mf_id_flat_seg_m153 == -1:
                # Fallback: take meshes from the graphene layer and use the root ID itself.
                # `vol` is still the volume that gets downloaded from.
                print('Cannot locate MF seg from flat segmentation, Trying latest segmentation...')
                mm = trimesh_io.MeshMeta(cv_path='graphene://https://minnie.microns-daf.com/segmentation/table/zheng_ca3',
                                        disk_cache_path=mesh_dir, cache_size=20)
                mf_id_flat_seg_m153 = latest_root_mfs[j]      
                try:
                    result = extract_bouton_by_voxel_density(mf_id_flat_seg_m153, latest_root_pc, vol, syn_coords, radius, syn_sv_id3, mm)
                    results.append(result)
                    print(i ,',', j)
                
                # NOTE(review): bare except also swallows KeyboardInterrupt.
                except:
                    print('Still cannot locate MF')
                    results.append(['Cannot locate MF using syn coords and supervoxels'])
                    print(i ,',', j)
                    print(mf_id_flat_seg_m153)
                    print(latest_root_pc) 
                
            else:
                result = extract_bouton_by_voxel_density(mf_id_flat_seg_m153, latest_root_pc, vol, syn_coords, radius, syn_sv_id3, mm)
                results.append(result)
                print(i ,',', j)
    
print('---------------------------------')
print('Done!')
etime = time.time()
print(etime-stime)
print('---------------------------------')
#print(results)

# NOTE(review): saving is disabled, so `results` exists only in kernel memory.
#with open("./variables/mf_double_bouton_split_best_250302_01.pkl", "wb") as file:
#    pickle.dump(results, file)

last i =  213
at least two boutons
bouton 0
bouton 1
0 , 0
single bouton
0 , 1
single bouton
0 , 2
single bouton
0 , 3
single bouton
0 , 4
single bouton
0 , 5
single bouton
0 , 6
single bouton
0 , 7
at least two boutons
bouton 0
bouton 1
0 , 8
single bouton
0 , 9
at least two boutons
bouton 0
bouton 1
0 , 10
single bouton
0 , 11
single bouton
0 , 12
single bouton
0 , 13
single bouton
0 , 14
single bouton
0 , 15
single bouton
0 , 16
single bouton
0 , 17
single bouton
0 , 18
single bouton
0 , 19
single bouton
0 , 20
single bouton
0 , 21
single bouton


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s]


0 , 22
single bouton
0 , 23
single bouton
0 , 24
single bouton
0 , 25
single bouton
0 , 26
single bouton
0 , 27
single bouton
0 , 28
single bouton
0 , 29
single bouton
0 , 30
single bouton
0 , 31
single bouton
0 , 32
single bouton
0 , 33
single bouton
0 , 34
single bouton
0 , 35
single bouton
0 , 36
single bouton
0 , 37
single bouton
0 , 38
single bouton
0 , 39
single bouton
0 , 40
single bouton
0 , 41
single bouton
0 , 42
single bouton
0 , 43
single bouton
0 , 44
single bouton
0 , 45
single bouton
0 , 46
single bouton
0 , 47
single bouton
0 , 48
single bouton
0 , 49
single bouton
0 , 50
single bouton
0 , 51
single bouton
0 , 52
single bouton
0 , 53
single bouton
0 , 54
at least two boutons
bouton 0
bouton 1
0 , 55
single bouton
0 , 56
single bouton
0 , 57
single bouton
0 , 58
single bouton
0 , 59
single bouton
0 , 60
single bouton
0 , 61
single bouton
0 , 62
single bouton
0 , 63
single bouton
0 , 64
single bouton
0 , 65
single bouton
0 , 66
single bouton
0 , 67
single bouton
0 , 68
si

In [180]:
# Reload the MF groups, then select the pyramidal cells whose ground-truth MF count
# exceeds `numMF_thresh`.
# NOTE(review): pickle.load executes arbitrary code from the file; only load pickles
# that were produced by this project.
with open('./variables/mf_group_th5_250302.pkl', 'rb') as f:  
    mf_group = pickle.load(f)
print(len(mf_group))

#len_group = [len(item) for item in bouton_vols_grouped]
# Columns of the ground-truth sheet: MF count per cell and the cell's segment ID.
gt_numMF = mfpc_df['GT_numMF'].values.tolist()
gt_pc_roots = mfpc_df['segid_0114'].values.tolist()

#print((len_group))
#print(gt_numMF)

numMF_thresh = 100
# `large_pyc` and `large_numMF` are filtered with the same condition, so they stay
# index-aligned with each other.
large_pyc = [a for a,b in zip(gt_pc_roots, gt_numMF) if b > numMF_thresh]
large_numMF = [b for b in gt_numMF if b > numMF_thresh]
print(large_numMF)

639
[124, 206, 224, 111, 102, 147, 157, 130, 255, 111, 103, 118, 105, 147, 188, 152, 127, 207, 133, 250, 103, 104, 227, 121, 114, 157, 142, 104, 109, 110, 129, 292, 137, 173, 122, 158, 287, 101]


In [214]:
# Get missing boutons for large pyramidal cells with GT_numMF > 100.


# Directory passed to MeshMeta as the on-disk mesh cache (same value as assigned in the
# earlier extraction cell).
mesh_dir = '../mesh_data/'

def extract_bouton_by_voxel_density(latest_mf, latest_root_PC, vol, syn_coords, radius, syn_sv_id, mm):
    """Estimate the volume of the bouton(s) one presynaptic segment forms on one target cell.

    Variant with relaxed thresholds: more than 1 synapse is enough to analyse a
    connection or a cluster, more than 3 outlying synapses trigger the multi-bouton
    path, the crop margins are 150/200/250 voxels (multi) and 300/400/500 voxels
    (single), and no HTML plots are written.

    NOTE(review): this redefines a function of the same name from an earlier cell of this
    notebook; after this cell runs, the earlier definition is no longer reachable.

    Parameters
    ----------
    latest_mf : int
        Segment ID used both for the mesh request and as the ``label`` of the download.
    latest_root_PC : int
        ID of the postsynaptic cell; only copied into the output records.
    vol : object
        Volume providing ``bounds`` and ``download(bbox=..., label=...)``.
    syn_coords : numpy.ndarray
        ``(N, 3)`` synapse positions in voxels; multiplied by ``[18, 18, 45] / 1000``
        to obtain microns.
    radius : float
        Neighbourhood radius (microns) for the vertex-density split.
    syn_sv_id : list
        Supervoxel IDs copied unchanged into the output records.
    mm : object
        Mesh source providing ``mesh(seg_id=..., remove_duplicate_vertices=True)``.

    Returns
    -------
    None
        When ``syn_coords`` has 1 row or fewer (nothing is analysed).
    list
        Otherwise a list of records, possibly empty:

        * single bouton: ``[latest_mf, latest_root_PC, center_vx, n_syn, volume, bbox, syn_sv_id]``
        * multi bouton: ``['double_bouton', latest_mf, latest_root_PC, center_vx, n_syn, volume, bbox, syn_sv_id]``
        * failed split: ``['not MF', latest_mf, latest_root_PC, center_vx, n_syn, syn_sv_id]``

        If the mesh cannot be fetched, a single *flat* record
        ``['no mesh available', latest_mf, latest_root_PC, center_vx]`` is returned
        instead, and records collected so far are dropped.
    """
    mf_pyr_syn_thresh = 1
    dsyn_outlier_thresh = 4 #[microns]
    result = []

    if syn_coords.shape[0] > mf_pyr_syn_thresh:
        syn_coords_microns = syn_coords * np.array([18,18,45]) / 1000
        syn_cluster_center_microns = cluster_center(syn_coords_microns)[0]

        # count number of syn too far away from the center of all syn
        # if there are such syn, then this neuron makes more than one bouton on target cell
        syn_cluster_center_vx = syn_cluster_center_microns / np.array([18,18,45]) * 1000
        n_outlier = 0
        for k in range(0, len(syn_coords_microns)):
            dist = distance.euclidean(syn_coords_microns[k], syn_cluster_center_microns)
            if dist > dsyn_outlier_thresh:
                n_outlier += 1

        if n_outlier > 3:  # at least two boutons
            # Let the inertia drop between k = 1..3 decide how many synapse clusters exist.
            score = get_cluster_scores(syn_coords_microns, 3)
            n_cluster_syn, _ = decide_number_of_clusters(score)
            syn_cluster_labels, syn_cluster_centers, _ = cluster_vertices_kmeans(syn_coords_microns, n_cluster_syn)
            unique_syn_labels, cluster_sizes = np.unique(syn_cluster_labels, return_counts=True)
            # Only clusters with more than `mf_pyr_syn_thresh` synapses are analysed.
            valid_syn_clusters = unique_syn_labels[cluster_sizes>mf_pyr_syn_thresh]
            print('at least two boutons')
            for i in range(0, n_cluster_syn):
                if i in valid_syn_clusters:
                    syn_cluster_points = syn_coords_microns[syn_cluster_labels == i]
                    this_syn_center_vx = syn_cluster_centers[i] / np.array([18,18,45]) * 1000
                    print('bouton', i)
                    # Clusters centred outside this z range are skipped without a record.
                    if (this_syn_center_vx[2] > 200) and (this_syn_center_vx[2] < 2040):
                        # More synapses -> larger crop margin (voxels).
                        if len(syn_cluster_points) < 24:
                            bbx=get_bbox_for_syn_cluster(syn_cluster_points, vol, 150)
                        elif len(syn_cluster_points) < 36:
                            bbx=get_bbox_for_syn_cluster(syn_cluster_points, vol, 200)
                        else:
                            bbx=get_bbox_for_syn_cluster(syn_cluster_points, vol, 250)
                        # Voxel box scaled by the voxel size; assumes mesh vertices are in nm -- TODO confirm.
                        bbox_nm = np.array([bbx.minpt.x, bbx.maxpt.x, bbx.minpt.y, bbx.maxpt.y, bbx.minpt.z, bbx.maxpt.z]) * np.array([18,18,18,18,45,45])
                        try:
                            mesh = mm.mesh(seg_id = latest_mf, remove_duplicate_vertices=True)
                        except Exception:
                            # `Exception` (not a bare except) so Ctrl-C still interrupts a long run.
                            print('no mesh')
                            return ['no mesh available', latest_mf, latest_root_PC, this_syn_center_vx]

                        vertices_bbox, _ = get_mesh_in_bbox(mesh.vertices, mesh.faces, bbox_nm)
                        vertices_bbox_microns = vertices_bbox/1000
                        try:
                            vertices_bouton, _, _, _ = divide_vertices_by_count_neighboring_voxels(vertices_bbox_microns, radius)
                            vertices_bouton_vx = np.round(vertices_bouton / np.array([18,18,45]) * 1000, decimals=3)
                            # Shrink the download box to the extent of the dense vertices.
                            bouton_bbox = copy.deepcopy(bbx)
                            bouton_bbox.minpt.x, bouton_bbox.minpt.y, bouton_bbox.minpt.z = vertices_bouton_vx.min(axis=0)
                            bouton_bbox.maxpt.x, bouton_bbox.maxpt.y, bouton_bbox.maxpt.z = vertices_bouton_vx.max(axis=0)
                            seg_bouton = vol.download(bbox=bouton_bbox, label=latest_mf)
                            seg_bouton = np.squeeze(seg_bouton, axis=-1)
                            # Sum of the download times the voxel volume in cubic microns;
                            # presumably the download is a 0/1 mask for `label` -- TODO confirm.
                            bouton_vol = np.round(np.sum(seg_bouton) * 0.018 * 0.018 * 0.045, decimals=4)

                            result.append(['double_bouton',latest_mf, latest_root_PC, this_syn_center_vx, len(syn_coords), bouton_vol, bouton_bbox, syn_sv_id])

                            #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
                            #save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)
                        except Exception:
                            # Any failure of the split/download is recorded as "not MF".
                            print('likely to be not MF')
                            result.append(['not MF',latest_mf, latest_root_PC, this_syn_center_vx, len(syn_coords), syn_sv_id])

        else:
            if (syn_cluster_center_vx[2] > 200) and (syn_cluster_center_vx[2] < 2040):
                print('single bouton')
                # More synapses -> larger crop margin (voxels).
                if len(syn_coords_microns) < 24:
                    bbx=get_bbox_for_syn_cluster(syn_coords_microns, vol, 300)
                elif len(syn_coords_microns) < 36:
                    bbx=get_bbox_for_syn_cluster(syn_coords_microns, vol, 400)
                else:
                    bbx=get_bbox_for_syn_cluster(syn_coords_microns, vol, 500)
                bbox_nm = np.array([bbx.minpt.x, bbx.maxpt.x, bbx.minpt.y, bbx.maxpt.y, bbx.minpt.z, bbx.maxpt.z]) * np.array([18,18,18,18,45,45])
                try:
                    mesh = mm.mesh(seg_id = latest_mf, remove_duplicate_vertices=True)
                except Exception:
                    print('no mesh')
                    return ['no mesh available', latest_mf, latest_root_PC, syn_cluster_center_vx]

                # Unlike the multi-bouton branch, errors below propagate to the caller.
                vertices_bbox, _ = get_mesh_in_bbox(mesh.vertices, mesh.faces, bbox_nm)
                vertices_bbox_microns = vertices_bbox/1000
                vertices_bouton, _, _, _ = divide_vertices_by_count_neighboring_voxels(vertices_bbox_microns, radius)
                vertices_bouton_vx = np.round(vertices_bouton / np.array([18,18,45]) * 1000, decimals=3)
                bouton_bbox = copy.deepcopy(bbx)
                bouton_bbox.minpt.x, bouton_bbox.minpt.y, bouton_bbox.minpt.z = vertices_bouton_vx.min(axis=0)
                bouton_bbox.maxpt.x, bouton_bbox.maxpt.y, bouton_bbox.maxpt.z = vertices_bouton_vx.max(axis=0)
                seg_bouton = vol.download(bbox=bouton_bbox, label=latest_mf)
                seg_bouton = np.squeeze(seg_bouton, axis=-1)
                bouton_vol = np.round(np.sum(seg_bouton) * 0.018 * 0.018 * 0.045, decimals=4)

                result.append([latest_mf, latest_root_PC, syn_cluster_center_vx, len(syn_coords), bouton_vol, bouton_bbox, syn_sv_id])

                #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
                #save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)

            else:
                print('truncated bouton')

        return result


# First element of every `mf_group` entry, used below to find a cell's group by ID.
mf_group_pycid = [item[0] for item in mf_group]
# NOTE(review): overrides the radius of 3 set in the earlier extraction cell.
radius = 2.5
# `results` is deliberately not reset here, so this cell appends to whatever the kernel
# already holds under that name.
#results = []
for i in range(32,len(large_pyc)):
#for i in range(0, 1):
    aa = mfpc_df.loc[mfpc_df['segid_0114'] == large_pyc[i], 'GT_MF'].tolist()
    if len(aa) == 0:
        idx_latest=latest_roots_PC.index(large_pyc[i])
        if client.chunkedgraph.get_latest_roots(mfpc_df['segid_0114'].values[idx_latest]) == large_pyc[i]:
            aa = mfpc_df.loc[mfpc_df['segid_0114'] == mfpc_df['segid_0114'].values[idx_latest],'GT_MF'].tolist()
        else:
            print('cannot find the correnspoding row from sheet')
    mf_gt_id_old_temp = [int(x.strip()) for x in aa[0].split(',')]
    mf_gt_id_old = [num for num in mf_gt_id_old_temp if not str(num).startswith('7')]
    mf_gt_id_new = []
    for j in range(0,len(mf_gt_id_old)):
        mf_gt_id_new.append(client.chunkedgraph.get_latest_roots(mf_gt_id_old[j]))
    mf_new_flat = np.concatenate(mf_gt_id_new).tolist()
    if large_pyc[i] in mf_group_pycid:
        idx_mf_group = mf_group_pycid.index(large_pyc[i])
    else:
        large_pyc_latest = client.chunkedgraph.suggest_latest_roots(large_pyc[i])
        idx_mf_group = mf_group_pycid.index(large_pyc_latest)
        
    latest_roots_mfs_used = mf_group[idx_mf_group][1]
    latest_roots_mfs_unused = [x for x in mf_new_flat if x not in latest_roots_mfs_used]
    print('used MFs', len(latest_roots_mfs_used))
    print('potentially unused MFs', len(latest_roots_mfs_unused))
    print('GT_numMF', large_numMF[i])
    
    latest_root_mfs = latest_roots_mfs_unused
    latest_root_pc = large_pyc[i]
    
    syn = client.materialize.synapse_query(pre_ids=latest_root_mfs, post_ids=latest_root_pc, bounding_box=None, bounding_box_column='post_pt_position', 
                timestamp=None, remove_autapses=True, include_zeros=False, limit=None, offset=None, 
                split_positions=False, desired_resolution=[18,18,45], materialization_version=None, 
                synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True) 
#        for j in range(0,1):
    for j in range(32, len(latest_root_mfs)):
        if i==32 and j <= 48:   # to carry on from interrupted previous run
            continue
        time.sleep(0.5)
        mm = trimesh_io.MeshMeta(cv_path='gs://zheng_mouse_hippocampus_production/v2/seg_m153',
                                disk_cache_path=mesh_dir, cache_size=20)
        syn_center_pos = syn.loc[syn['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_position']].values
        if len(syn_center_pos) == 0:
            latest_root_mfs[j] = client.chunkedgraph.suggest_latest_roots(latest_root_mfs[j])
            syn2 = client.materialize.synapse_query(pre_ids=latest_root_mfs[j], post_ids=latest_root_pc, bounding_box=None, bounding_box_column='post_pt_position', 
                timestamp=None, remove_autapses=True, include_zeros=False, limit=None, offset=None, 
                split_positions=False, desired_resolution=[18,18,45], materialization_version=None, 
                synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True) 
            if len(syn2) > 0:
                syn_center_pos = syn2.loc[syn2['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_position']].values
                syn_sv_ids = syn2.loc[syn2['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_supervoxel_id']].values
                
                if len(syn_sv_ids) > 2:
                    syn_sv_id3 = [syn_sv_ids[0][0],syn_sv_ids[1][0],syn_sv_ids[2][0]]
                else:
                    syn_sv_id3 = [syn_sv_ids[0][0]]
            else:
                continue
        else:
            syn_sv_ids = syn.loc[syn['pre_pt_root_id'] == latest_root_mfs[j], ['pre_pt_supervoxel_id']].values
            if len(syn_sv_ids) > 2:
                syn_sv_id3 = [syn_sv_ids[0][0],syn_sv_ids[1][0],syn_sv_ids[2][0]]
            else:
                syn_sv_id3 = [syn_sv_ids[0][0]]
                

        #mf_id_flat_seg_m153 = get_flat_seg_id_from_sv(syn_sv_ids, vol)
        mf_id_flat_seg_m153 = get_flat_seg_id_from_coord(syn_center_pos, vol, latest_roots_PC)
        syn_coords = np.array([arr[0] for arr in syn_center_pos])            

        if mf_id_flat_seg_m153 == -1:
            print('Cannot locate MF seg from flat segmentation, Trying latest segmentation...')
            mm = trimesh_io.MeshMeta(cv_path='graphene://https://minnie.microns-daf.com/segmentation/table/zheng_ca3',
                                    disk_cache_path=mesh_dir, cache_size=20)
            mf_id_flat_seg_m153 = latest_root_mfs[j]      
            try:
                result = extract_bouton_by_voxel_density(mf_id_flat_seg_m153, latest_root_pc, vol, syn_coords, radius, syn_sv_id3, mm)
                results.append(result)
                print(i ,',', j)

            except:
                print('Still cannot locate MF')
                results.append(['Cannot locate MF using syn coords and supervoxels'])
                print(i ,',', j)
                print(mf_id_flat_seg_m153)
                print(latest_root_pc) 

        else:
            result = extract_bouton_by_voxel_density(mf_id_flat_seg_m153, latest_root_pc, vol, syn_coords, radius, syn_sv_id3, mm)
            results.append(result)
            print(i ,',', j)
        
    
print('---------------------------------')
print('Done!')

#with open("./variables/mf_bouton_split_best_250302_additional.pkl", "wb") as file:
#    pickle.dump(results, file)

used MFs 144
potentially unused MFs 98
GT_numMF 137
single bouton
32 , 62
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s]


32 , 63
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s]


32 , 73
single bouton
32 , 76
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.46it/s]


32 , 77
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.93it/s]


32 , 84
truncated bouton
32 , 85
single bouton
32 , 87
single bouton
32 , 95
used MFs 146
potentially unused MFs 103
GT_numMF 173
single bouton
33 , 32
33 , 33
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s]


33 , 39
single bouton
33 , 40
truncated bouton
33 , 41
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s]


33 , 42
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s]


33 , 43
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s]


33 , 47
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s]


33 , 48
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s]


33 , 51
single bouton
33 , 53
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.48it/s]


33 , 54
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s]


33 , 55
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s]


33 , 56
single bouton
33 , 61
33 , 62
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s]


33 , 63
single bouton
33 , 65
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s]


33 , 66
single bouton
33 , 67
single bouton
33 , 68
single bouton
33 , 69
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.35it/s]


33 , 70
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.38it/s]


33 , 71
single bouton
33 , 79
single bouton
33 , 81
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s]


33 , 83
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s]


33 , 84
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.18it/s]


33 , 85
single bouton
33 , 86
single bouton
33 , 87
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s]


33 , 88
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s]


33 , 90
33 , 101
used MFs 97
potentially unused MFs 98
GT_numMF 122
truncated bouton
34 , 34
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s]


34 , 35
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s]


34 , 36
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.20it/s]


34 , 37
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.75it/s]


34 , 39
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s]


34 , 41
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s]


34 , 42
single bouton
34 , 43
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.63it/s]


34 , 45
single bouton
34 , 46
single bouton
34 , 49
single bouton
34 , 51
single bouton
34 , 52
single bouton
34 , 53
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s]


34 , 56
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s]


34 , 59
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s]


34 , 60
single bouton
34 , 61
at least two boutons
bouton 0
bouton 1
34 , 62
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s]


34 , 65
truncated bouton
34 , 66
truncated bouton
34 , 67
truncated bouton
34 , 68
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.44it/s]


34 , 69
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.94it/s]


34 , 70
single bouton
34 , 72
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s]


34 , 73
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s]


34 , 77
single bouton
34 , 78
at least two boutons
bouton 0
bouton 1
34 , 79
single bouton
34 , 81
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s]


34 , 82
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s]


34 , 83
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.37it/s]


34 , 84
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s]


34 , 85
single bouton
34 , 86
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.05it/s]


34 , 87
34 , 88
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s]


34 , 89
34 , 90
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s]


34 , 91
34 , 92
single bouton
34 , 94
single bouton
34 , 95
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s]


34 , 96
used MFs 77
potentially unused MFs 127
GT_numMF 158
35 , 32
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s]


35 , 33
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s]


35 , 34
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s]


35 , 35
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.85it/s]


35 , 37
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s]


35 , 38
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s]


35 , 39
single bouton
35 , 40
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s]


35 , 42
single bouton
35 , 44
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s]


35 , 45
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s]


35 , 46
single bouton
35 , 49
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.47it/s]


35 , 50
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s]


35 , 52
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s]


35 , 53
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s]


35 , 55
single bouton
35 , 56
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s]


35 , 58
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s]


35 , 60
single bouton
35 , 64
35 , 65
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s]


35 , 66
35 , 67
single bouton
35 , 72
35 , 73
single bouton
35 , 76
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.77it/s]


35 , 79
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s]


35 , 80
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s]


35 , 81
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.11it/s]


35 , 82
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s]


35 , 83
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s]


35 , 84
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.50it/s]


35 , 87
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.97it/s]


35 , 88
35 , 91
35 , 92
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s]


35 , 93
35 , 94
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.34s/it]


35 , 95
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s]


35 , 100
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s]


35 , 101
at least two boutons
bouton 0
bouton 1
35 , 102
single bouton
35 , 103
single bouton
35 , 104
35 , 105
at least two boutons
bouton 0
35 , 107
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s]


35 , 108
35 , 112
single bouton
35 , 116
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.34it/s]


35 , 117
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.09it/s]


35 , 118
single bouton
35 , 119
single bouton
35 , 120
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.43it/s]


35 , 121
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s]


35 , 122
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.19it/s]


35 , 123
35 , 124
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s]


35 , 125
35 , 126
used MFs 170
potentially unused MFs 259
GT_numMF 287
36 , 33
single bouton
36 , 35
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.71it/s]

Number of distinct clusters (1) found smaller than n_clusters (2). Possibly due to duplicate points in X.



36 , 36
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s]


36 , 37
single bouton
36 , 39
36 , 40
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s]


36 , 41
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.91it/s]


36 , 42
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s]


36 , 43
36 , 44
single bouton
36 , 47
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s]


36 , 48
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s]


36 , 49
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s]


36 , 51
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s]


36 , 53
36 , 54
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.86it/s]


36 , 55
36 , 57
36 , 58
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s]


36 , 60
single bouton
36 , 63
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s]


36 , 66
36 , 67
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s]


36 , 68
36 , 70
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.58it/s]


36 , 71
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.72it/s]


36 , 74
truncated bouton
36 , 75
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s]


36 , 78
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s]


36 , 79
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s]


36 , 80
single bouton
36 , 95
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s]


36 , 97
single bouton
36 , 98
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.21it/s]


36 , 99
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.70it/s]


36 , 100
single bouton
36 , 106
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s]


36 , 107
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.10it/s]


36 , 108
36 , 109
single bouton
36 , 110
single bouton
36 , 113
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s]


36 , 114
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.93it/s]


36 , 115
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.52it/s]


36 , 118
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s]


36 , 119
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s]


36 , 120
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.13it/s]


36 , 121
at least two boutons
bouton 0


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.64it/s]


bouton 1
36 , 122
36 , 123
36 , 124
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.29it/s]


36 , 127
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s]


36 , 128
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.80it/s]


36 , 129
single bouton
36 , 131
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s]


36 , 132
36 , 139
single bouton
36 , 140
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.30it/s]


36 , 145
single bouton
36 , 146
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s]


36 , 150
single bouton
36 , 152
at least two boutons
bouton 0
bouton 1
36 , 153
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s]


36 , 156
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s]


36 , 157
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s]


36 , 160
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.02it/s]


36 , 161
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.25it/s]


36 , 162
truncated bouton
36 , 163
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s]


36 , 164
single bouton
36 , 165
single bouton
36 , 166
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s]


36 , 169
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s]


36 , 170
36 , 171
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.65it/s]


36 , 172
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.04it/s]


36 , 174
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.73it/s]


36 , 175
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s]


36 , 181
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.99it/s]


36 , 183
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.81it/s]


36 , 184
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s]


36 , 186
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.12it/s]


36 , 194
36 , 198
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s]


36 , 199
single bouton
36 , 202
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.16it/s]


36 , 204
single bouton
36 , 205
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.89it/s]


36 , 206
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s]


36 , 207
single bouton
36 , 208
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s]


36 , 209
single bouton
36 , 212
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.87it/s]


36 , 217
single bouton
36 , 218
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.13it/s]


36 , 219
single bouton
36 , 221
single bouton
36 , 227
single bouton
36 , 236
36 , 238
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.88it/s]


36 , 244
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s]


36 , 245
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.59it/s]


36 , 249
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.83it/s]


36 , 250
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.78it/s]


36 , 251
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.45it/s]


36 , 252
single bouton
36 , 253
36 , 254
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s]


36 , 258
used MFs 109
potentially unused MFs 69
GT_numMF 101
truncated bouton
37 , 38
single bouton
37 , 42
single bouton
37 , 45
single bouton
37 , 47
single bouton


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.50it/s]


37 , 51
single bouton
37 , 55
at least two boutons
bouton 0


100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s]


bouton 1
37 , 57
37 , 58
single bouton
37 , 63
37 , 65
37 , 66
---------------------------------
Done!


In [215]:
# Drop the `None` placeholders collected in `results` (presumably from calls that returned
# nothing for a MF-PC pair -- TODO confirm), report how many records remain, and pickle them.
# `is not None` is an identity test, so it also stays well-defined for array-like items.
results = [item for item in results if item is not None]
print(len(results))
with open("./variables/mf_bouton_split_best_250302_additional.pkl", "wb") as file:
    pickle.dump(results, file)

1494


In [170]:
# Redo the MFs that make double/triple boutons on one target cell: each of their boutons
# needs a smaller bounding box ("bbx") than the one used for a single bouton.

# Colour names. NOTE(review): not referenced by the code visible in this cell -- presumably
# consumed by plotting helpers defined elsewhere; confirm before removing.
colors = ['green','blue','yellow','purple','cyan',"black"]
# Directory passed to trimesh_io.MeshMeta as `disk_cache_path`.
mesh_dir = '../mesh_data/'

# Voxel size (x, y, z) in nanometres used for every voxel <-> micron conversion below.
_VOXEL_SIZE_NM = np.array([18, 18, 45])


def _pick_bbox_margin(n_syn, margins):
    """Choose a bounding-box margin from the number of synapses in a cluster.

    `margins` is `(small, medium, large)`: `small` for fewer than 24 synapses, `medium` for
    fewer than 36, `large` otherwise. The value is forwarded unchanged as the third
    argument of `get_bbox_for_syn_cluster`.
    """
    small, medium, large = margins
    if n_syn < 24:
        return small
    if n_syn < 36:
        return medium
    return large


def _mesh_vertices_in_bbox_microns(mm, latest_mf, bbx):
    """Return the mesh vertices of `latest_mf` inside `bbx` divided by 1000, or None if `mm.mesh` fails.

    The box corners are scaled by the voxel size before being handed to `get_mesh_in_bbox`
    (presumably voxel -> nm, with the mesh vertices in nm -- TODO confirm).
    """
    bbox_nm = np.array([bbx.minpt.x, bbx.maxpt.x, bbx.minpt.y, bbx.maxpt.y,
                        bbx.minpt.z, bbx.maxpt.z]) * np.repeat(_VOXEL_SIZE_NM, 2)
    try:
        mesh = mm.mesh(seg_id=latest_mf, remove_duplicate_vertices=True)
    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must propagate
        return None
    vertices_bbox, _ = get_mesh_in_bbox(mesh.vertices, mesh.faces, bbox_nm)
    return vertices_bbox / 1000


def _measure_bouton(bbx, vertices_bbox_microns, vol, latest_mf, radius):
    """Split bouton vertices from the rest and measure the bouton inside their tight bounding box.

    Returns `(bouton_vol, bouton_bbox, counts_thresholded)`. `bouton_bbox` is a deep copy of
    `bbx` whose corners are replaced by the min/max of the bouton vertices (converted back
    with the voxel size). `bouton_vol` is the sum of `vol.download(bbox=..., label=latest_mf)`
    times 0.018 * 0.018 * 0.045 (presumably voxel count -> cubic microns; TODO confirm that
    the download is a 0/1 mask).
    """
    vertices_bouton, _, _, counts_thresholded = divide_vertices_by_count_neighboring_voxels(
        vertices_bbox_microns, radius)
    vertices_bouton_vx = np.round(vertices_bouton / _VOXEL_SIZE_NM * 1000, decimals=3)
    bouton_bbox = copy.deepcopy(bbx)  # keep the caller's box untouched
    bouton_bbox.minpt.x, bouton_bbox.minpt.y, bouton_bbox.minpt.z = vertices_bouton_vx.min(axis=0)
    bouton_bbox.maxpt.x, bouton_bbox.maxpt.y, bouton_bbox.maxpt.z = vertices_bouton_vx.max(axis=0)
    seg_bouton = np.squeeze(vol.download(bbox=bouton_bbox, label=latest_mf), axis=-1)
    bouton_vol = np.round(np.sum(seg_bouton) * 0.018 * 0.018 * 0.045, decimals=4)
    return bouton_vol, bouton_bbox, counts_thresholded


def extract_bouton_by_voxel_density2(latest_mf, latest_root_PC, vol, syn_coords, radius, syn_sv_id, mm):
    """Measure the bouton(s) that one MF makes on one PC, using smaller boxes for multi-bouton MFs.

    Parameters
    ----------
    latest_mf : segment ID used for the mesh lookup and as `label` of `vol.download`.
    latest_root_PC : ID of the target cell; only stored in the output records.
    vol : object offering `download(bbox=..., label=...)`; also passed to `get_bbox_for_syn_cluster`.
    syn_coords : array with one row of three coordinates per synapse (assumed to be voxel
        units at 18 x 18 x 45 nm -- TODO confirm against caller).
    radius : forwarded to `divide_vertices_by_count_neighboring_voxels`.
    syn_sv_id : stored unchanged in every output record.
    mm : object offering `mesh(seg_id=..., remove_duplicate_vertices=True)`.

    Returns
    -------
    None
        when `syn_coords` has 3 rows or fewer (callers filter these out).
    list (flat)
        `['no mesh available', latest_mf, latest_root_PC, center_vx]` as soon as a mesh
        cannot be loaded; records collected before that are discarded.
    list of records
        otherwise. Single bouton:
        `[latest_mf, latest_root_PC, center_vx, n_syn, bouton_vol, bouton_bbox, syn_sv_id]`.
        Multiple boutons: the same fields prefixed with `'double_bouton'`, or
        `['not MF', latest_mf, latest_root_PC, center_vx, n_syn, syn_sv_id]` when the
        measurement of a cluster raised. The list is empty when every candidate centre has
        a z (voxels) outside the open interval (200, 2040).
    """
    mf_pyr_syn_thresh = 3       # minimum number of synapses (exclusive) for a bouton
    dsyn_outlier_thresh = 4     # [microns] distance from the synapse centre that marks an outlier
    n_outlier_thresh = 4        # more outliers than this => at least two boutons
    z_min_vx, z_max_vx = 200, 2040   # centres outside these z bounds are treated as truncated

    if syn_coords.shape[0] <= mf_pyr_syn_thresh:
        return None

    result = []
    syn_coords_microns = syn_coords * _VOXEL_SIZE_NM / 1000
    syn_cluster_center_microns = cluster_center(syn_coords_microns)[0]
    syn_cluster_center_vx = syn_cluster_center_microns / _VOXEL_SIZE_NM * 1000

    # Count synapses too far away from the centre of all synapses; if there are enough of
    # them, this neuron makes more than one bouton on the target cell.
    dist_to_center = np.linalg.norm(syn_coords_microns - syn_cluster_center_microns, axis=1)
    n_outlier = int(np.count_nonzero(dist_to_center > dsyn_outlier_thresh))

    if n_outlier > n_outlier_thresh:  # at least two boutons
        score = get_cluster_scores(syn_coords_microns, 3)
        n_cluster_syn, _ = decide_number_of_clusters(score)
        syn_cluster_labels, syn_cluster_centers, _ = cluster_vertices_kmeans(syn_coords_microns, n_cluster_syn)
        unique_syn_labels, label_counts = np.unique(syn_cluster_labels, return_counts=True)
        valid_syn_clusters = unique_syn_labels[label_counts > mf_pyr_syn_thresh]
        print('at least two boutons')
        for label in range(0, n_cluster_syn):
            if label not in valid_syn_clusters:
                continue
            syn_cluster_points = syn_coords_microns[syn_cluster_labels == label]
            this_syn_center_vx = syn_cluster_centers[label] / _VOXEL_SIZE_NM * 1000
            print('bouton', label)
            if not ((this_syn_center_vx[2] > z_min_vx) and (this_syn_center_vx[2] < z_max_vx)):
                continue

            margin = _pick_bbox_margin(len(syn_cluster_points), (200, 250, 300))
            bbx = get_bbox_for_syn_cluster(syn_cluster_points, vol, margin)
            vertices_bbox_microns = _mesh_vertices_in_bbox_microns(mm, latest_mf, bbx)
            if vertices_bbox_microns is None:
                print('no mesh')
                return ['no mesh available', latest_mf, latest_root_PC, this_syn_center_vx]

            try:
                bouton_vol, bouton_bbox, counts_thresholded = _measure_bouton(
                    bbx, vertices_bbox_microns, vol, latest_mf, radius)
                result.append(['double_bouton', latest_mf, latest_root_PC, this_syn_center_vx,
                               len(syn_coords), bouton_vol, bouton_bbox, syn_sv_id])

                #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
                save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)
            except Exception:
                # NOTE(review): a failure while saving the figure also lands here, after the
                # 'double_bouton' record has already been appended.
                print('likely to be not MF')
                result.append(['not MF', latest_mf, latest_root_PC, this_syn_center_vx, len(syn_coords), syn_sv_id])

    else:
        if (syn_cluster_center_vx[2] > z_min_vx) and (syn_cluster_center_vx[2] < z_max_vx):
            print('single bouton')
            margin = _pick_bbox_margin(len(syn_coords_microns), (400, 500, 600))
            bbx = get_bbox_for_syn_cluster(syn_coords_microns, vol, margin)
            vertices_bbox_microns = _mesh_vertices_in_bbox_microns(mm, latest_mf, bbx)
            if vertices_bbox_microns is None:
                print('no mesh')
                return ['no mesh available', latest_mf, latest_root_PC, syn_cluster_center_vx]

            # Unlike the multi-bouton branch, errors here propagate to the caller.
            bouton_vol, bouton_bbox, counts_thresholded = _measure_bouton(
                bbx, vertices_bbox_microns, vol, latest_mf, radius)
            result.append([latest_mf, latest_root_PC, syn_cluster_center_vx, len(syn_coords),
                           bouton_vol, bouton_bbox, syn_sv_id])

            #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
            save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)

        else:
            print('truncated bouton')

    return result

# Driver cell: for every pyramidal cell (PyC) entry in `mf_group`, query the
# synapses made by its presynaptic mossy fibres (MFs) and run
# `extract_bouton_by_voxel_density` once per MF. One result is appended to
# `results` per processed MF; the total wall-clock time is printed at the end.
stime = time.time()
radius = 3  # forwarded unchanged to extract_bouton_by_voxel_density
max_mf_per_pc = 100  # upper bound on the number of MFs processed per PyC
results = []
print('last i = ', len(latest_roots_PC)//3*1)

# Keyword arguments shared by both synapse queries below.
syn_query_kwargs = dict(
    bounding_box=None, bounding_box_column='post_pt_position',
    timestamp=None, remove_autapses=True, include_zeros=False, limit=None, offset=None,
    split_positions=False, desired_resolution=[18,18,45], materialization_version=None,
    synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True)

# The flat-segmentation mesh source does not depend on i or j, so build it once
# instead of once per MF. The graphene source is only needed on the fallback
# path and is therefore created lazily.
mm_flat = trimesh_io.MeshMeta(cv_path='gs://zheng_mouse_hippocampus_production/v2/seg_m153',
                              disk_cache_path=mesh_dir, cache_size=20)
mm_graphene = None

for i in range(0,1):
    # full run: for i in range(183,len(latest_roots_PC)//3*1)
    latest_root_mfs = mf_group[i][1]
    if latest_root_mfs is None:
        continue

    latest_root_pc = client.chunkedgraph.suggest_latest_roots(mf_group[i][0])
    syn = client.materialize.synapse_query(pre_ids=latest_root_mfs, post_ids=latest_root_pc,
                                           **syn_query_kwargs)

    # Clamp to the list length: a PyC with fewer than `max_mf_per_pc` MFs used to
    # raise IndexError on latest_root_mfs[j].
    # To resume an interrupted run, skip already-processed (i, j) pairs here,
    # e.g. `if i == 183 and j <= 46: continue`.
    for j in range(min(max_mf_per_pc, len(latest_root_mfs))):
        mm = mm_flat
        is_this_mf = syn['pre_pt_root_id'] == latest_root_mfs[j]
        syn_center_pos = syn.loc[is_this_mf, ['pre_pt_position']].values
        if len(syn_center_pos) == 0:
            # No synapse found under the stored root id: refresh the id (this
            # updates the entry inside `mf_group` in place) and query again.
            latest_root_mfs[j] = client.chunkedgraph.suggest_latest_roots(latest_root_mfs[j])
            syn2 = client.materialize.synapse_query(pre_ids=latest_root_mfs[j], post_ids=latest_root_pc,
                                                    **syn_query_kwargs)
            if len(syn2) == 0:
                continue
            is_this_mf = syn2['pre_pt_root_id'] == latest_root_mfs[j]
            syn_center_pos = syn2.loc[is_this_mf, ['pre_pt_position']].values
            syn_sv_ids = syn2.loc[is_this_mf, ['pre_pt_supervoxel_id']].values
        else:
            syn_sv_ids = syn.loc[is_this_mf, ['pre_pt_supervoxel_id']].values

        # Up to three supervoxel ids; slicing avoids the IndexError the fixed
        # [0], [1], [2] indexing raised for MFs with fewer than three synapses.
        syn_sv_id3 = [sv_row[0] for sv_row in syn_sv_ids[:3]]

        #mf_id_flat_seg_m153 = get_flat_seg_id_from_sv(syn_sv_ids, vol)
        mf_id_flat_seg_m153 = get_flat_seg_id_from_coord(syn_center_pos, vol, latest_roots_PC)
        syn_coords = np.array([arr[0] for arr in syn_center_pos])

        if mf_id_flat_seg_m153 == -1:
            print('Cannot locate MF seg from flat segmentation, Trying latest segmentation...')
            if mm_graphene is None:
                mm_graphene = trimesh_io.MeshMeta(cv_path='graphene://https://minnie.microns-daf.com/segmentation/table/zheng_ca3',
                                                  disk_cache_path=mesh_dir, cache_size=20)
            mm = mm_graphene
            mf_id_flat_seg_m153 = latest_root_mfs[j]
            try:
                result = extract_bouton_by_voxel_density(mf_id_flat_seg_m153, latest_root_pc, vol, syn_coords, radius, syn_sv_id3, mm)
                results.append(result)
                print(i ,',', j)
            except Exception as err:
                # `Exception` (not a bare except) so Ctrl-C still interrupts the
                # run; the error itself is printed so the failure can be diagnosed.
                print('Still cannot locate MF')
                print(repr(err))
                results.append(['Cannot locate MF using syn coords and supervoxels'])
                print(i ,',', j)
                print(mf_id_flat_seg_m153)
                print(latest_root_pc)
        else:
            result = extract_bouton_by_voxel_density(mf_id_flat_seg_m153, latest_root_pc, vol, syn_coords, radius, syn_sv_id3, mm)
            results.append(result)
            print(i ,',', j)

print('---------------------------------')
print('Done!')
etime = time.time()
print(etime-stime)
print('---------------------------------')
#print(results)

#with open("./variables/mf_double_bouton_split_best_250302_01.pkl", "wb") as file:
#    pickle.dump(results, file)

143


In [165]:
# Hand-picked list of PyC root ids, kept in its original order.
large_pyc = [
    648518346450460332, 648518346452912185, 648518346451792508,
    648518346440578773, 648518346469564998, 648518346437569326,
    648518346440390320, 648518346444248649, 648518346447524619,
    648518346457331314, 648518346441429719, 648518346441331671,
    648518346436615813, 648518346434473233, 648518346466920945,
    648518346450859343, 648518346442117893, 648518346463029354,
    648518346442903118, 648518346436797822, 648518346438680762,
    648518346442163249, 648518346451022260, 648518346446683356,
    648518346445409655, 648518346440942118, 648518346438847348,
    648518346437232646, 648518346442419701, 648518346439159708,
    648518346440489648, 648518346442090245, 648518346437496155,
    648518346437590538, 648518346445019926, 648518346447704211,
    648518346441998053, 648518346447679026,
]
# debugging aid: print(mfpc_df['segid_0114'] == large_pyc[i])

# First element of every `mf_group` entry (the PyC id of that group).
mf_group_pycid = [group_entry[0] for group_entry in mf_group]


639


In [127]:
# Open the graphene segmentation table as a mesh source (meshes are cached on
# disk under `mesh_dir`) and fetch the mesh of one hard-coded segment id.
mm = trimesh_io.MeshMeta(
    cache_size=20,
    disk_cache_path=mesh_dir,
    cv_path='graphene://https://minnie.microns-daf.com/segmentation/table/zheng_ca3',
)
mesh = mm.mesh(remove_duplicate_vertices=True, seg_id=648518346440318053)


In [134]:
# Debug cell: runs the single-bouton extraction steps inline for one hard-coded
# MF -> PyC pair. It relies on `syn_coords`, `radius`, `mm` and `vol` left in the
# kernel by earlier cells. On success one record
# [MF id, PyC id, synapse-cluster centre (voxels), n synapses, bouton volume, bouton bbox]
# is appended to `result`.
mf_pyr_syn_thresh = 5
dsyn_outlier_thresh = 4 #[microns]
n_cluster_syn = 1
result = []
mesh_dir = '../mesh_data/'

latest_mf = 648518346440318053
latest_root_PC = 648518346451792508

# Voxel size used throughout this cell to convert between voxels and nm.
vx_size_nm = np.array([18,18,45])

if syn_coords.shape[0] > mf_pyr_syn_thresh:
    syn_coords_microns = syn_coords * vx_size_nm / 1000
    syn_cluster_center_microns = cluster_center(syn_coords_microns)[0]

    # Cluster centre converted back to voxel coordinates.
    syn_cluster_center_vx = syn_cluster_center_microns / vx_size_nm * 1000
    n_outlier = 0  # kept for parity with the original cell; not used further here

    # Only handle clusters whose centre z lies strictly inside (200, 2040);
    # anything else is reported as a truncated bouton.
    if (syn_cluster_center_vx[2] > 200) and (syn_cluster_center_vx[2] < 2040):
        print('single bouton')

        # The third argument of get_bbox_for_syn_cluster grows with the synapse
        # count (presumably the box size - TODO confirm units).
        n_syn = len(syn_coords_microns)
        if n_syn < 24:
            bbox_size = 400
        elif n_syn < 36:
            bbox_size = 500
        else:
            bbox_size = 600
        bbx = get_bbox_for_syn_cluster(syn_coords_microns, vol, bbox_size)
        bbox_nm = np.array([bbx.minpt.x, bbx.maxpt.x, bbx.minpt.y, bbx.maxpt.y, bbx.minpt.z, bbx.maxpt.z]) * np.array([18,18,18,18,45,45])

        try:
            mesh = mm.mesh(seg_id = latest_mf, remove_duplicate_vertices=True)
        except Exception as err:
            # Stop here: falling through would reuse a stale `mesh` from an
            # earlier cell (or raise NameError on a fresh kernel).
            print('no mesh')
            print(repr(err))
        else:
            vertices_bbox, faces_bbox = get_mesh_in_bbox(mesh.vertices, mesh.faces, bbox_nm)
            vertices_bbox_microns = vertices_bbox/1000
            vertices_bouton,vertices_nonbouton,counts,counts_thresholded = divide_vertices_by_count_neighboring_voxels(vertices_bbox_microns, radius)
            vertices_bouton_vx = np.round(vertices_bouton / vx_size_nm * 1000, decimals=3)

            # Shrink a copy of the search box to the extent of the bouton vertices.
            bouton_bbox = copy.deepcopy(bbx)
            bouton_bbox.minpt.x, bouton_bbox.minpt.y, bouton_bbox.minpt.z = vertices_bouton_vx.min(axis=0)
            bouton_bbox.maxpt.x, bouton_bbox.maxpt.y, bouton_bbox.maxpt.z = vertices_bouton_vx.max(axis=0)

            # Sum of the downloaded label mask times the voxel volume (0.018 x 0.018 x 0.045).
            seg_bouton = vol.download(bbox=bouton_bbox, label=latest_mf)
            seg_bouton = np.squeeze(seg_bouton, axis=-1)
            bouton_vol = np.round(np.sum(seg_bouton) * 0.018 * 0.018 * 0.045, decimals=4)

            result.append([latest_mf, latest_root_PC, syn_cluster_center_vx, len(syn_coords), bouton_vol, bouton_bbox])

            #save_bouton_split_result_3D_seg(seg_bouton, latest_mf, latest_root_PC)
            #save_bouton_split_result_3D_mesh(vertices_bbox_microns, counts_thresholded, latest_mf, latest_root_PC)

    else:
        print('truncated bouton')




single bouton


In [135]:
# Show the record(s) collected in `result` by the single-bouton debug cell.
print(result)

[[648518346440318053, 648518346451792508, array([53776.14285714, 60805.71428571,  1142.14285714]), 7, 14.4452, Bbox([53133, 59903, 1003],[54283, 61878, 1267], dtype=np.int32, unit='vx')]]


In [8]:
# Inspect the loop state left in the kernel by the splitter driver cell.
print(i, j)
for debug_value in (
    latest_root_mfs[j],
    mf_group[i][1][j],
    mf_id_flat_seg_m153,
    latest_root_pc,
):
    print(debug_value)
# optional: print((syn_coords))
is_known_pc = 648518346450460332 in latest_roots_PC
print(is_known_pc)

20 109
648518346449241729
648518346449241729
648518346432558242
648518346451792508
True


In [83]:
# load MF-PyC boutons returned by splitter
#
# Each pickle holds a list with one entry per processed MF -> PyC pair. An entry
# is a list of bouton records (0-3 of them are handled below) or a status
# record such as ['no mesh available', mf_id, pc_id, centre].

# Paths to the pickle files
file_paths = ["./variables/mf_bouton_split_best_250302_01.pkl", "./variables/mf_bouton_split_best_250302_02.pkl",
              "./variables/mf_bouton_split_best_250302_03.pkl","./variables/mf_bouton_split_best_250302_04.pkl",
              "./variables/mf_bouton_split_best_250302_05.pkl","./variables/mf_bouton_split_best_250302_06.pkl"]

# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by this notebook.
variable_loaded = []
for file_path in file_paths:
    with open(file_path, 'rb') as f:
        loaded_variable = pickle.load(f)
    # extend() instead of `a = a + b` avoids re-copying the growing list
    variable_loaded.extend(loaded_variable)


length_data = [len(sublist) for sublist in variable_loaded]
possible_len = list(set(length_data))
print(possible_len)

len0 = [sublist for sublist in variable_loaded if len(sublist)==0]  # empty result
len1 = [sublist for sublist in variable_loaded if len(sublist)==1]  # single bouton result
len2 = [sublist for sublist in variable_loaded if len(sublist)==2]  # double bouton result
len3 = [sublist for sublist in variable_loaded if len(sublist)==3]  # triple bouton result
len4 = [sublist for sublist in variable_loaded if len(sublist)==4]  # 'no mesh available' status record (4 fields)

print('number of empty results', len(len0))
print('number of single boutons', len(len1))
print('number of double boutons', len(len2))
print('number of triple boutons', len(len3))
print('number of missing meshes', len(len4))

# Flatten results holding 1-3 bouton records into one list of records.
# Status records start with a string (e.g. a one-element
# ['Cannot locate MF using syn coords and supervoxels']); they are skipped so a
# bare message string never ends up among the bouton records.
variable_flattened = []
for sublist in variable_loaded:
    if 1 <= len(sublist) <= 3 and not isinstance(sublist[0], str):
        variable_flattened.extend(sublist)


length_data = [len(sublist) for sublist in variable_flattened]
possible_len = list(set(length_data))
print(possible_len)

len6 = [sublist for sublist in variable_flattened if len(sublist)==6]  # 6-field records, e.g. tagged 'not MF' (not written to the final file)
len7 = [sublist for sublist in variable_flattened if len(sublist)==7]  # untagged records from single-bouton results
len8 = [sublist for sublist in variable_flattened if len(sublist)==8]  # records that already carry a leading tag, e.g. 'double_bouton'

# Tag the 7-field records so every saved record has the same 8-field layout.
variable_final = [['single_bouton']+ sublist for sublist in len7] + len8
with open("./variables/mf_bouton_split_best_250302_all.pkl", "wb") as file:
    pickle.dump(variable_final, file)


[0, 1, 2, 3, 4]
number of empty results 2072
number of single boutons 22022
number of double boutons 545
number of triple boutons 14
number of missing meshes 46
[8, 6, 7]


[0, 1, 2, 3, 4]
2072
22022
545
14
46


In [None]:
# Look up the current root id of up to the first five presynaptic supervoxels
# in `syn_sv_ids` and take the most frequent root as the MF id.
# Slicing (rather than range(0, 5)) keeps this working when fewer than five
# supervoxel rows are available.
root_ids = [client.chunkedgraph.get_root_id(sv_row[0]) for sv_row in syn_sv_ids[:5]]
print(root_ids)

counts = Counter(root_ids)
if counts:
    latest_MF = counts.most_common(1)[0][0]
else:
    # No supervoxels at all: there is no majority root to report.
    latest_MF = None
print(latest_MF)

In [75]:

# Preview the first five flattened bouton records, one per line.
for preview_idx in range(5):
    print((variable_flattened[preview_idx]))


['double_bouton', 648518346441957457, 648518346450460332, array([56701.27272727, 70359.90909091,  1656.36363636]), 50, 5.5703, Bbox([56640, 70266, 1597],[56811, 70414, 1717], dtype=np.int32, unit='vx'), [76990141041762730, 77060440865584760, 77060440865569400]]
['double_bouton', 648518346441957457, 648518346450460332, array([57048.25641026, 69754.33333333,  1117.74358974]), 50, 23.7899, Bbox([56946, 69604, 1056],[57170, 69886, 1190], dtype=np.int32, unit='vx'), [76990141041762730, 77060440865584760, 77060440865569400]]
[648518346460022915, 648518346450460332, array([56302.17391304, 70139.93478261,   479.97826087]), 46, 28.8449, Bbox([56223, 69981, 403],[56412, 70282, 571], dtype=np.int32, unit='vx'), [76919771962307881, 76919771962429820, 76919771962383835]]
[648518346457833909, 648518346450460332, array([57026.11627907, 69733.74418605,  1502.46511628]), 43, 26.556, Bbox([56884, 69500, 1415],[57112, 69879, 1548], dtype=np.int32, unit='vx'), [77060440999623018, 77060440999530641, 770604

In [77]:
# Show the first record of each length bucket (6, 7 and 8 fields).
for length_bucket in (len6, len7, len8):
    print(length_bucket[0])

['not MF', 648518346432558242, 648518346451792508, array([54318.71428571, 60846.        ,  1395.42857143]), 9, [76496254028419770, 76355516405775768, 76637060303481611]]
['not MF', 648518346425208473, 648518346439126428, array([45858.33333333, 56339.66666667,  1644.33333333]), 9, [75510541854230492, 75510541787107911, 75510541787321659]]
['not MF', 648518346431973039, 648518346451006900, array([55073.        , 56655.16666667,  1768.5       ]), 10, [76636647852210500, 76636647852206995, 76636647852281697]]
[648518346460022915, 648518346450460332, array([56302.17391304, 70139.93478261,   479.97826087]), 46, 28.8449, Bbox([56223, 69981, 403],[56412, 70282, 571], dtype=np.int32, unit='vx'), [76919771962307881, 76919771962429820, 76919771962383835]]
['double_bouton', 648518346441957457, 648518346450460332, array([56701.27272727, 70359.90909091,  1656.36363636]), 50, 5.5703, Bbox([56640, 70266, 1597],[56811, 70414, 1717], dtype=np.int32, unit='vx'), [76990141041762730, 77060440865584760, 770

In [None]:
# Refresh the MF (pre) and PyC (post) root ids stored in `bouton_info`.
pre_mfs_old = bouton_info['pre_mf_id'].values
post_pyc_old = bouton_info['post_pyc_id'].values


def _suggest_latest_roots_cached(old_root_ids):
    """Return the suggested latest root for every id in ``old_root_ids``.

    The output list has the same length and order as the input. Each distinct
    id is sent to the chunkedgraph only once, so ids that repeat across rows
    do not trigger repeated requests.
    """
    latest_by_old_id = {}
    latest_roots = []
    for old_root_id in old_root_ids:
        if old_root_id not in latest_by_old_id:
            latest_by_old_id[old_root_id] = client.chunkedgraph.suggest_latest_roots(old_root_id)
        latest_roots.append(latest_by_old_id[old_root_id])
    return latest_roots


pre_mfs = _suggest_latest_roots_cached(pre_mfs_old)
post_pyc = _suggest_latest_roots_cached(post_pyc_old)


In [None]:
# load MF-PyC boutons grouped by target PyC

# Paths to the pickle files
file_paths = ['./variables/mf_pyc_gt_bouton_quick_split_1_of_4.pkl', './variables/mf_pyc_gt_bouton_quick_split_2_of_4.pkl',
              './variables/mf_pyc_gt_bouton_quick_split_3_of_4.pkl','./variables/mf_pyc_gt_bouton_quick_split_4_of_4.pkl']

# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by this notebook.
latest_pc_roots = []
bouton_vols_grouped = []
latest_mf_roots = []  # initialised here but not filled in this cell
for file_path in file_paths:
    with open(file_path, 'rb') as f:
        loaded_variable = pickle.load(f)

    # Element 0 is concatenated into the PyC roots and element 1 into the
    # grouped bouton volumes (presumably one group per PyC - TODO confirm).
    latest_pc_roots.extend(loaded_variable[0])
    bouton_vols_grouped.extend(loaded_variable[1])

# Display the loaded variable
#bouton_group_info = pd.DataFrame(variable_loaded, columns=['latest_roots_PC','bouton_vol_um3'])

# remove those without any MFs
idx_none = [i for i, val in enumerate(bouton_vols_grouped) if val is None or len(val) == 0]
idx_none_lookup = set(idx_none)  # O(1) membership tests instead of scanning the list per element
latest_pc_roots = [a for i, a in enumerate(latest_pc_roots) if i not in idx_none_lookup]
bouton_vols_grouped = [a for i, a in enumerate(bouton_vols_grouped) if i not in idx_none_lookup]

In [None]:
# sort the list by the number of presyn-MFs

numMFs = [len(sublist) for sublist in bouton_vols_grouped]
print(numMFs)
plt.hist(numMFs)