# Setup

## Import packages

In [3]:
############################################################################################################################ 
# Get the latest CREST files for each ID within the target folder (dirname)

from pathlib import Path
import json
from sqlite3 import connect as sqlite3_connect
from sqlite3 import DatabaseError
from google.cloud import bigquery
import cloudvolume
from igraph import Graph as ig_Graph
from igraph import plot as ig_plot
from scipy.optimize import curve_fit
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from scipy.stats import zscore
from random import choice as random_choice
from itertools import combinations
from numpy import array, unravel_index, argmin, mean
import random
import numpy as np
from copy import deepcopy
import itertools
from datetime import datetime
from time import time
import neuroglancer
from webbrowser import open as wb_open
from webbrowser import open_new as wb_open_new
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm


import os
import sys

# Directory holding the local efish_em modules (eCREST_cli, AnalysisCode) imported below.
# Set the EFISH_EM_DIR environment variable to point at your own checkout; the default is
# the original author's machine-specific location.
EFISH_EM_DIR = os.environ.get('EFISH_EM_DIR', '/Users/kperks/Documents/ell-connectome/efish_em/efish_em')

# Only append once so that re-running this cell does not keep growing sys.path.
if EFISH_EM_DIR not in sys.path:
    sys.path.append(EFISH_EM_DIR)

# from eCREST_cli_beta import ecrest, import_settings
from eCREST_cli import ecrest
import AnalysisCode as efish 

## Datapath settings

If you save a copy of settings_dict.json (found in the "under construction" directory of the eCREST repo) somewhere outside the repo (for example, in your save_dir), you can use the following code cell to import it. This avoids having to re-type save_dir and db_path each time you "git pull" updates to this notebook from the repo.

In [4]:
# Machine-specific location of the local settings_dict.json copy; edit this for your own setup.
path_to_settings_json = '/Users/kperks/Documents/ell-connectome/eCREST-local-files/settings_dict.json'
# Loaded via the project helper; the 'save_dir' entry is read from it further down in this notebook.
settings_dict = efish.import_settings(path_to_settings_json)

# Per-axis (x, y, z) scale factors -- presumably voxel size in nm; TODO confirm units.
vx_sizes = [16,16,30]

## Molecular layer fit

In [7]:
# Fit a surface y = f(x, z) through the points annotated in the 'molecular' neuroglancer
# layer. The fitted parameters (popt) are used later to express synapse y positions
# relative to this surface.
neuroglancer_path = Path(settings_dict['save_dir']).parent.parent / 'blender/soma_locations/layer-molecular_annotation.json'

# Saved neuroglancer state (JSON) containing the annotation layers.
with open(neuroglancer_path, 'r') as myfile:
    neuroglancer_data = json.load(myfile)

# Select the annotation layer by name and fail with an explicit message if it is absent,
# rather than hitting a TypeError on `None['annotations']` below.
nl_ = 'molecular'
neuroglancer_layer = next((item for item in neuroglancer_data['layers'] if item["name"] == nl_), None)
if neuroglancer_layer is None:
    annotation_layers = sorted(
        {item.get('name') for item in neuroglancer_data['layers'] if item.get('type') == 'annotation'},
        key=str,
    )
    raise ValueError(
        f"No layer named {nl_!r} in {neuroglancer_path}; annotation layers found: {annotation_layers}"
    )

# Annotation points are scaled per axis by voxel_sizes before fitting
# (presumably voxel -> nm conversion; TODO confirm units).
voxel_sizes = [16,16,30]
vertices = [
    [p['point'][i] * voxel_sizes[i] for i in range(3)]
    for p in neuroglancer_layer['annotations']
]

x_pts = [p[0] for p in vertices]
y_pts = [p[1] for p in vertices]
z_pts = [p[2] for p in vertices]

# Perform curve fitting: y is modelled as a function of (x, z).
popt, pcov = curve_fit(efish.func_planar_curve, (x_pts, z_pts), y_pts)

# Print optimized parameters
print(popt)


[ 2.71956920e+05 -5.43115077e-02 -1.87026179e-01 -3.46153667e-07
  2.31048373e-06  9.59242290e-13 -1.51595014e-11  6.68290149e-07]


## Load reconstruction files

In [8]:
# Base directory for this notebook's inputs: the 'save_dir' entry of the settings.
dirpath = Path(settings_dict['save_dir'])

# File paths returned by the project helper for that directory
# (presumably one reconstruction file per cell -- confirm against AnalysisCode).
nodefiles = efish.get_cell_filepaths(dirpath)

# cell types for all files in directory

## from file

In [9]:
# Cell-type table; its 'id' and 'cell_type' columns are used below to label synapse partners.
df_type = pd.read_csv(dirpath / 'metadata/df_type_auto_typed.csv')

# Granule Cell Axons

## df_syn 

In [10]:
# Synapse table; the cells below read its 'x', 'y', 'z', 'pre' and 'post' columns
# (presumably one row per annotated synapse -- confirm against the CSV).
df_syn = pd.read_csv(dirpath / 'graphs/df_postsyn.csv')
# Annotation-type label handed to the completeness check later in the notebook.
syn = 'post-synaptic'

In [11]:
# Number of rows in the synapse table.
df_syn.shape[0]

20399

In [12]:
# Position of every synapse along y relative to the fitted molecular-layer surface.
# The surface is evaluated once for the whole table, using the same 2 x N (x, z) array
# layout that curve_fit passes to func_planar_curve during fitting, instead of once per
# row through iterrows().
xz = np.asarray((df_syn['x'].to_numpy(), df_syn['z'].to_numpy()), dtype=float)
yoffset = efish.func_planar_curve(xz, *popt)
df_syn['y_adj'] = df_syn['y'].to_numpy() - yoffset

# Rescale all coordinates by 1/1000 (presumably nm -> um; TODO confirm units).
# NOTE: this modifies df_syn in place, so re-running this cell without reloading df_syn
# rescales x, y and z again.
for v in ['x','y','z','y_adj']:
    df_syn[v] = df_syn[v]/1000

# Flip the sign of the surface-relative coordinate.
df_syn['y_adj'] = df_syn['y_adj']*-1

In [13]:
# pf_df = deepcopy(df_syn[(df_syn['pre_type'].isin(['pf']))&(df_syn['post_type'].isin(['mg1','mg2','lg','lf']))])
# pf_df['x']=pf_df['x'].apply(lambda x: x/16)
# pf_df['y']=pf_df['y'].apply(lambda x: x/16)
# pf_df['z']=pf_df['z'].apply(lambda x: x/30)

# pf_df.to_csv(Path('/Users/kperks/Downloads/pf_synapses.csv'))

## add cell type to df_syn

In [14]:
# Label both partners of every synapse with their cell type from df_type.
# A single id -> cell_type lookup table replaces the per-row scan of df_type; when an id
# is listed more than once, the first row wins (same as taking `.values[0]`).
type_by_id = df_type.drop_duplicates(subset='id').set_index('id')['cell_type']

pre_known = df_syn['pre'].isin(type_by_id.index)
post_known = df_syn['post'].isin(type_by_id.index)

# Report every synapse with a partner that is missing from df_type, instead of hiding
# the failure behind a bare `except`.
unmatched = df_syn.loc[~(pre_known & post_known), ['pre', 'post']]
for pre_id, post_id in unmatched.itertuples(index=False):
    print(pre_id, post_id)

for side, known in (('pre', pre_known), ('post', post_known)):
    col = f'{side}_type'
    looked_up = df_syn[side].map(type_by_id)
    # Where the id is unknown, keep whatever type the table already carried (if any).
    if col in df_syn.columns:
        looked_up = looked_up.where(known, df_syn[col])
    # Lower-case string labels only; missing values stay missing rather than raising
    # AttributeError on `.lower()`.
    df_syn[col] = looked_up.map(lambda t: t.lower() if isinstance(t, str) else t)

In [15]:
# Keep only the synapses whose presynaptic partner is typed 'pf'.
mask = df_syn['pre_type'].isin(['pf'])
df_syn = df_syn.loc[mask]

# Completeness of synapses



In [16]:
# Arguments for the completeness check: the annotation-type label and which column of
# df_syn ('pre') identifies the cells being audited (presumed meaning -- confirm against
# efish.check_annot_reconstruction_completeness).
syn = 'post-synaptic'
source = 'pre'

# Only 'pf' cells are checked; the commented-out alternative is every type present in df_syn.
check_types = ['pf']#set(df_syn['pre_type'].unique()) | set(df_syn['post_type'].unique())

# Result is used below as a table with 'id', 'cell_type', 'todo' and 'completed' columns.
df_progress = efish.check_annot_reconstruction_completeness(df_syn, nodefiles, df_type, syn, source, check_types)

In [17]:
# Progress rows for 'pf' cells, ordered by ascending 'todo' count.
# Optional extra filter that was left disabled: & (df_progress['todo'] > 0)
(
    df_progress
    .loc[df_progress['cell_type'].isin(['pf'])]
    .sort_values('todo')
)

Unnamed: 0,id,cell_type,n_syn,done,todo,completed
0,283546452,pf,60,60,0,1.000000
57,290552453,pf,12,12,0,1.000000
56,297177312,pf,4,4,0,1.000000
55,368151099,pf,5,5,0,1.000000
54,364689540,pf,11,11,0,1.000000
...,...,...,...,...,...,...
53,363544667,pf,18,9,9,0.500000
45,117041378,pf,45,32,13,0.711111
4,364857559,pf,23,10,13,0.434783
28,371509204,pf,37,23,14,0.621622


In [18]:
# Rows of df_progress whose 'completed' value is below 0.95.
# NOTE: `mask` is reused here; earlier it held the 'pf' filter on df_syn.
mask = df_progress['completed']<0.95
# Unique ids of those rows, converted to Python ints and collected in a NumPy array.
pre_incomplete = np.asarray([int(c) for c in df_progress[mask]['id'].unique()])