# Analysis of inputs of a list of neurons

Starting from a raw .txt file containing the root IDs of the desired postsynaptic neurons, this notebook:
- updates the root IDs
- fetches synapses and generates the synapse count for each individual input partner
- adds useful information: cell type names and authors
- saves the per-neuron input counts in a .xlsx file.

In [None]:
#Importing packages

import fafbseg
from fafbseg import flywire
import numpy as np
import pandas as pd
import os
from helpers.helper import update_dataframe_single_column

### 1. Loading and updating ids from txt file

In [None]:
# Locate the txt file holding the root IDs and load them

dataPath = r'Z:\Further projects\Heterogeneity across cell types\data\Txts\cell_type_poofread' # YOUR-PATH to txt
fileName = 'root_ids_Mi1_R_20231106.txt'
filePath = os.path.join(dataPath, fileName)

# NOTE: the IDs are taken from the header row of the parsed file (its column
# labels), so they are handled as strings from here on.
ids_df = pd.read_csv(filePath, sep=",")
curr_ID_ls = list(ids_df.columns)
len(curr_ID_ls)

In [None]:
# Updating the loaded root IDs via fafbseg (production dataset)

updated_ID_df = flywire.update_ids(
    curr_ID_ls,
    stop_layer=2,
    supervoxels=None,
    timestamp=None,
    dataset='production',
    progress=True,
)

### 2. Fetching synapses

In [None]:
# Fetching synapses from new IDs

ID_ls = updated_ID_df["new_id"].tolist()

# Stop here when there is nothing to fetch or when any ID is outdated.
# Raising (instead of only printing) prevents the following cells from running
# on a missing or stale `neurons_inputs` left over from a previous run.
if not ID_ls:
    raise ValueError('No root IDs available, analysis aborted')

ids_are_latest = np.asarray(flywire.is_latest_root(ID_ls), dtype=bool)
if not ids_are_latest.all():
    raise RuntimeError('IDs not up to date, analysis aborted')

# Fetch the neurons' inputs only: the given IDs are the postsynaptic side
# (pre=False, post=True).
neurons_inputs = flywire.synapses.fetch_synapses(
    ID_ls,
    pre=False,
    post=True,
    attach=True,
    min_score=50,
    clean=True,
    transmitters=False,
    neuropils=True,
    batch_size=30,
    dataset='production',
    progress=True,
    mat="live",
)

In [None]:
# Keeping only the synapses located in the chosen neuropile(s)

# NOTE: with fafbseg 1.14.0, ME_L is the right side and ME_R is the left side.
# Newer versions have fixed this.
neuropile_list = ['ME_L']
in_selected_neuropile = neurons_inputs['neuropil'].isin(neuropile_list)
neuropile_neurons_inputs = neurons_inputs[in_selected_neuropile].copy()

In [None]:
# Counting inputs per postsynaptic ID and joining the per-neuron tables
#
# For every postsynaptic neuron, builds a table indexed by 'presynaptic_ID'
# with the number of synapses received from each partner ('counts', sorted in
# descending order) and the ID of the receiving neuron ('postsynaptic_ID').
# All tables are then stacked into a single data frame.

per_neuron_counts = []
for n in neuropile_neurons_inputs['post'].unique():
    curr_inputs = neuropile_neurons_inputs[neuropile_neurons_inputs['post'] == n]

    # value_counts returns the synapse count per presynaptic partner, already
    # sorted from the strongest to the weakest partner.
    input_count_df = (
        curr_inputs['pre'].astype(str)
        .value_counts()
        .rename('counts')
        .to_frame()
    )
    input_count_df.index.name = 'presynaptic_ID'
    input_count_df['postsynaptic_ID'] = str(n)
    per_neuron_counts.append(input_count_df)
    #print(f'Counting done for: {n}')

# Concatenate once at the end: DataFrame.append no longer exists in pandas >= 2.0
# and growing a data frame inside the loop copies it on every iteration.
if per_neuron_counts:
    final_input_df = pd.concat(per_neuron_counts)
else:
    # No synapses in the selected neuropile(s): keep the expected layout
    final_input_df = pd.DataFrame(columns=['counts', 'postsynaptic_ID'])
    final_input_df.index.name = 'presynaptic_ID'

input_count_str_df = final_input_df.astype(str)
input_count_str_df.head()

### 3. Adding useful information to the data frame: cell types names and authors

In [None]:
# Collecting the presynaptic partner IDs (index of the count table)

partner_ID = list(input_count_str_df.index)

In [None]:
# Updating the partner IDs via fafbseg (production dataset)

updated_ID_df = flywire.update_ids(
    partner_ID,
    stop_layer=2,
    supervoxels=None,
    timestamp=None,
    dataset='production',
    progress=True,
)
partner_ID_ls = list(updated_ID_df["new_id"])

In [None]:
# Looking up user-based annotations about cell identity for every partner

identification_df = flywire.find_celltypes(
    partner_ID_ls,
    user=None,
    exact=False,
    case=False,
    regex=True,
    update_roots=False,
)

# When a root ID has several annotations, keep only the last one listed
identification_no_duplicates_df = identification_df.drop_duplicates(subset='pt_root_id', keep='last')

In [None]:
# Adding cell type name and author to each presynaptic partner

# Column layout handed to update_dataframe_single_column
source_cols = ['name', 'author','partner_ID']
target_cols = ['name', 'author', 'partner_ID']
reference_column = 'partner_ID'

# Source table: the annotations, one row per annotated partner, everything as text
source_df = identification_no_duplicates_df.copy().reset_index(drop=True)
source_df = source_df.assign(
    partner_ID=identification_no_duplicates_df['pt_root_id'].tolist(),
    name=identification_no_duplicates_df['tag'].tolist(),
    author=identification_no_duplicates_df['user_id'].tolist(),
)
source_df = source_df[source_cols].astype(str)

# Target table: one row per counted partner, with name/author still empty
target_df = input_count_str_df.copy().reset_index(drop=True)
target_df = target_df.assign(
    partner_ID=input_count_str_df.index.astype(str),
    name=None,
    author=None,
)
target_df = target_df[source_cols].astype(str)

# Running the function and completing the dataset
# NOTE(review): the two assignments below assume the helper returns one row per
# target row, in the same order as input_count_str_df -- confirm in helpers.helper.
result_df = update_dataframe_single_column(source_df, target_df, reference_column)
result_df['counts'] = list(input_count_str_df['counts'])
result_df['postsynaptic_ID'] = list(input_count_str_df['postsynaptic_ID'])
result_df.head()



In [None]:
# Writing the annotated input counts to an Excel file on your computer
save_excel_file = True
outDir = r'Z:\Further projects\Heterogeneity across cell types\data\Excels\min-score-50' # YOUR-PATH for saving excel file

if save_excel_file:
    # Input count table
    file_name = 'Mi1_neurons_input_count_R.xlsx'
    savePath = os.path.join(outDir, file_name)
    result_df.to_excel(savePath, sheet_name='Buhmann synapses')