# Updating a proofreading table or a list id file

This notebook contains functions and example scripts to update a proofreading table or lists of ids stored in an excel or txt file

Content per section:
<br>A) Updating IDs and presynaptic counts
<br>B) Updating main postsynaptic neurons database (table)
<br>C) Updating a list of IDs from a txt file
<br>D) Updating a list of IDs from any excel file

In [1]:
import os
import pandas as pd
import numpy as np
import fafbseg
import math
import itertools
from fafbseg import flywire
from caveclient import CAVEclient
from helpers.synapse_queries import combine_xyz, separate_xyz, synapse_count, filter_points, calculate_distance
client = CAVEclient('flywire_fafb_production')



## A) Updating IDs and presynaptic counts

### 1. Loading Tm9 input neuron table

In [60]:
# Choose path and file
dataPath = r'C:\Connectomics-Data\FlyWire\Excels\drive-data-sets'
date = '20230821'
fileName = f'Tm4_neurons_input_count_R_{date}.xlsx'
filePath = os.path.join(dataPath,fileName)

In [61]:
#Loading file as DataFrame
df = pd.read_excel(filePath)
df.head()

Unnamed: 0,presynaptic_ID,counts,count %,postsynaptic_ID,symbol,guess,lab,author,name,detached_lamina (Y/N),...,Update_confidence,Updated_counts,duplicates,Working on,column_id,patch_id,optic_lobe_id,hemisphere,Updated_postsynaptic_ID,dorso-ventral
0,asdf,,,asdf,,,,,,,...,,,,,,,,,,
1,720575940627062936,30.0,7.8125,720575940623645064,L2,,,96.0,Lamina monopolar 2; L2; FBbt_00003720,N,...,,,,,,,,R,,
2,720575940624288136,22.0,5.729167,720575940623645064,L2,,,28.0,Lamina monopolar 2; L2; FBbt_00003720,N,...,,,,,,,,R,,
3,720575940624382442,19.0,4.947917,720575940623645064,L2,,,96.0,Lamina monopolar 2; L2; FBbt_00003720,N,...,,,,,,,,R,,
4,720575940620620385,15.0,3.90625,720575940623645064,Tm16,,,2102.0,"Transmedullary neuron 16, Tm16, FBbt_00003804 ...",N,...,,,,,,,,R,,


In [62]:
#Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
if df["postsynaptic_ID"][0] == 'asdf': 
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)

#df = df[df['counts'] >= 3].copy() # Removing rows based on absolute count filter
    
#Getting the lists of IDs to update
segmentIDs = df["seg_id"].copy()
pre_IDs = df["presynaptic_ID"].copy()
post_IDs = df["postsynaptic_ID"].copy()


In [63]:
#Replacing the 'INPUTS PROOFREAD' labelled row of the lists for the previous postsynaptic cell ID
for i, id in enumerate(pre_IDs):
    if id == 'INPUTS PROOFREAD':
        segmentIDs[i] = post_IDs[i-1] # The previous postsynaptic cell ID
        pre_IDs[i] = post_IDs[i-1]
        post_IDs[i] = post_IDs[i-1]

### 2. Updating IDs considering our excel file extructure (fast and new version)

In [64]:
#Updating segments (trying to update in loops to not collapse the server)

confidence_of_update = []
updated_presynaptic_ID_column = []
pre_ID_i = 0
rounds_of = 100
curr_round = 0
print(f'Total rounds to perform: {math.ceil((len(pre_IDs)/rounds_of))}')

for i in range(0,math.ceil((len(pre_IDs)/rounds_of))):
    curr_round += 1
    curr_pre_IDs = pre_IDs[pre_ID_i:pre_ID_i+rounds_of]
    temp_segmentIDs_df = flywire.update_ids(curr_pre_IDs.tolist(), stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
    updated_presynaptic_ID_column.append(temp_segmentIDs_df["new_id"].tolist())
    confidence_of_update.append(temp_segmentIDs_df["confidence"].tolist())
    pre_ID_i +=rounds_of
    if curr_round%5 == 0: #printing current round every 5 rounds
        print(curr_round)

updated_presynaptic_ID_column = list(itertools.chain.from_iterable(updated_presynaptic_ID_column))
confidence_of_update = list(itertools.chain.from_iterable(confidence_of_update))

Total rounds to perform: 215


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

5


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

10


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

15


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

20


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

25


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

30


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

35


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

40


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

45


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

50


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

55


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

60


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

65


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

70


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

75


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

80


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

85


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

90


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

95


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

100


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

105


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

110


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

115


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

120


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

125


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

130


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

135


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

140


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

145


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

150


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

155


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

160


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

165


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

170


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

175


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

180


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

185


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

190


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

195


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

200


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

205


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

210


Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/100 [00:00<?, ?it/s]

Updating:   0%|          | 0/91 [00:00<?, ?it/s]

215


### 2. Updating IDs considering our excel file extructure (old and slow version) - DO NOT USE -

Updating segments will be slow since we do it in a loop. Please just run this part if you really consider that the IDs in which your are mostly interested in need an update

In [None]:
#Updating segments 
updated_presynaptic_ID_column = []
confidence_of_update = []
count = 1
for id in pre_IDs:
    count =+ 1
    if id == 'INPUTS PROOFREAD':
        updated_presynaptic_ID_column.append('INPUTS PROOFREAD')
        confidence_of_update.append('INPUTS PROOFREAD')
    else:
        temp_segmentIDs_df = flywire.update_ids(id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        updated_presynaptic_ID_column.append(str(temp_segmentIDs_df["new_id"][0]))
        confidence_of_update.append(temp_segmentIDs_df["confidence"][0])
    print(f'row: {count} {updated_presynaptic_ID_column[-1]}')
    

### 2.1 Adding important columns for next steps

In [65]:
### Setting important columns to str
df["presynaptic_ID"] = df["presynaptic_ID"].astype(str)
df["postsynaptic_ID"] = df["postsynaptic_ID"].astype(str)
df["seg_id"] = df["seg_id"].astype(str)

In [66]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_presynaptic_ID"] = updated_presynaptic_ID_column
df["Updated_presynaptic_ID"] = df["Updated_presynaptic_ID"].astype(str) 
df["Update_confidence"] = confidence_of_update

In [67]:
len(confidence_of_update)

21491

### 3. Updating counts between pre- and post synaptic partners

Strategy to save time:
1. (Step1) Create a dictionary with postsynaptic neuron's IDs as KEYS and their a input-neuron dataframe as VALUES , and then (step2) create a loop across presynaptic IDs to get the exact counting from the input-neuron-dataframe of postsynaptic neurons, loading the correct input-neuron-dataframe from the dictionary each time.

    Or, all in one single step: start a loop across unique postsynaptic IDs (be careful that the order is in the same as in the excel file, print them), get the input-neuron dataframe per each one in each round of the loop, and add a second loop across presynaptic IDs to get the exact counting.
    

2. Save the new countings in order of production in the same dataframe, as well as a column showing duplicated rows

In [109]:
#Implementing the "single step" option

unique_post_IDs = df[df['presynaptic_ID'] != 'INPUTS PROOFREAD']['postsynaptic_ID'].unique()
new_count_ls =  [] # For storing the new counts
copy_count_ls = [] # For storing the copy counts 
for post_id in unique_post_IDs: # loop across postsynaptic ids.
    if flywire.is_latest_root([post_id])[0]:
        curr_id = post_id
        print(f'Getting inputs from: {curr_id}')
    else:
        updated_ID_df = flywire.update_ids(post_id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        curr_id = updated_ID_df["new_id"][0]
        print(f'Getting inputs from: {curr_id} (updated)')
    
    curr_df = df[df['postsynaptic_ID'] == post_id].copy()
    curr_neurons_inputs = flywire.synapses.fetch_synapses(curr_id, pre=False, post=True, attach=True, 
                                             min_score=50, clean=True, transmitters=False, 
                                             neuropils=True, batch_size=30, 
                                             dataset='production', progress=True,mat= "live")
    
    if curr_neurons_inputs.empty: #Adding this to fix isues with retrieveing data
        #Adding NaNs
        empty_list = [None]* len(df[df['postsynaptic_ID'] == post_id])
        new_count_ls = new_count_ls + empty_list
        copy_count_ls = copy_count_ls + empty_list
    
    else: 
        
        #Filtering redundant / duplicated counts (threshold = 100 nm)
        combine_xyz(curr_neurons_inputs)
        points = curr_neurons_inputs['pre_pt_position'].tolist()
        points_no_duplicates = filter_points(points, threshold_distance = 100)
        curr_neurons_inputs_no_duplicates = curr_neurons_inputs[curr_neurons_inputs['pre_pt_position'].apply(lambda x: x in points_no_duplicates)].copy()

        pre_id_copies_dict = {} # For checking ID duplicates, triplicates, etc
        for i,pre_id in enumerate(curr_df["Updated_presynaptic_ID"]): # loop across presynaptic ids
            #Counting copies
            if pre_id in pre_id_copies_dict.keys():
                pre_id_copies_dict[pre_id]+= 1 # duplicates, triplicates ...
            else:
                pre_id_copies_dict[pre_id] = 1 # initial count

            c = len(curr_neurons_inputs_no_duplicates[curr_neurons_inputs_no_duplicates['pre_pt_root_id'] == int(pre_id)])
            new_count_ls.append(c) # count between specific pre and post
            copy_count_ls.append(pre_id_copies_dict[pre_id])
            #print(f'Counts with {pre_id}: {c}, confidence {curr_df["Update_confidence"].tolist()[i]}, copy {pre_id_copies_dict[pre_id]}')

        #In old files with this rows acting as separators
        #new_count_ls.append('INPUTS PROOFREAD')
        #copy_count_ls.append('INPUTS PROOFREAD')
        


Getting inputs from: 720575940623645064
Getting inputs from: 720575940627067066
Getting inputs from: 720575940606202290
Getting inputs from: 720575940623959852
Getting inputs from: 720575940620396969
Getting inputs from: 720575940629860603
Getting inputs from: 720575940633696096
Getting inputs from: 720575940616178293
Getting inputs from: 720575940631205071
Getting inputs from: 720575940612157041
Getting inputs from: 720575940639243573
Getting inputs from: 720575940624633783
Getting inputs from: 720575940612723314
Getting inputs from: 720575940622145449
Getting inputs from: 720575940622720517
Getting inputs from: 720575940623285005
Getting inputs from: 720575940611419512
Getting inputs from: 720575940629385086
Getting inputs from: 720575940614230993
Getting inputs from: 720575940628006824
Getting inputs from: 720575940624218119
Getting inputs from: 720575940620300167
Getting inputs from: 720575940614051326
Getting inputs from: 720575940605942974
Getting inputs from: 720575940632205330


In [110]:
# Adding new columns to the main dataframe
df["Updated_counts"] = new_count_ls
df["duplicates"] =  copy_count_ls
df.head()

Unnamed: 0,presynaptic_ID,counts,count %,postsynaptic_ID,symbol,guess,lab,author,name,detached_lamina (Y/N),...,Update_confidence,Updated_counts,duplicates,Working on,column_id,patch_id,optic_lobe_id,hemisphere,Updated_postsynaptic_ID,dorso-ventral
0,720575940627062936,30.0,7.8125,720575940623645064,L2,,,96,Lamina monopolar 2; L2; FBbt_00003720,N,...,1.0,30.0,1.0,,,,,R,,
1,720575940624288136,22.0,5.729167,720575940623645064,L2,,,28,Lamina monopolar 2; L2; FBbt_00003720,N,...,1.0,19.0,1.0,,,,,R,,
2,720575940624382442,19.0,4.947917,720575940623645064,L2,,,96,Lamina monopolar 2; L2; FBbt_00003720,N,...,1.0,18.0,1.0,,,,,R,,
3,720575940620620385,15.0,3.90625,720575940623645064,Tm16,,,2102,"Transmedullary neuron 16, Tm16, FBbt_00003804 ...",N,...,1.0,13.0,1.0,,,,,R,,
4,720575940624119944,13.0,3.385417,720575940623645064,C3,,,104,"Centrifugal neuron 3, C3, FBbt_00003744 (Fisch...",N,...,1.0,9.0,1.0,,,,,R,,


### 4. Saving back to the excell file

In [111]:
# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

df.to_excel(writer, sheet_name='Updated_dataframe_'+date_str)
writer.save()
writer.close()

### 3. Or, saving in a new excel file

In [112]:
#Saving in a new file

import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

file_name = f'Tm4_neurons_input_count_segments_update_{date_str}.xlsx'
savePath = os.path.join(dataPath, file_name)
df.to_excel(savePath, sheet_name='Segments update')

### Faster update (code in progress...)

In [None]:
#Updating segments (FAST)
#Do all segments as once, but filter out columns with 'INPUTS PROOFREAD' first
filtered_df = df[df["presynaptic_ID"] =! 'INPUTS PROOFREAD'].copy()
segmentIDs = filtered_df["seg_id"]
pre_IDs = filtered_df["presynaptic_ID"]
post_IDs = filtered_df["postsynaptic_ID"]

new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
new_segmentIDs = new_segmentIDs_df["new_id"].tolist()


In [None]:
### Updating the dataframe
# Adding the new url column to the data frame
filtered_df["Updated_seg_id"] = new_segmentIDs
filtered_df["Updated_seg_id"] = filtered_df["Updated_seg_id"].astype(str) 

In [None]:
# Code not finished....

# B) Updating main postsynaptic neurons database

### 1. Loading postsynaptic neuron dataframe

In [3]:
# Choose path and file
dataPath = r'D:\Connectomics-Data\FlyWire\Excels\drive-data-sets'
date = '20230912'
fileName = 'Tm1 proofreadings_20230105.xlsx'
fileName = 'T4 proofreadings dorsal_20230406.xlsx'
fileName = f'Tm9 proofreadings_{date}.xlsx'
# fileName = f'Tm16 proofreadings_{date}.xlsx'
# fileName = f'Dm12 proofreadings_{date}.xlsx'
# fileName = f'Mi4 proofreadings_{date}.xlsx'
# fileName = f'C2 proofreadings_{date}.xlsx'

filePath = os.path.join(dataPath,fileName)

In [7]:
#Loading file as DataFrame
df = pd.read_excel(filePath)
if df["seg_id"][0] == 'asdf': #Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)
df = df.astype(str)
display(df.head())
segmentIDs = df["Updated_seg_id"].tolist()

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,patch_id,column_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),dorso-ventral
0,"63464.0,83140.0,5499.0",Tm9,"82124.0,73675.0,5135.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940631867211,"Fischbach & Dittrich, 1989",...,V,V1,Y,N,N,,N,N,?,V
1,"67273.0,85576.0,5497.0",Tm9,"84894.0,75635.0,5033.0",,L,Marion Silies,Jonas Chojetzki,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940629938298,"Fischbach & Dittrich, 1989",...,V,V2,Y,Y,Y,?,N,Y,Y,V
2,"70063.0,88589.0,5760.0",Tm9,"86449.0,78034.0,4816.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940619904728,"Fischbach & Dittrich, 1989",...,V,V3,Y,Y,Y,Y,N,N,Y,V
3,"65296.0,86194.0,6202.0",Tm9,"81554.0,74784.0,4819.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940626539592,"Fischbach & Dittrich, 1989",...,V,V4,N,N,N,,Y,N,?,V
4,"69273.0,86122.0,4826.0",Tm9,"88273.0,76603.0,5126.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940616954181,"Fischbach & Dittrich, 1989",...,V,V5,Y,N,N,,N,N,?,V


In [8]:
print(segmentIDs[0:5])

['720575940631867211', '720575940629938298', '720575940619904728', '720575940626539592', '720575940616954181']


### 2. Update with CAVE (not preferred)

In [None]:
#Update IDs witth chunkedgraph module of CAVE

# For "segmentsIDs"
#Empty spaces are type float and will be filled with "0"
segmentsIDs_int = list(map(lambda x: 0 if type(x) == float else int(x),segmentIDs)) # From str to int
#to create a np.zeros array is important for the next step
new_segmentsIDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),segmentsIDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_segmentsIDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_segmentsIDs_int)) # From int to str

In [None]:
# For "pre_IDs"
#Empty spaces are type float and will be filled with "0"
pre_IDs_int = list(map(lambda x: 0 if type(x) == float or x == 'INPUTS PROOFREAD'  else int(x),pre_IDs)) # From str to int
#to create a np.zeros array is important for the next step
new_pre_IDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),pre_IDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_pre_IDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_pre_IDs_int)) # From int to str

In [None]:
# For "post_IDs"
#Empty spaces are type float and will be filled with "0"
post_IDs_int = list(map(lambda x: 0 if type(x) == float else int(x),post_IDs)) # From str to int
#to create a np.zeros array is important for the next step
new_post_IDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),post_IDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_post_IDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_post_IDs_int)) # From int to str

In [None]:
### Seleting the right pre_ID if the update gaves more than one
#Getting the correct pre_IDs than contact each post_ID

from functools import reduce

correct_IDs = {}
curr_post_ID = 'Start'
for idx,pre_IDs in  enumerate(new_pre_IDs_str):
        
    #If there are multiple IDs in an array
    if type(pre_IDs) != str and type(pre_IDs) == np.ndarray:
        #Creatting synapses dataframe only once per each post_ID
        if curr_post_ID != new_post_IDs_str[idx]:
            synapses = flywire.synapses.fetch_synapses(new_post_IDs_str[idx], pre=False, post=True, attach=True,
                                          min_score=50, clean=True, transmitters=False,
                                          neuropils=True, batch_size=30,
                                          dataset='production', progress=True,mat="live")
            #Update post_ID
            print(f"Looking at post_ID: {new_post_IDs_str[idx]}") 
            curr_post_ID = new_post_IDs_str[idx]
            
        #Proof connectivity to the respective post_ID for each of them
        for ID in pre_IDs:
            if synapses[synapses['pre'] == ID].empty:
                continue
            else: # Only add the pre_ID (and its index) which has valid synapses with the post_IDs
                if idx in correct_IDs.keys(): # If there is already a valid ID, add other valid IDs 
                    curr_value =correct_IDs[idx]
                    new_value = curr_value+"_"+str(ID)
                    correct_IDs[idx] =new_value
                else:
                    correct_IDs[idx]=str(ID)

# Fixing the updated pre_IDs_str_list
for key, value in correct_IDs.items():
    new_pre_IDs_str[key] = value
    


In [None]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_pre_IDs"] = new_pre_IDs_str
df["Updated_post_IDs"] = new_post_IDs_str

### 2. Update with FAFB (predered)

In [9]:
# Updating the segmentIDs
new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

Updating:   0%|          | 0/1457 [00:00<?, ?it/s]

In [10]:
new_segmentIDs_df["confidence"].unique()

array([1.  , 0.97, 0.95, 0.99])

In [11]:
### Updating the dataframe
# Adding the new column to the data frame
df["Update_confidence"] = new_segmentIDs_df["confidence"].astype(str).tolist()
df["Updated_seg_id"] = new_segmentIDs_df["new_id"].astype(str).tolist()
df.head()

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,patch_id,column_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),dorso-ventral
0,"63464.0,83140.0,5499.0",Tm9,"82124.0,73675.0,5135.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940631867211,"Fischbach & Dittrich, 1989",...,V,V1,Y,N,N,,N,N,?,V
1,"67273.0,85576.0,5497.0",Tm9,"84894.0,75635.0,5033.0",,L,Marion Silies,Jonas Chojetzki,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940629938298,"Fischbach & Dittrich, 1989",...,V,V2,Y,Y,Y,?,N,Y,Y,V
2,"70063.0,88589.0,5760.0",Tm9,"86449.0,78034.0,4816.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940619904728,"Fischbach & Dittrich, 1989",...,V,V3,Y,Y,Y,Y,N,N,Y,V
3,"65296.0,86194.0,6202.0",Tm9,"81554.0,74784.0,4819.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940626539592,"Fischbach & Dittrich, 1989",...,V,V4,N,N,N,,Y,N,?,V
4,"69273.0,86122.0,4826.0",Tm9,"88273.0,76603.0,5126.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940616954181,"Fischbach & Dittrich, 1989",...,V,V5,Y,N,N,,N,N,?,V


In [12]:
#Checking duplicates in ids
df[df["Updated_seg_id"].duplicated() == True]

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,patch_id,column_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),dorso-ventral


In [13]:
#Checking updated ids
df[df["Update_confidence"].astype(float) < 1.]

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,patch_id,column_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),dorso-ventral
37,"54698.0,57524.0,5800.0",Tm9,"75082.0,56266.0,5383.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940635702719,"Fischbach & Dittrich, 1989",...,D,D11,Y,N,N,,N,Y,N,D
497,"75858.0,64623.0,3445.0",Tm9,"90216.0,63152.0,6096.0",,L,Marion Silies,Jonas Chojetzki,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940636332651,"Fischbach & Dittrich, 1989",...,,,Y,N,N,,N,N,?,V
538,"75954.0,86210.0,4327.0",Tm9,"92845.0,77280.0,5202.0",,L,Axel Borst Lab,Amalia Braun,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940619461797,"Fischbach & Dittrich, 1989",...,,,Y,N,N,,N,N,?,V
623,"54845.0,66831.0,5547.0",Tm9,"76772.0,62359.0,5406.0",,L,Marion Silies,Lena Lörsch,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940635947895,"Fischbach & Dittrich, 1989",...,,,Y,N,Y,,N,Y,N,D


### 3. If wished: Reorder rows based on condition

In [None]:
sorted_df = df.sort_values(by = 'cluster_id').copy()

In [39]:
sorted_df = df.copy()

In [57]:
df["seg_id"]

0       720575940631867211
1       720575940629938298
2       720575940619904728
3       720575940626539592
4       720575940616954181
               ...        
1370    720575940602880736
1371    720575940612861462
1372    720575940620517312
1373    720575940615223199
1374    720575940619077686
Name: seg_id, Length: 1375, dtype: int64

### 3. If wished: Add specific column status based on another file

### 3.1 Based on a txt with list od ids

In [10]:
# Selecting column for the update and file from which the info comes
column_to_update = 'healthy_L3 (Y/N)' # 'inputs_proofread (Y/N)', detached_lamina (Y/N), 'healthy_L3 (Y/N)'

update_file_path = r'D:\Connectomics-Data\FlyWire\Txts\inputs_proofread'
update_file_with = 'root_ids_Tm9_left_optic_lobe_selection1_plus_patches.txt' # list of ids
update_file_with = 'root_ids_Tm9_left_optic_lobe_selection1_lamina_attached_only_more_strict.txt' # list of ids
update_file_with = 'root_ids_Tm9_right_optic_lobe_selection1.txt' # list of ids
update_file_with = 'root_ids_Tm9_all_input_proofread_20230609.txt' # list of ids
update_file_with = 'root_ids_Tm9_left_optic_lobe_healthy_L3.txt' # list of ids

update_filePath =os.path.join(update_file_path,update_file_with)

# extractring info from the specific file
update_file_with_df = pd.read_csv(update_filePath)
update_file_with_ids_list = update_file_with_df.columns.tolist()

# Updating the list 
df[f'{column_to_update}_updated'] = np.where(df['seg_id'].isin(update_file_with_ids_list), "Y", "?")
df.head()

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),dorso-ventral,Update_confidence,Updated_seg_id,healthy_L3 (Y/N)_updated
0,"63464.0,83140.0,5499.0",Tm9,"82124.0,73675.0,5135.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940631867211,"Fischbach & Dittrich, 1989",...,N,N,,N,,?,V,1.0,720575940631867211,?
1,"67273.0,85576.0,5497.0",Tm9,"84894.0,75635.0,5033.0",,L,Marion Silies,Jonas Chojetzki,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940629938298,"Fischbach & Dittrich, 1989",...,Y,Y,?,N,,Y,V,1.0,720575940629938298,Y
2,"70063.0,88589.0,5760.0",Tm9,"86449.0,78034.0,4816.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940619904728,"Fischbach & Dittrich, 1989",...,Y,Y,Y,N,,Y,V,1.0,720575940619904728,?
3,"65296.0,86194.0,6202.0",Tm9,"81554.0,74784.0,4819.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940626539592,"Fischbach & Dittrich, 1989",...,N,N,,Y,,?,V,1.0,720575940626539592,?
4,"69273.0,86122.0,4826.0",Tm9,"88273.0,76603.0,5126.0",,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940616954181,"Fischbach & Dittrich, 1989",...,N,N,,N,,?,V,1.0,720575940616954181,?


In [12]:
update_file_with_ids_list

['720575940643659287',
 '720575940622899224',
 '720575940633427426',
 '720575940627583048',
 '720575940622355226',
 '720575940628046101',
 '720575940638891683',
 '720575940639351349',
 '720575940611032530',
 '720575940620949925',
 '720575940625864487',
 '720575940626042416',
 '720575940625859709',
 '720575940622307753',
 '720575940633403053',
 '720575940644074656',
 '720575940624206315',
 '720575940639463101',
 '720575940624502013',
 '720575940620954533',
 '720575940627300410',
 '720575940636145775',
 '720575940625640122',
 '720575940612306650',
 '720575940620933837',
 '720575940616066461',
 '720575940641033205',
 '720575940632803228',
 '720575940625651130',
 '720575940609123267',
 '720575940640141885',
 '720575940636951159',
 '720575940635104447',
 '720575940623740883',
 '720575940625970707',
 '720575940631980103',
 '720575940609986680',
 '720575940624753812',
 '720575940625514599',
 '720575940612939634',
 '720575940622801366',
 '720575940624426760',
 '720575940618701888',
 '720575940

### 3.2 Or, Based on excel files with ids in a column and extra information in other columns

In [41]:
# Selecting column for the update and file from which the info comes
#For XYZ columns
update_file_path = r'E:\Connectomics-Data\FlyWire\Excels'
update_file_with = f'XYZ_df_{date}.xlsx'
update_filePath =os.path.join(update_file_path,update_file_with)

# extractring info from the specific file
update_file_with_df = pd.read_excel(update_filePath)
update_file_with_df.head()


Unnamed: 0,XYZ-ME,XYZ-LO,Updated_seg_id,seg_id
0,"63464.0,83140.0,5499.0","82124.0,73675.0,5135.0",720575940631867211,720575940631867211
1,"67273.0,85576.0,5497.0","84894.0,75635.0,5033.0",720575940629938298,720575940629938298
2,"70063.0,88589.0,5760.0","86449.0,78034.0,4816.0",720575940619904728,720575940619904728
3,"65296.0,86194.0,6202.0","81554.0,74784.0,4819.0",720575940626539592,720575940626539592
4,"69273.0,86122.0,4826.0","88273.0,76603.0,5126.0",720575940616954181,720575940616954181


In [42]:
#Transfering info from one dataframe to another based on a reference column (here seg_id)
def update_dataframe_single_column(source_df, target_df, reference_column):
    # Create a dictionary mapping from the reference column to the source DataFrame
    reference_dict = source_df.groupby(reference_column).first().reset_index().to_dict(orient='records')
    reference_dict = {row[reference_column]: row for row in reference_dict}

    # Update the target DataFrame based on the reference column
    for i, row in target_df.iterrows():
        ref = row[reference_column]
        if ref in reference_dict:
            source_row = reference_dict[ref]
            target_df.loc[i] = source_row

    return target_df

In [43]:
# Function inputs
source_cols = ['XYZ-ME', 'XYZ-LO','seg_id']
target_cols = ['XYZ-ME', 'XYZ-LO','seg_id']
reference_column = 'seg_id'

# Selecting dataframes and resetting index
source_df = update_file_with_df[source_cols].copy()
source_df.reset_index(inplace = True, drop = True)
target_df = df[target_cols].copy()
target_df.reset_index(inplace = True, drop = True)


source_df = source_df.astype(str)
target_df = target_df.astype(str)

# Running the function and compleating the dataset
result_df = update_dataframe_single_column(source_df, target_df,reference_column)
result_df.head()

Unnamed: 0,XYZ-ME,XYZ-LO,seg_id
0,"63464.0,83140.0,5499.0","82124.0,73675.0,5135.0",720575940631867211
1,"67273.0,85576.0,5497.0","84894.0,75635.0,5033.0",720575940629938298
2,"70063.0,88589.0,5760.0","86449.0,78034.0,4816.0",720575940619904728
3,"65296.0,86194.0,6202.0","81554.0,74784.0,4819.0",720575940626539592
4,"69273.0,86122.0,4826.0","88273.0,76603.0,5126.0",720575940616954181


In [44]:
# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

result_df = result_df.astype(str)
result_df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df
writer.save()
writer.close()

### 3. If wished: Add the center of mass of postsynaptic sites

In [14]:
#Function
def find_center_point(points, threshold):
    if isinstance(points, list):
        points = np.array(points)

    # Calculate the distances between each point and all other points
    distances = np.linalg.norm(points[:, np.newaxis] - points, axis=2)

    # Calculate the average distance for each point
    avg_distances = np.mean(distances, axis=1)

    # Find the indices of points within the threshold distance
    valid_indices = np.where(avg_distances < threshold)[0]

    # Check if there are any valid points
    if len(valid_indices) > 0:
        # Calculate the geometric center of valid points
        center_point = np.mean(points[valid_indices], axis=0)
        #Rounding
        center_point = np.round(center_point, decimals=1)
        
        # Find the closest point to the center
        closest_point_index = np.argmin(np.linalg.norm(points[valid_indices] - center_point, axis=1))
        closest_point = points[valid_indices][closest_point_index]
    else:
        center_point = np.array([0,0,0])
        closest_point = np.array([0,0,0])

    return center_point.tolist(), closest_point.tolist()

In [15]:
def combine_xyz(df):
    """
    Combines separated x, y and z column into one, changes units and adds new column names for
    generating a neuroglancer link with function nglui.statebuilder.helpers.make_synapse_neuroglancer_link

    Args:
        pandas data frame containing x,y and z as columns of the same length

    Returns:
        same pandas data frame containing a new column with [x/4,y/4,z/40] lists
    """
    # Generating the single column

    post_pt_position = []
    for x,y,z in zip(df['post_x'].tolist(),df['post_y'].tolist(),df['post_z'].tolist()):
        temp_ls = [x/4,y/4,z/40]
        post_pt_position.append(temp_ls)

    pre_pt_position = []
    for x,y,z in zip(df['pre_x'].tolist(),df['pre_y'].tolist(),df['pre_z'].tolist()):
        temp_ls = [x/4,y/4,z/40]
        pre_pt_position.append(temp_ls)

    #Adding new columns and names
    df['post_pt_position'] = post_pt_position
    df['pre_pt_position'] = pre_pt_position
    #Changing column names
    df.rename(columns={'pre': 'pre_pt_root_id', 'post': 'post_pt_root_id'}, inplace=True)

In [27]:
#Loading postsynaptic coordinated for each neuron in a specific neuropile and getting the center point
#The loop can take quite a lot of time depending on the ammount of ids!


#Looping for several rounds of X (e.g, 100)
rounds_of = 100
loop_number = 1 # Must start at "1" if not run before
start_point = (rounds_of*loop_number) - rounds_of
 
#Shortening the df to priorize id:

#short_df = df[(df['detached_lamina (Y/N)'] == 'N') &(df['inputs_proofread (Y/N)'] == 'Y')].copy()
short_df = df[df['XYZ-ME'] == 'nan'].copy()

In [28]:
len(short_df)

76

In [29]:
round(len(short_df)/rounds_of)

1

In [30]:
# Looping across chosen rows

for i in range(loop_number, round(len(short_df)/rounds_of)+2):
    print(f'Loop #: {loop_number}')
    curr_df = short_df[start_point:start_point+rounds_of].copy()
    curr_df.reset_index(inplace=True,drop=True)

    XYZ_ME = []
    XYZ_LO = []
    for seg_id in curr_df["Updated_seg_id"]:

        #Print
        print(f'currently at: {seg_id}')
        # Getting coordinates of synaptic contacts
        neurons_inputs = flywire.synapses.fetch_synapses(seg_id, pre=False, post=True, attach=True, 
                                                     min_score=50, clean=True, transmitters=False, 
                                                     neuropils=True, batch_size=30, 
                                                     dataset='production', progress=True,mat= "live")
        combine_xyz(neurons_inputs)
        threshold = 5000

        try:
            #Find the center point with medulla coordinates
            neurons_inputs_ME = neurons_inputs[neurons_inputs['neuropil'].str.contains('ME')].copy()
            points = neurons_inputs_ME['post_pt_position'].tolist()
            center_point, closest_point = find_center_point(points, threshold)
            XYZ_ME.append(closest_point)
        except: # to catch some point clouds that have no ME 
            XYZ_ME.append([0,0,0])
            
        try:
            #Find the center point with lobula coordinates
            neurons_inputs_LO = neurons_inputs[neurons_inputs['neuropil'].str.contains('LO')].copy()
            points = neurons_inputs_LO['post_pt_position'].tolist()
            center_point, closest_point = find_center_point(points, threshold)
            XYZ_LO.append(closest_point)
        except: # to catch some point clouds that have no LO labels
            XYZ_LO.append([0,0,0])
            

    XYZ_LO_strings = [','.join(map(str, sublist)) for sublist in XYZ_LO]
    XYZ_ME_strings = [','.join(map(str, sublist)) for sublist in XYZ_ME]

    #saving
    XYZ_df = pd.DataFrame(XYZ_ME_strings, columns=['XYZ-ME'])
    XYZ_df['XYZ-LO'] = XYZ_LO_strings
    XYZ_df['Updated_seg_id'] =  curr_df['Updated_seg_id']
    XYZ_df['seg_id'] =  curr_df['seg_id']
    XYZ_df.to_excel(f'D:\Connectomics-Data\FlyWire\Excels\drive-data-sets\XYZ_df_{loop_number}_{date}.xlsx', index=False)
    start_point += rounds_of
    loop_number += 1


Loop #: 1
currently at: 720575940620627538
currently at: 720575940626434348
currently at: 720575940631049291
currently at: 720575940647336068
currently at: 720575940633455052
currently at: 720575940611861710
currently at: 720575940624344516
currently at: 720575940622001833
currently at: 720575940622687751
currently at: 720575940618883032
currently at: 720575940622926853
currently at: 720575940613618216
currently at: 720575940634666847
currently at: 720575940617322101
currently at: 720575940642562267
currently at: 720575940627175671
currently at: 720575940635061695
currently at: 720575940630462684
currently at: 720575940635388607
currently at: 720575940637504653
currently at: 720575940639898420
currently at: 720575940630982711
currently at: 720575940626245676
currently at: 720575940629639674
currently at: 720575940617488541
currently at: 720575940614473011
currently at: 720575940612568549
currently at: 720575940620597586
currently at: 720575940608690443
currently at: 720575940622440150


In [None]:

## Testing function with some manually inserted dots
import numpy as np
import matplotlib.pyplot as plt

# Example XYZ coordinates
points = [
    [66659, 85714, 5406],
    [66885, 85749, 5367],
    [67124, 86454, 5390],
    [67664, 86251, 5434],
    [67273, 85576, 5497],
    [66399, 85482, 5428],
    [66803, 85360, 5477],
    [66505, 85552, 5413],
    [66772, 85838, 5450],
    [66496, 85540, 5402],
    [72452, 82402, 5694],
    [75373, 80809, 5808]
]

points = neurons_inputs_LO['post_pt_position'].tolist()

# Convert the points to a NumPy array
points = np.array(points)

# Set the threshold distance to exclude outliers
threshold = 5000

# Find the center point
center_point, closest_point  = find_center_point(points, threshold)

if center_point is not None:
    print("Center Point:", center_point)
else:
    print("No valid points within the threshold.")

# Plotting the points and center point
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Scatter plot for the points
ax.scatter(points[:, 0], points[:, 1], points[:, 2], c='y', label='Points')

# Scatter plot for the center point
if center_point is not None:
    ax.scatter(center_point[0], center_point[1], center_point[2], c='r', marker='x', label='Center Point')
    ax.scatter(closest_point [0], closest_point [1], closest_point [2], c='b', marker='x', label='closest_point')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.legend()

plt.show()

### 4. Saving back to excel file

In [31]:

# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df
writer.save()
writer.close()

# C) Updating of list of IDs from a txt file / or a plain excell file


### 1. Loading the data from a txt file

In [19]:
# Choose path and file
dataPath = r'C:\Users\smolina\Connectomics-Data\FlyWire\Txts\cell_type_proofread'
fileDate = '20230906'
fileName = f'root_ids_marker_olr_type_Tm20_{fileDate}.txt'
#fileName = f'temp.txt'
filePath = os.path.join(dataPath,fileName)
ids_df = pd.read_csv(filePath, sep = ",")
curr_ID_ls = ids_df.columns.tolist()
print(curr_ID_ls)

['720575940605979424', '720575940609569237', '720575940612006350', '720575940618051001', '720575940618588075', '720575940619160405', '720575940619505243', '720575940619640280', '720575940620650155', '720575940621086452', '720575940621216577', '720575940626427024', '720575940626640539', '720575940627721295', '720575940627764105', '720575940628252871', '720575940628402760', '720575940628781111', '720575940630084986', '720575940630311954', '720575940630683961', '720575940635666356', '720575940637730394', '720575940645114647', '720575940645352471', '720575940603809452', '720575940603915436', '720575940604529580', '720575940604711392', '720575940604721376', '720575940604837344', '720575940604892640', '720575940605026784', '720575940605225900', '720575940605293698', '720575940605454025', '720575940605678270', '720575940605952433', '720575940606290121', '720575940606337969', '720575940606502066', '720575940606553289', '720575940606790834', '720575940606839218', '720575940606855177', '72057594

In [20]:
#Updating the IDs via Fafbseg
updated_ID_df = fafbseg.flywire.update_ids(curr_ID_ls, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

Updating:   0%|          | 0/733 [00:00<?, ?it/s]

In [21]:
updated_ID_df[updated_ID_df['confidence'] < 1]

Unnamed: 0,old_id,new_id,confidence,changed
0,720575940605979424,720575940624947207,0.82,True
2,720575940612006350,720575940613508325,0.47,True
11,720575940626427024,720575940621601926,0.76,True
15,720575940628252871,720575940619043156,0.99,True
47,720575940607244489,720575940631332060,0.99,True
50,720575940607638108,720575940606851761,0.7,True
60,720575940609160713,720575940615644066,0.83,True
68,720575940610212420,720575940633727059,0.93,True
70,720575940610538030,720575940637859246,0.83,True
89,720575940612575717,720575940623832781,0.99,True


In [22]:
updated_ID_df[updated_ID_df['confidence'] < 1]['new_id'].tolist()

[720575940624947207,
 720575940613508325,
 720575940621601926,
 720575940619043156,
 720575940631332060,
 720575940606851761,
 720575940615644066,
 720575940633727059,
 720575940637859246,
 720575940623832781,
 720575940656115105,
 720575940622382964,
 720575940622021552,
 720575940619432773,
 720575940625798076,
 720575940616565958,
 720575940632915503,
 720575940624523411,
 720575940614987059,
 720575940622050726,
 720575940615084719,
 720575940648459652,
 720575940605924320,
 720575940609972747,
 720575940632501034,
 720575940626806672,
 720575940615161297,
 720575940629766403,
 720575940653269238,
 720575940630499831,
 720575940627502876,
 720575940619105053,
 720575940619717942,
 720575940628725009,
 720575940637608846,
 720575940620910928,
 720575940624702727,
 720575940621639339,
 720575940616984518,
 720575940609623049,
 720575940620104059,
 720575940624208520,
 720575940617320379,
 720575940630250087,
 720575940622485887]

### 2. Saving data in txt file

In [23]:
#Saving the data
updated_fileName = f'Updated_{fileName}'
updated_filePath = os.path.join(dataPath,updated_fileName)

id_list = list(set(updated_ID_df['new_id'].tolist()))
with open(updated_filePath , "w") as output:
    output.write(str(id_list))


### 1. Or, loading the data from an excell file

In [3]:
# Choose path and file
PC_disc = 'D'
dataPath = f'{PC_disc}:\Connectomics-Data\FlyWire\Excels\drive-data-sets\database'
date = '20231017'
fileName = f'TmY10 proofreadings_{date}.xlsx'
filePath = os.path.join(dataPath,fileName)

#Loading file as DataFrame
df = pd.read_excel(filePath)


#Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
if df["seg_id"][0] == 'asdf': 
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)

#Dropping dupllicates
result_df = df.drop_duplicates(subset=["seg_id"], keep='first').copy()

#Quick look on the dataframe
display(result_df.head())
    
#Getting the lists of IDs to update
curr_ID_ls = result_df["seg_id"].tolist()

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,rim_area (Y/N),hemisphere,lab,author,name,seg_id,...,optic_lobe_id,patch_id,column_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N)
0,,TmY10,,,,R,,,,720575940627147322,...,,,,,,,,,,
1,,TmY10,,,,R,,,,720575940625575973,...,,,,,,,,,,
2,,TmY10,,,,R,,,,720575940635959159,...,,,,,,,,,,
3,,TmY10,,,,R,,,,720575940628843536,...,,,,,,,,,,
4,,TmY10,,,,R,,,,720575940617736616,...,,,,,,,,,,


In [6]:
#Updating all IDs at once
updated_ID_df = fafbseg.flywire.update_ids(curr_ID_ls, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

Updating:   0%|          | 0/255 [00:00<?, ?it/s]

In [None]:
# Or, Updating the IDs via Fafbseg in a for loop

_start = 0
_steps = 100
_last = _steps
_rounds =  round(len(curr_ID_ls)/ _steps) +1

print(f'Rounds to perform: {_rounds}')
updated_ID_df = pd.DataFrame()
for i in range(0,_rounds):
    #print(f'Round {i}')
    curr_ID_df = fafbseg.flywire.update_ids(curr_ID_ls[_start:_last], stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
    updated_ID_df = pd.concat([updated_ID_df,curr_ID_df])
    _start = _start + _steps
    _last = _last + _steps


In [7]:
result_df.reset_index(drop=True, inplace=True)
result_df['Updated_seg_ids'] = updated_ID_df['new_id'].astype(str).tolist()
result_df['Updated_confidence'] = updated_ID_df['confidence'].astype(str).tolist()

In [8]:
result_df.tail()

Unnamed: 0,XYZ-ME,symbol,XYZ-LO,quality comment,rim_area (Y/N),hemisphere,lab,author,name,seg_id,...,column_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),Updated_seg_ids,Updated_confidence
250,,TmY10,,,,R,,,,720575940644680648,...,,,,,,,,,720575940619580224,1.0
251,,TmY10,,,,R,,,,720575940610758008,...,,,,,,,,,720575940610758008,1.0
252,,TmY10,,,,R,,,,720575940620862384,...,,,,,,,,,720575940622944773,0.99
253,,TmY10,,,,R,,,,720575940620744225,...,,,,,,,,,720575940640094542,0.95
254,,TmY10,,,,R,,,,720575940614468094,...,,,,,,,,,720575940614468094,1.0


In [9]:
display(updated_ID_df.head())
print('Update confidences: ')
print(set(updated_ID_df['confidence'].tolist()))

Unnamed: 0,old_id,new_id,confidence,changed
0,720575940627147322,720575940611088452,1.0,True
1,720575940625575973,720575940626398797,1.0,True
2,720575940635959159,720575940635959159,1.0,False
3,720575940628843536,720575940634349523,1.0,True
4,720575940617736616,720575940620728536,1.0,True


Update confidences: 
{0.99, 1.0, 0.6, 0.96, 0.86, 0.82, 0.95, 0.9, 0.94, 0.88, 0.98, 0.81, 0.92, 0.97, 0.87, 0.89}


In [10]:
updated_ID_df[updated_ID_df['confidence'] < 1]

Unnamed: 0,old_id,new_id,confidence,changed
15,720575940621734109,720575940626398285,0.99,True
17,720575940631809611,720575940615126447,0.86,True
37,720575940612241066,720575940613605273,0.96,True
44,720575940630558838,720575940623180321,0.96,True
45,720575940630513408,720575940610294211,0.6,True
55,720575940611821410,720575940624345641,0.82,True
65,720575940633890017,720575940626397773,0.6,True
74,720575940617878299,720575940621661062,0.95,True
75,720575940620452261,720575940628938472,0.95,True
79,720575940619668904,720575940640337205,0.9,True


In [11]:
updated_ID_df[updated_ID_df['confidence'] < 1]['new_id'].tolist()

[720575940626398285,
 720575940615126447,
 720575940613605273,
 720575940623180321,
 720575940610294211,
 720575940624345641,
 720575940626397773,
 720575940621661062,
 720575940628938472,
 720575940640337205,
 720575940612504782,
 720575940623464918,
 720575940621795822,
 720575940621080711,
 720575940610500547,
 720575940630586238,
 720575940623463382,
 720575940628658692,
 720575940612980629,
 720575940628682383,
 720575940627519746,
 720575940615125423,
 720575940628389970,
 720575940623464918,
 720575940620724952,
 720575940628937960,
 720575940613148378,
 720575940621795822,
 720575940628389970,
 720575940628633621,
 720575940634352851,
 720575940625169661,
 720575940628937960,
 720575940621932909,
 720575940621655942,
 720575940622007905,
 720575940624345641,
 720575940632606777,
 720575940611069764,
 720575940622822388,
 720575940640266037,
 720575940629041916,
 720575940622944773,
 720575940640094542]

In [12]:
updated_ID_df[updated_ID_df['confidence'] < 1]['old_id'].tolist()

[720575940621734109,
 720575940631809611,
 720575940612241066,
 720575940630558838,
 720575940630513408,
 720575940611821410,
 720575940633890017,
 720575940617878299,
 720575940620452261,
 720575940619668904,
 720575940624810220,
 720575940620279643,
 720575940604946633,
 720575940616425797,
 720575940618468637,
 720575940613146861,
 720575940620164657,
 720575940619190577,
 720575940633880462,
 720575940632453224,
 720575940639680445,
 720575940633883031,
 720575940613988077,
 720575940624863997,
 720575940629695223,
 720575940628061061,
 720575940629590654,
 720575940648559748,
 720575940618239673,
 720575940619718449,
 720575940617829588,
 720575940620840277,
 720575940635451022,
 720575940630272572,
 720575940633277335,
 720575940624042760,
 720575940615119569,
 720575940645307415,
 720575940626370704,
 720575940628319431,
 720575940620337317,
 720575940623956265,
 720575940620862384,
 720575940620744225]

### 2. Saving back in the excel file

In [13]:
# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

result_df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df
writer.save()
writer.close()

In [102]:
updated_ID_df[updated_ID_df['new_id'] == 720575940613560342]

Unnamed: 0,old_id,new_id,confidence,changed
