# Updating a proofreading table

This notebook contains functions and example scripts to update a proofreading table stored in an excel file

Content per section:
A) Updating IDs and ounts
B) Updating main postsynaptic neurons database (table)

In [1]:
import os
import pandas as pd
import numpy as np
from fafbseg import flywire
from caveclient import CAVEclient
client = CAVEclient('flywire_fafb_production')



## A) Updating IDs and presynaptic counts

### 1. Loading Tm9 input neuron table

In [69]:
# Choose path and file
dataPath = r'D:\Connectomics-Data\FlyWire\Excels\drive-data-sets'
date = '20230524'
fileName = f'All_Tm9_neurons_input_count_ME_R_{date}.xlsx'
fileName = f'Across_OL_Tm9_neurons_input_count_ME_R_{date}.xlsx'
filePath = os.path.join(dataPath,fileName)

In [70]:
#Loading file as DataFrame
df = pd.read_excel(filePath)
df.head()

Unnamed: 0,presynaptic_ID,counts,count %,postsynaptic_ID,symbol,guess,lab,author,name,FlyWire proofread (Y/N),...,Update_confidence,Updated_counts,duplicates,notes,annotations_link,Working on,optic_lobe_id,column_id,patch_id,hemisphere
0,,,,asdf,,,,,,,...,,,,,,,,,,
1,7.205759406045856e+17,24.0,,720575940637150821,L3,,Mathias Wernet Lab,Gizem Sancer,"Lamina monopolar cell 3, L3, FBbt_00003721 (F...",Y,...,,,,,,,,,,R
2,7.205759406220529e+17,17.0,,720575940637150821,Mi4,,Marion Silies,Lena Lörsch,"Medullary intrinsic neuron 4, Mi4, Mi4_R, FBbt...",Y,...,,,,,,,,,,R
3,7.205759406309476e+17,6.0,,720575940637150821,Tm16,,Marion Silies,Sebastian Mauricio Molina Obando,"Transmedullary neuron 16, Tm16, Tm16_R, FBbt_0...",Y,...,,,,,,,,,,R
4,7.205759406150035e+17,4.0,,720575940637150821,Tm16,,,,"Transmedullary neuron 16, Tm16, Tm16_R, FBbt_0...",Y,...,,,,,,,,,,R


In [71]:
#Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
if df["postsynaptic_ID"][0] == 'asdf': 
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)

#Getting the lists of IDs to update
segmentIDs = df["seg_id"].copy()
pre_IDs = df["presynaptic_ID"].copy()
post_IDs = df["postsynaptic_ID"].copy()


In [72]:
#Replacing the 'INPUTS PROOFREAD' labelled row of the lists for the previous postsynaptic cell ID
for i, id in enumerate(pre_IDs):
    if id == 'INPUTS PROOFREAD':
        segmentIDs[i] = post_IDs[i-1] # The previous postsynaptic cell ID
        pre_IDs[i] = post_IDs[i-1]
        post_IDs[i] = post_IDs[i-1]

### 2. Updating IDs considering our excel file extructure (fast and new version)

In [73]:
#Updating segments 

temp_segmentIDs_df = flywire.update_ids(pre_IDs.tolist(), stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
updated_presynaptic_ID_column = temp_segmentIDs_df["new_id"]
confidence_of_update = temp_segmentIDs_df["confidence"]




Updating:   0%|          | 0/5949 [00:00<?, ?it/s]

### 2. Updating IDs considering our excel file extructure (old and slow version) - DO NOT USE -

Updating segments will be slow since we do it in a loop. Please just run this part if you really consider that the IDs in which your are mostly interested in need an update

In [41]:
#Updating segments 
updated_presynaptic_ID_column = []
confidence_of_update = []
count = 1
for id in pre_IDs:
    count =+ 1
    if id == 'INPUTS PROOFREAD':
        updated_presynaptic_ID_column.append('INPUTS PROOFREAD')
        confidence_of_update.append('INPUTS PROOFREAD')
    else:
        temp_segmentIDs_df = flywire.update_ids(id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        updated_presynaptic_ID_column.append(str(temp_segmentIDs_df["new_id"][0]))
        confidence_of_update.append(temp_segmentIDs_df["confidence"][0])
    print(f'row: {count} {updated_presynaptic_ID_column[-1]}')
    

row: 1 720575940659388801
row: 1 720575940615238668
row: 1 720575940609780693
row: 1 720575940625018512
row: 1 720575940636607343
row: 1 720575940614237736
row: 1 720575940612520666
row: 1 720575940628312425
row: 1 720575940615007143
row: 1 720575940605245984
row: 1 720575940618464084
row: 1 720575940630882620
row: 1 720575940620975525
row: 1 720575940631339026
row: 1 720575940639559357
row: 1 720575940610613902
row: 1 720575940617694164
row: 1 720575940623818423
row: 1 720575940620503461
row: 1 720575940624241589
row: 1 720575940631976403
row: 1 720575940626440240
row: 1 720575940627168798
row: 1 720575940631116908
row: 1 720575940610400142
row: 1 720575940639005006
row: 1 720575940620233381
row: 1 720575940622763809
row: 1 720575940625488142
row: 1 720575940624124935
row: 1 720575940608879964
row: 1 720575940628041969
row: 1 720575940629569835
row: 1 720575940405923014
row: 1 720575940632726732
row: 1 INPUTS PROOFREAD
row: 1 720575940608700117
row: 1 720575940606285502
row: 1 7205759

row: 1 720575940613411606
row: 1 720575940620459201
row: 1 720575940626459236
row: 1 720575940645022756
row: 1 720575940608590601
row: 1 720575940613079781
row: 1 720575940605202092
row: 1 720575940627304073
row: 1 INPUTS PROOFREAD
row: 1 720575940624985700
row: 1 720575940632418605
row: 1 720575940626719065
row: 1 720575940617817181
row: 1 720575940637788213
row: 1 720575940632171724
row: 1 720575940634841244
row: 1 720575940629804699
row: 1 720575940633028260
row: 1 720575940635065239
row: 1 720575940624134538
row: 1 720575940628101253
row: 1 720575940649696633
row: 1 720575940645336855
row: 1 720575940624882494
row: 1 720575940629563983
row: 1 720575940626440240
row: 1 720575940624020009
row: 1 720575940622589978
row: 1 720575940518216067
row: 1 720575940623889800
row: 1 720575940608824649
row: 1 720575940632050232
row: 1 720575940623001599
row: 1 720575940610459977
row: 1 720575940630016450
row: 1 720575940629188431
row: 1 720575940614572967
row: 1 720575940612711603
row: 1 7205759

row: 1 720575940589625033
row: 1 720575940639068288
row: 1 720575940643709049
row: 1 720575940642783117
row: 1 720575940607947906
row: 1 720575940609138898
row: 1 INPUTS PROOFREAD
row: 1 720575940628389116
row: 1 720575940637944691
row: 1 720575940635257848
row: 1 720575940620734598
row: 1 720575940627487613
row: 1 720575940628005007
row: 1 720575940630462167
row: 1 720575940635720031
row: 1 720575940616958603
row: 1 720575940611009838
row: 1 720575940611395448
row: 1 720575940613137942
row: 1 720575940500899759
row: 1 720575940632088680
row: 1 720575940623625033
row: 1 720575940638570980
row: 1 720575940626709276
row: 1 720575940617791522
row: 1 720575940633581282
row: 1 720575940606214432
row: 1 720575940631225083
row: 1 720575940640332533
row: 1 720575940618303974
row: 1 720575940629797115
row: 1 720575940631727146
row: 1 720575940617750050
row: 1 720575940629236634
row: 1 720575940621536365
row: 1 720575940635196607
row: 1 720575940501023407
row: 1 720575940500901551
row: 1 7205759

row: 1 720575940628312425
row: 1 720575940628101253
row: 1 720575940639559357
row: 1 720575940652504566
row: 1 720575940631730127
row: 1 720575940620503461
row: 1 720575940609780693
row: 1 720575940613079781
row: 1 720575940629221891
row: 1 720575940629045035
row: 1 720575940611349458
row: 1 720575940625011436
row: 1 720575940606168608
row: 1 720575940633557075
row: 1 720575940606398398
row: 1 720575940631976403
row: 1 720575940624124935
row: 1 720575940625377468
row: 1 720575940405926342
row: 1 720575940638533092
row: 1 720575940626657854
row: 1 720575940632757964
row: 1 720575940659388801
row: 1 INPUTS PROOFREAD
row: 1 720575940639479630
row: 1 720575940622458356
row: 1 720575940635666367
row: 1 720575940621399974
row: 1 720575940624361991
row: 1 720575940626743908
row: 1 720575940627172441
row: 1 720575940629780031
row: 1 720575940610603332
row: 1 720575940624304903
row: 1 720575940629763966
row: 1 720575940615435926
row: 1 720575940645386519
row: 1 720575940637271214
row: 1 7205759

row: 1 720575940621489261
row: 1 720575940627172441
row: 1 720575940615435926
row: 1 720575940623858431
row: 1 720575940627010076
row: 1 720575940417471325
row: 1 720575940620793697
row: 1 720575940621804353
row: 1 720575940639479630
row: 1 720575940642142709
row: 1 720575940614431733
row: 1 720575940624361991
row: 1 720575940637271214
row: 1 720575940642774413
row: 1 720575940610603332
row: 1 INPUTS PROOFREAD
row: 1 720575940626170149
row: 1 720575940613070489
row: 1 720575940640543320
row: 1 720575940621354349
row: 1 720575940645912174
row: 1 720575940630918028
row: 1 720575940612895459
row: 1 720575940628762267
row: 1 720575940624881928
row: 1 720575940615100926
row: 1 720575940626498853
row: 1 720575940611046520
row: 1 720575940616022456
row: 1 720575940629563983
row: 1 720575940626440240
row: 1 720575940620169492
row: 1 720575940628786250
row: 1 720575940641531611
row: 1 720575940608590601
row: 1 720575940622019617
row: 1 720575940620656262
row: 1 720575940618261649
row: 1 7205759

In [75]:
### Setting important columns to str
df["presynaptic_ID"] = df["presynaptic_ID"].astype(str)
df["postsynaptic_ID"] = df["postsynaptic_ID"].astype(str)
df["seg_id"] = df["seg_id"].astype(str)

In [76]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_presynaptic_ID"] = updated_presynaptic_ID_column
df["Updated_presynaptic_ID"] = df["Updated_presynaptic_ID"].astype(str) 
df["Update_confidence"] = confidence_of_update

In [77]:
df

Unnamed: 0,presynaptic_ID,counts,count %,postsynaptic_ID,symbol,guess,lab,author,name,FlyWire proofread (Y/N),...,Updated_counts,duplicates,notes,annotations_link,Working on,optic_lobe_id,column_id,patch_id,hemisphere,Updated_presynaptic_ID
0,720575940604585662,24,,720575940637150821,L3,,Mathias Wernet Lab,Gizem Sancer,"Lamina monopolar cell 3, L3, FBbt_00003721 (F...",Y,...,,,,,,,,,R,720575940604585662
1,720575940622052869,17,,720575940637150821,Mi4,,Marion Silies,Lena Lörsch,"Medullary intrinsic neuron 4, Mi4, Mi4_R, FBbt...",Y,...,,,,,,,,,R,720575940622052869
2,720575940630947602,6,,720575940637150821,Tm16,,Marion Silies,Sebastian Mauricio Molina Obando,"Transmedullary neuron 16, Tm16, Tm16_R, FBbt_0...",Y,...,,,,,,,,,R,720575940630947602
3,720575940615003554,4,,720575940637150821,Tm16,,,,"Transmedullary neuron 16, Tm16, Tm16_R, FBbt_0...",Y,...,,,,,,,,,R,720575940638965055
4,720575940635652709,4,,720575940637150821,Mi13,,,,"Medullary intrinsic neuron Mi13, Mi13, Mi13_R,...",Y,...,,,,,,,,,R,720575940635652709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5944,720575940635142810,1,,720575940616066461,,,,,,,...,,,,,,,,,R,720575940635142810
5945,720575940654395297,1,,720575940616066461,,,,,,,...,,,,,,,,,R,720575940654395297
5946,720575940612557717,1,,720575940616066461,Dm12,,"Mala Murthy Lab, Sebastian Seung Lab",Ben Silverman,"Distal medullary amacrine neuron Dm12, Distal ...",Y,...,,,,,,,,,R,720575940636879215
5947,720575940627020616,1,,720575940616066461,,,,,,,...,,,,,,,,,R,720575940627020616


### 3. Updating counts between pre- and post synaptic partners

Strategy to save time:
1. (Step1) Create a dictionary with postsynaptic neuron's IDs as KEYS and their a input-neuron dataframe as VALUES , and then (step2) create a loop across presynaptic IDs to get the exact counting from the input-neuron-dataframe of postsynaptic neurons, loading the correct input-neuron-dataframe from the dictionary each time.

    Or, all in one single step: start a loop across unique postsynaptic IDs (be careful that the order is in the same as in the excel file, print them), get the input-neuron dataframe per each one in each round of the loop, and add a second loop across presynaptic IDs to get the exact counting.
    

2. Save the new countings in order of production in the same dataframe, as well as a column showing duplicated rows

In [81]:
#Implementing the "single step" option

unique_post_IDs = df[df['presynaptic_ID'] != 'INPUTS PROOFREAD']['postsynaptic_ID'].unique()
new_count_ls =  [] # For storing the new counts
copy_count_ls = [] # For storing the copy counts 
for post_id in unique_post_IDs: # loop across postsynaptic ids.
    if flywire.is_latest_root([post_id])[0]:
        curr_id = post_id
        #print(f'Getting inputs from: {curr_id}')
    else:
        updated_ID_df = flywire.update_ids(post_id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        curr_id = updated_ID_df["new_id"][0]
        #print(f'Getting inputs from: {curr_id}')
    
    curr_df = df[df['postsynaptic_ID'] == post_id].copy()
    curr_neurons_inputs = flywire.synapses.fetch_synapses(curr_id, pre=False, post=True, attach=True, 
                                             min_score=50, clean=True, transmitters=False, 
                                             neuropils=True, batch_size=30, 
                                             dataset='production', progress=True,mat= "live")
    
    
    pre_id_copies_dict = {} # For checking ID duplicates, triplicates, etc
    for i,pre_id in enumerate(curr_df["Updated_presynaptic_ID"]): # loop across presynaptic ids
        #Counting copies
        if pre_id in pre_id_copies_dict.keys():
            pre_id_copies_dict[pre_id]+= 1 # dupliocates, triplicates ...
        else:
            pre_id_copies_dict[pre_id] = 1 # initial count
    
        c = len(curr_neurons_inputs[curr_neurons_inputs['pre'] == int(pre_id)])
        new_count_ls.append(c) # count between specific pre and post
        copy_count_ls.append(pre_id_copies_dict[pre_id])
        #print(f'Counts with {pre_id}: {c}, confidence {curr_df["Update_confidence"].tolist()[i]}, copy {pre_id_copies_dict[pre_id]}')
    
    new_count_ls.append('INPUTS PROOFREAD')
    copy_count_ls.append('INPUTS PROOFREAD')
        


In [82]:
# Adding new columns to the main dataframe
df["Updated_counts"] = new_count_ls
df["duplicates"] =  copy_count_ls


### 4. Saving back to the excell file

In [83]:
# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

df.to_excel(writer, sheet_name='Updated_dataframe_'+date_str)
writer.save()
writer.close()

### 3. Or, saving in a new excel file

In [56]:
#Saving in a new file

import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

file_name = f'All_Tm9_neurons_input_count_segments_update_{date_str}.xlsx'
savePath = os.path.join(dataPath, file_name)
df.to_excel(savePath, sheet_name='Segments update')

### Faster update (code in progress...)

In [None]:
#Updating segments (FAST)
#Do all segments as once, but filter out columns with 'INPUTS PROOFREAD' first
filtered_df = df[df["presynaptic_ID"] =! 'INPUTS PROOFREAD'].copy()
segmentIDs = filtered_df["seg_id"]
pre_IDs = filtered_df["presynaptic_ID"]
post_IDs = filtered_df["postsynaptic_ID"]

new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
new_segmentIDs = new_segmentIDs_df["new_id"].tolist()


In [None]:
### Updating the dataframe
# Adding the new url column to the data frame
filtered_df["Updated_seg_id"] = new_segmentIDs
filtered_df["Updated_seg_id"] = filtered_df["Updated_seg_id"].astype(str) 

In [None]:
# Code not finished....

# Updating main postsynaptic neurons database

### 1. Loading postsynaptic neuron dataframe

In [23]:
# Choose path and file
dataPath = r'E:\Connectomics-Data\FlyWire\Excels\drive-data-sets'
fileName = 'Tm1 proofreadings_20230105.xlsx'
fileName = 'T4 proofreadings dorsal_20230406.xlsx'
fileName = 'Tm9 proofreadings_20230609.xlsx'
filePath = os.path.join(dataPath,fileName)

In [59]:
#Loading file as DataFrame
df = pd.read_excel(filePath)
if df["seg_id"][0] == 'asdf': #Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)
df = df.astype(str)
display(df.head())
segmentIDs = df["seg_id"].tolist()

Unnamed: 0,XYZ-Me,symbol,XYZ-Lo,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,patch_id,backbone proofread (Y/N),twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),link,who's working on it
0,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940631867211,"Fischbach & Dittrich, 1989",...,V,Y,N,N,,N,,?,,
1,,Tm9,,,L,Marion Silies,Jonas Chojetzki,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940629938298,"Fischbach & Dittrich, 1989",...,V,Y,Y,Y,?,N,?,Y,,
2,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940619904728,"Fischbach & Dittrich, 1989",...,V,Y,Y,Y,Y,N,N,Y,,
3,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940626539592,"Fischbach & Dittrich, 1989",...,V,N,N,N,,Y,,?,,
4,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940616954181,"Fischbach & Dittrich, 1989",...,V,Y,N,N,,N,,?,,


### 2. Update with CAVE (not preferred)

In [None]:
#Update IDs witth chunkedgraph module of CAVE

# For "segmentsIDs"
#Empty spaces are type float and will be filled with "0"
segmentsIDs_int = list(map(lambda x: 0 if type(x) == float else int(x),segmentIDs)) # From str to int
#to create a np.zeros array is important for the next step
new_segmentsIDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),segmentsIDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_segmentsIDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_segmentsIDs_int)) # From int to str

In [None]:
# For "pre_IDs"
#Empty spaces are type float and will be filled with "0"
pre_IDs_int = list(map(lambda x: 0 if type(x) == float or x == 'INPUTS PROOFREAD'  else int(x),pre_IDs)) # From str to int
#to create a np.zeros array is important for the next step
new_pre_IDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),pre_IDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_pre_IDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_pre_IDs_int)) # From int to str

In [None]:
# For "post_IDs"
#Empty spaces are type float and will be filled with "0"
post_IDs_int = list(map(lambda x: 0 if type(x) == float else int(x),post_IDs)) # From str to int
#to create a np.zeros array is important for the next step
new_post_IDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),post_IDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_post_IDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_post_IDs_int)) # From int to str

In [None]:
### Seleting the right pre_ID if the update gaves more than one
#Getting the correct pre_IDs than contact each post_ID

from functools import reduce

correct_IDs = {}
curr_post_ID = 'Start'
for idx,pre_IDs in  enumerate(new_pre_IDs_str):
        
    #If there are multiple IDs in an array
    if type(pre_IDs) != str and type(pre_IDs) == np.ndarray:
        #Creatting synapses dataframe only once per each post_ID
        if curr_post_ID != new_post_IDs_str[idx]:
            synapses = flywire.synapses.fetch_synapses(new_post_IDs_str[idx], pre=False, post=True, attach=True,
                                          min_score=50, clean=True, transmitters=False,
                                          neuropils=True, batch_size=30,
                                          dataset='production', progress=True,mat="live")
            #Update post_ID
            print(f"Looking at post_ID: {new_post_IDs_str[idx]}") 
            curr_post_ID = new_post_IDs_str[idx]
            
        #Proof connectivity to the respective post_ID for each of them
        for ID in pre_IDs:
            if synapses[synapses['pre'] == ID].empty:
                continue
            else: # Only add the pre_ID (and its index) which has valid synapses with the post_IDs
                if idx in correct_IDs.keys(): # If there is already a valid ID, add other valid IDs 
                    curr_value =correct_IDs[idx]
                    new_value = curr_value+"_"+str(ID)
                    correct_IDs[idx] =new_value
                else:
                    correct_IDs[idx]=str(ID)

# Fixing the updated pre_IDs_str_list
for key, value in correct_IDs.items():
    new_pre_IDs_str[key] = value
    


In [None]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_pre_IDs"] = new_pre_IDs_str
df["Updated_post_IDs"] = new_post_IDs_str

### 2. Update with FAFB (predered)

In [25]:
# Updating the segmentIDs
new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

Updating:   0%|          | 0/1375 [00:00<?, ?it/s]

In [26]:
new_segmentIDs_df["confidence"].unique()

array([1.  , 0.96, 0.84, 0.99, 0.89, 0.71, 0.9 , 0.8 , 0.79])

In [60]:
### Updating the dataframe
# Adding the new column to the data frame
df["Update_confidence"] = new_segmentIDs_df["confidence"].astype(str).tolist()
df["Updated_seg_id"] = new_segmentIDs_df["new_id"].astype(str).tolist()
df.head()

Unnamed: 0,XYZ-Me,symbol,XYZ-Lo,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),link,who's working on it,Update_confidence,Updated_seg_id
0,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940631867211,"Fischbach & Dittrich, 1989",...,N,N,,N,,?,,,1.0,720575940631867211
1,,Tm9,,,L,Marion Silies,Jonas Chojetzki,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940629938298,"Fischbach & Dittrich, 1989",...,Y,Y,?,N,?,Y,,,1.0,720575940629938298
2,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940619904728,"Fischbach & Dittrich, 1989",...,Y,Y,Y,N,N,Y,,,1.0,720575940619904728
3,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940626539592,"Fischbach & Dittrich, 1989",...,N,N,,Y,,?,,,1.0,720575940626539592
4,,Tm9,,,L,Marion Silies,Sebastian Molina-Obando,"Tm9, Tm9_R, transmedullary neuron Tm9, FBbt_00...",720575940616954181,"Fischbach & Dittrich, 1989",...,N,N,,N,,?,,,1.0,720575940616954181


In [61]:
df[df["Updated_seg_id"].duplicated() == True]

Unnamed: 0,XYZ-Me,symbol,XYZ-Lo,quality comment,hemisphere,lab,author,name,seg_id,identified_in,...,twigs proofread (Y/N),inputs_proofread (Y/N),dark_neurons (Y/N),bad_quality_rim (Y/N),healthy_L3 (Y/N),detached_lamina (Y/N),link,who's working on it,Update_confidence,Updated_seg_id


### 3. If wished: Reorder rows based on condition

In [None]:
sorted_df = df.sort_values(by = 'cluster_id').copy()

In [39]:
sorted_df = df.copy()

In [57]:
df["seg_id"]

0       720575940631867211
1       720575940629938298
2       720575940619904728
3       720575940626539592
4       720575940616954181
               ...        
1370    720575940602880736
1371    720575940612861462
1372    720575940620517312
1373    720575940615223199
1374    720575940619077686
Name: seg_id, Length: 1375, dtype: int64

### 3. If wished: Add specific column status based on another file

In [101]:
# Selecting column for the update and file from which the info comes
column_to_update = 'inputs_proofread (Y/N)' # 'inputs_proofread (Y/N)', detached_lamina (Y/N)
update_file_path = r'E:\Connectomics-Data\FlyWire\Txts\inputs_proofread'
update_file_with = 'root_ids_Tm9_left_optic_lobe_selection1_plus_patches.txt' # list of ids
update_file_with = 'root_ids_Tm9_left_optic_lobe_selection1_lamina_attached_only_more_strict.txt' # list of ids
update_file_with = 'root_ids_Tm9_right_optic_lobe_selection1.txt' # list of ids
update_file_with = 'root_ids_Tm9_all_input_proofread_20230609.txt' # list of ids

update_filePath =os.path.join(update_file_path,update_file_with)

# extractring info from the specific file
update_file_with_df = pd.read_csv(update_filePath)
update_file_with_ids_list = update_file_with_df.columns.tolist()

# Updating the list 
df[f'{column_to_update}_updated'] = np.where(df['seg_id'].isin(update_file_with_ids_list), "Y", "N")

In [102]:
np.where(df['seg_id'].isin(update_file_with_ids_list), "Y", "N")

array(['N', 'Y', 'Y', ..., 'Y', 'Y', 'N'], dtype='<U1')

In [103]:
len(set(df[(df[f'{column_to_update}_updated'] == 'Y') & (df[f'hemisphere'] == 'R')]['seg_id'].tolist()))

210

### 4. Saving back to excel file

In [105]:

# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df
writer.save()
writer.close()