# Updating a proofreading table

This notebook contains functions and example scripts to update a proofreading table stored in an excel file

In [67]:
import os
import pandas as pd
import numpy as np
from fafbseg import flywire
from caveclient import CAVEclient
client = CAVEclient('flywire_fafb_production')

## A) Updating IDs

### 1. Loading Tm9 input neuron table

In [68]:
# Choose path and file
dataPath = r'C:\Users\sebas\Downloads'
date = '20230119'
fileName = f'All_Tm9_neurons_input_count_ME_R_{date}.xlsx'
filePath = os.path.join(dataPath,fileName)

In [69]:
#Loading file as DataFrame
df = pd.read_excel(filePath)
df.head()

Unnamed: 0,presynaptic_ID,Updated_seg_id,Update_confidence,counts,count %,postsynaptic_ID,"Comments about synapses proofreading (e.g., false negatíve found)","URL buhmann postsynapses (annotate only as: true, false or false negative)",Synapses proofread (Y/N),XYZ soma,...,seg_id,twigs proofread (Y/N),FlyWire proofread (Y/N),identified_in,lab authorship (Y/N),notes,annotations_link,Extra notes as comments (initials),Working on,column_id
0,,,,,,asdf,,,,,...,,,,,,,,,,
1,7.205759406593888e+17,7.205759406593888e+17,1.0,38.0,27.536232,720575940629938298,false negative found,https://ngl.flywire.ai/?json_url=https://globa...,Y,no soma,...,7.205759406593888e+17,Y,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
2,7.205759406152385e+17,7.205759406152385e+17,1.0,12.0,8.695652,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,"59355, 90055, 5482",...,7.205759406152385e+17,N,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
3,7.205759406138088e+17,7.205759406138088e+17,1.0,8.0,5.797101,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,y,"59642, 87973, 5337",...,7.205759406138088e+17,N,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
4,7.205759406250186e+17,7.205759406250186e+17,1.0,5.0,3.623188,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,no soma,...,7.205759406250186e+17,Y,Y,"Fischbach & Dittrich, 1989",Y,projections in layer M2 and M4,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1


In [70]:
if df["postsynaptic_ID"][0] == 'asdf': #Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)
segmentIDs = df["seg_id"]
pre_IDs = df["presynaptic_ID"]
post_IDs = df["postsynaptic_ID"]
df.head()

Unnamed: 0,presynaptic_ID,Updated_seg_id,Update_confidence,counts,count %,postsynaptic_ID,"Comments about synapses proofreading (e.g., false negatíve found)","URL buhmann postsynapses (annotate only as: true, false or false negative)",Synapses proofread (Y/N),XYZ soma,...,seg_id,twigs proofread (Y/N),FlyWire proofread (Y/N),identified_in,lab authorship (Y/N),notes,annotations_link,Extra notes as comments (initials),Working on,column_id
0,720575940659388801,720575940659388801,1,38,27.536232,720575940629938298,false negative found,https://ngl.flywire.ai/?json_url=https://globa...,Y,no soma,...,720575940659388801,Y,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
1,720575940615238668,720575940615238668,1,12,8.695652,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,"59355, 90055, 5482",...,720575940615238668,N,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
2,720575940613808815,720575940613808815,1,8,5.797101,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,y,"59642, 87973, 5337",...,720575940613808815,N,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
3,720575940625018512,720575940625018512,1,5,3.623188,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,no soma,...,720575940625018512,Y,Y,"Fischbach & Dittrich, 1989",Y,projections in layer M2 and M4,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1
4,720575940607582338,720575940607582338,1,5,3.623188,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,,...,720575940607582338,Y,N,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,"done, not marked as complete. Too difficult to...",,V1


### 2. Updating IDs considering our excel file extructure

Updating segments will be slow since we do it in a loop. Please just run this part if you really consider that the IDs in which your are mostly interested in need an update

In [71]:
#Updating segments 
updated_presynaptic_ID_column = []
confidence_of_update = []
count = 1
for id in pre_IDs:
    count =+ 1
    if id == 'INPUTS PROOFREAD':
        updated_presynaptic_ID_column.append('INPUTS PROOFREAD')
        confidence_of_update.append('INPUTS PROOFREAD')
    else:
        temp_segmentIDs_df = flywire.update_ids(id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        updated_presynaptic_ID_column.append(str(temp_segmentIDs_df["new_id"][0]))
        confidence_of_update.append(temp_segmentIDs_df["confidence"][0])
    print(f'row: {count} {updated_presynaptic_ID_column[-1]}')
    

row: 1 720575940659388801
row: 1 720575940615238668
row: 1 720575940613808815
row: 1 720575940625018512
row: 1 720575940607582338
row: 1 720575940614237736
row: 1 720575940612520666
row: 1 720575940628312425
row: 1 720575940615007143
row: 1 720575940605245984
row: 1 720575940618464084
row: 1 720575940610585998
row: 1 720575940620503461
row: 1 720575940639559357
row: 1 720575940610613902
row: 1 720575940617694164
row: 1 720575940631339026
row: 1 720575940623818423
row: 1 720575940624241589
row: 1 720575940614237480
row: 1 720575940631976403
row: 1 720575940607335113
row: 1 720575940627168798
row: 1 720575940632254765
row: 1 720575940610400142
row: 1 720575940639005006
row: 1 720575940620233381
row: 1 720575940620501157
row: 1 720575940612520666
row: 1 720575940637733086
row: 1 720575940624124935
row: 1 720575940634442265
row: 1 720575940607582338
row: 1 720575940628041969
row: 1 720575940636947045
row: 1 720575940405923014
row: 1 720575940632726732
row: 1 INPUTS PROOFREAD
row: 1 7205759

row: 1 720575940608768585
row: 1 720575940631857721
row: 1 720575940608867657
row: 1 720575940611123320
row: 1 720575940638143551
row: 1 720575940615062690
row: 1 720575940618174237
row: 1 720575940631116375
row: 1 720575940632418605
row: 1 720575940622242342
row: 1 720575940416991814
row: 1 720575940644981614
row: 1 720575940631727146
row: 1 720575940611388648
row: 1 720575940609668435
row: 1 720575940608228107
row: 1 720575940614558119
row: 1 720575940635065239
row: 1 720575940618938886
row: 1 720575940620459201
row: 1 720575940626459236
row: 1 720575940645022756
row: 1 720575940608590601
row: 1 720575940646753827
row: 1 720575940605202092
row: 1 720575940626392010
row: 1 720575940627304073
row: 1 INPUTS PROOFREAD
row: 1 720575940624985700
row: 1 720575940632418605
row: 1 720575940626719065
row: 1 720575940617817181
row: 1 720575940637788213
row: 1 720575940632171724
row: 1 720575940626392010
row: 1 720575940606300081
row: 1 720575940600114348
row: 1 720575940635065239
row: 1 7205759

row: 1 720575940632757279
row: 1 720575940635302763
row: 1 720575940646550563
row: 1 720575940626343639
row: 1 720575940612750322
row: 1 720575940617130326
row: 1 INPUTS PROOFREAD
row: 1 720575940627027352
row: 1 720575940631683087
row: 1 720575940639054406
row: 1 720575940627516170
row: 1 720575940624353975
row: 1 720575940630776438
row: 1 720575940618484253
row: 1 720575940638515290
row: 1 720575940629763966
row: 1 720575940622647766
row: 1 720575940613196624
row: 1 720575940627313438
row: 1 720575940607335113
row: 1 720575940628612730
row: 1 720575940615403158
row: 1 720575940618076701
row: 1 720575940660084609
row: 1 720575940635196607
row: 1 720575940589625033
row: 1 720575940639068288
row: 1 720575940643709049
row: 1 720575940642783117
row: 1 720575940622542691
row: 1 720575940609138898
row: 1 INPUTS PROOFREAD
row: 1 720575940628389116
row: 1 720575940637944691
row: 1 720575940635257848
row: 1 720575940620734598
row: 1 720575940614374451
row: 1 720575940628005007
row: 1 720575940

row: 1 720575940612403442
row: 1 720575940612626831
row: 1 720575940619736625
row: 1 720575940628799130
row: 1 720575940645912174
row: 1 720575940620968942
row: 1 720575940621386849
row: 1 720575940611106021
row: 1 720575940608094729
row: 1 720575940621211920
row: 1 720575940600480472
row: 1 720575940604438604
row: 1 720575940619120902
row: 1 720575940604560204
row: 1 INPUTS PROOFREAD
row: 1 720575940624337079
row: 1 720575940614237480
row: 1 720575940614366248
row: 1 720575940629212715
row: 1 720575940612520666
row: 1 720575940620105648
row: 1 720575940627313438
row: 1 720575940618530331
row: 1 720575940622521370
row: 1 720575940624908327
row: 1 720575940623094771
row: 1 720575940607335113
row: 1 720575940625018512
row: 1 720575940620233381
row: 1 720575940628312425
row: 1 720575940623861030
row: 1 720575940639559357
row: 1 720575940652504566
row: 1 720575940619696455
row: 1 720575940620503461
row: 1 720575940613808815
row: 1 720575940646753827
row: 1 720575940629221891
row: 1 7205759

row: 1 720575940623159176
row: 1 720575940613275158
row: 1 720575940608526805
row: 1 720575940621225581
row: 1 720575940630918028
row: 1 720575940621046465
row: 1 720575940572954861
row: 1 720575940625923376
row: 1 720575940632560671
row: 1 720575940635378842
row: 1 720575940607335113
row: 1 720575940635368346
row: 1 720575940639832398
row: 1 720575940613275158
row: 1 720575940644722967
row: 1 720575940616088076
row: 1 720575940629906554
row: 1 720575940609876066
row: 1 INPUTS PROOFREAD
row: 1 720575940626467258
row: 1 720575940629763966
row: 1 720575940614065141
row: 1 720575940620106581
row: 1 720575940604810976
row: 1 720575940645386519
row: 1 720575940633471661
row: 1 720575940621399974
row: 1 720575940631707467
row: 1 720575940614669479
row: 1 720575940624146869
row: 1 720575940632623571
row: 1 720575940614207151
row: 1 720575940624945900
row: 1 720575940432684331
row: 1 720575940612589170
row: 1 720575940634208025
row: 1 720575940608556037
row: 1 720575940627172441
row: 1 7205759

In [72]:
### Setting important columns to str
df["presynaptic_ID"] = df["presynaptic_ID"].astype(str)
df["presynaptic_ID"] = df["presynaptic_ID"].astype(str)

In [73]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_presynaptic_ID"] = updated_presynaptic_ID_column
df["Updated_presynaptic_ID"] = df["Updated_presynaptic_ID"].astype(str) 
df["Update_confidence"] = confidence_of_update

In [74]:
df

Unnamed: 0,presynaptic_ID,Updated_seg_id,Update_confidence,counts,count %,postsynaptic_ID,"Comments about synapses proofreading (e.g., false negatíve found)","URL buhmann postsynapses (annotate only as: true, false or false negative)",Synapses proofread (Y/N),XYZ soma,...,twigs proofread (Y/N),FlyWire proofread (Y/N),identified_in,lab authorship (Y/N),notes,annotations_link,Extra notes as comments (initials),Working on,column_id,Updated_presynaptic_ID
0,720575940659388801,720575940659388801,1,38,27.536232,720575940629938298,false negative found,https://ngl.flywire.ai/?json_url=https://globa...,Y,no soma,...,Y,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1,720575940659388801
1,720575940615238668,720575940615238668,1,12,8.695652,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,"59355, 90055, 5482",...,N,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1,720575940615238668
2,720575940613808815,720575940613808815,1,8,5.797101,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,y,"59642, 87973, 5337",...,N,Y,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1,720575940613808815
3,720575940625018512,720575940625018512,1,5,3.623188,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,no soma,...,Y,Y,"Fischbach & Dittrich, 1989",Y,projections in layer M2 and M4,https://ngl.flywire.ai/?json_url=https://globa...,done,,V1,720575940625018512
4,720575940607582338,720575940607582338,1,5,3.623188,720575940629938298,,https://ngl.flywire.ai/?json_url=https://globa...,Y,,...,Y,N,"Fischbach & Dittrich, 1989",Y,,https://ngl.flywire.ai/?json_url=https://globa...,"done, not marked as complete. Too difficult to...",,V1,720575940607582338
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1383,720575940614508178,720575940614508178,1,1,0.684932,720575940622355226,,,,,...,,,,,,,,,D9,720575940614508178
1384,720575940618905291,720575940618905291,1,1,0.684932,720575940622355226,,,,,...,,,,,,,,,D9,720575940618905291
1385,720575940612102629,720575940612102629,1,1,0.684932,720575940622355226,,,,,...,,,,,,,,,D9,720575940612102629
1386,720575940608786505,720575940608786505,1,1,0.684932,720575940622355226,,,,,...,,,,,,,,,D9,720575940608786505


### 3. Updating counts between pre- and post synaptic partners

Strategy to save time:
1. (Step1) Create a dictionary with postsynaptic neuron's IDs as KEYS and their a input-neuron dataframe as VALUES , and then (step2) create a loop across presynaptic IDs to get the exact counting from the input-neuron-dataframe of postsynaptic neurons, loading the correct input-neuron-dataframe from the dictionary each time.

    Or, all in one single step: start a loop across unique postsynaptic IDs (be careful that the order is in the same as in the excel file, print them), get the input-neuron dataframe per each one in each round of the loop, and add a second loop across presynaptic IDs to get the exact counting.
    

2. Save the new countings in order of production in the same dataframe, as well as a column showing duplicated rows

In [85]:
#Implementing the "single step" option

unique_post_IDs = df[df['presynaptic_ID'] != 'INPUTS PROOFREAD']['postsynaptic_ID'].unique()
new_count_ls =  [] # For storing the new counts
copy_count_ls = [] # For storing the copy counts 
for post_id in unique_post_IDs: # loop across postsynaptic ids.
    if flywire.is_latest_root([post_id])[0]:
        curr_id = post_id
        #print(f'Getting inputs from: {curr_id}')
    else:
        updated_ID_df = fafbseg.flywire.update_ids(post_id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        curr_id = updated_ID_df["new_id"][0]
        #print(f'Getting inputs from: {curr_id}')
    
    curr_df = df[df['postsynaptic_ID'] == post_id].copy()
    curr_neurons_inputs = flywire.synapses.fetch_synapses(post_id, pre=False, post=True, attach=True, 
                                             min_score=50, clean=True, transmitters=False, 
                                             neuropils=True, batch_size=30, 
                                             dataset='production', progress=True,mat= "live")
    
    
    pre_id_copies_dict = {} # For checking ID duplicates, triplicates, etc
    for i,pre_id in enumerate(curr_df["Updated_presynaptic_ID"]): # loop across presynaptic ids
        #Counting copies
        if pre_id in pre_id_copies_dict.keys():
            pre_id_copies_dict[pre_id]+= 1 # dupliocates, triplicates ...
        else:
            pre_id_copies_dict[pre_id] = 1 # initial count
    
        c = len(curr_neurons_inputs[curr_neurons_inputs['pre'] == int(pre_id)])
        new_count_ls.append(c) # count between specific pre and post
        copy_count_ls.append(pre_id_copies_dict[pre_id])
        #print(f'Counts with {pre_id}: {c}, confidence {curr_df["Update_confidence"].tolist()[i]}, copy {pre_id_copies_dict[pre_id]}')
    new_count_ls.append('INPUTS PROOFREAD')
    copy_count_ls.append('INPUTS PROOFREAD')
        
        

    
    

Getting inputs from: 720575940629938298
Counts with 720575940659388801: 40, confidence 1, copy 1
Counts with 720575940615238668: 12, confidence 1, copy 1
Counts with 720575940613808815: 8, confidence 1, copy 1
Counts with 720575940625018512: 5, confidence 1, copy 1
Counts with 720575940607582338: 6, confidence 1, copy 1
Counts with 720575940614237736: 5, confidence 1, copy 1
Counts with 720575940612520666: 5, confidence 1, copy 1
Counts with 720575940628312425: 4, confidence 1, copy 1
Counts with 720575940615007143: 4, confidence 1, copy 1
Counts with 720575940605245984: 4, confidence 1, copy 1
Counts with 720575940618464084: 4, confidence 1, copy 1
Counts with 720575940610585998: 4, confidence 1, copy 1
Counts with 720575940620503461: 2, confidence 1, copy 1
Counts with 720575940639559357: 3, confidence 1, copy 1
Counts with 720575940610613902: 3, confidence 1, copy 1
Counts with 720575940617694164: 2, confidence 0.89, copy 1
Counts with 720575940631339026: 4, confidence 1, copy 1
Cou

Getting inputs from: 720575940623950059
Counts with 720575940625936330: 31, confidence 1, copy 1
Counts with 720575940627241880: 25, confidence 0.97, copy 1
Counts with 720575940607335113: 19, confidence 1, copy 1
Counts with 720575940626266000: 7, confidence 1, copy 1
Counts with 720575940652060150: 5, confidence 1, copy 1
Counts with 720575940614237480: 3, confidence 1, copy 1
Counts with 720575940624115690: 4, confidence 1, copy 1
Counts with 720575940637973338: 3, confidence 1, copy 1
Counts with 720575940615007143: 3, confidence 1, copy 1
Counts with 720575940620375489: 2, confidence 1, copy 1
Counts with 720575940639005006: 2, confidence 1, copy 1
Counts with 720575940608664924: 2, confidence 1, copy 1
Counts with 720575940619683889: 2, confidence 1, copy 1
Counts with 720575940628160584: 2, confidence 1, copy 1
Counts with 720575940621822377: 1, confidence 1, copy 1
Counts with 720575940610482062: 1, confidence 1, copy 1
Counts with 720575940617511129: 1, confidence 1, copy 1
Co

Getting inputs from: 720575940612306650
Counts with 720575940624985700: 36, confidence 1, copy 1
Counts with 720575940632418605: 15, confidence 1, copy 1
Counts with 720575940626719065: 5, confidence 1, copy 1
Counts with 720575940617817181: 5, confidence 1, copy 1
Counts with 720575940637788213: 3, confidence 1, copy 1
Counts with 720575940632171724: 3, confidence 1, copy 1
Counts with 720575940626392010: 3, confidence 1, copy 1
Counts with 720575940606300081: 3, confidence 1, copy 1
Counts with 720575940600114348: 2, confidence 1, copy 1
Counts with 720575940635065239: 2, confidence 1, copy 1
Counts with 720575940612771299: 2, confidence 1, copy 1
Counts with 720575940623861030: 2, confidence 1, copy 1
Counts with 720575940649696633: 2, confidence 1, copy 1
Counts with 720575940645336855: 2, confidence 1, copy 1
Counts with 720575940624882494: 2, confidence 1, copy 1
Counts with 720575940611123320: 1, confidence 1, copy 1
Counts with 720575940617745670: 1, confidence 1, copy 1
Counts

Getting inputs from: 720575940652877302
Counts with 720575940614113036: 41, confidence 1, copy 1
Counts with 720575940620141737: 29, confidence 1, copy 1
Counts with 720575940627568703: 11, confidence 1, copy 1
Counts with 720575940606464860: 7, confidence 1, copy 1
Counts with 720575940610779768: 5, confidence 1, copy 1
Counts with 720575940617682525: 5, confidence 1, copy 1
Counts with 720575940613146218: 6, confidence 1, copy 1
Counts with 720575940621016046: 5, confidence 1, copy 1
Counts with 720575940631919402: 4, confidence 1, copy 1
Counts with 720575940633557459: 4, confidence 1, copy 1
Counts with 720575940612161941: 3, confidence 0.88, copy 1
Counts with 720575940660576385: 4, confidence 1, copy 1
Counts with 720575940622116265: 3, confidence 1, copy 1
Counts with 720575940628005263: 3, confidence 1, copy 1
Counts with 720575940638223337: 2, confidence 1, copy 1
Counts with 720575940612319601: 2, confidence 1, copy 1
Counts with 720575940640317272: 2, confidence 0.69, copy 1

Getting inputs from: 720575940628593482
Counts with 720575940605289900: 34, confidence 1, copy 1
Counts with 720575940625144590: 29, confidence 1, copy 1
Counts with 720575940623301912: 6, confidence 1, copy 1
Counts with 720575940633471661: 5, confidence 1.0, copy 1
Counts with 720575940638350564: 5, confidence 1, copy 1
Counts with 720575940618199429: 5, confidence 1, copy 1
Counts with 720575940623861030: 4, confidence 1, copy 1
Counts with 720575940628005263: 5, confidence 1, copy 1
Counts with 720575940642559885: 3, confidence 1, copy 1
Counts with 720575940617869798: 3, confidence 1, copy 1
Counts with 720575940619375995: 3, confidence 1, copy 1
Counts with 720575940614891774: 3, confidence 1, copy 1
Counts with 720575940613798125: 2, confidence 1, copy 1
Counts with 720575940615025810: 2, confidence 1, copy 1
Counts with 720575940618115229: 2, confidence 1, copy 1
Counts with 720575940613602797: 2, confidence 1, copy 1
Counts with 720575940624519432: 2, confidence 1, copy 1
Coun

Getting inputs from: 720575940624320695
Counts with 720575940611125112: 18, confidence 1, copy 1
Counts with 720575940615452561: 11, confidence 1, copy 1
Counts with 720575940623526292: 6, confidence 1, copy 1
Counts with 720575940614891774: 5, confidence 1, copy 1
Counts with 720575940613232918: 5, confidence 1, copy 1
Counts with 720575940614243112: 5, confidence 1, copy 1
Counts with 720575940632991021: 4, confidence 1, copy 1
Counts with 720575940620251375: 3, confidence 1, copy 1
Counts with 720575940633007524: 3, confidence 1, copy 1
Counts with 720575940623315640: 3, confidence 1, copy 1
Counts with 720575940623427827: 2, confidence 1, copy 1
Counts with 720575940607204361: 1, confidence 1, copy 1
Counts with 720575940612361957: 1, confidence 1, copy 1
Counts with 720575940610770552: 1, confidence 1, copy 1
Counts with 720575940597824012: 1, confidence 1, copy 1
Counts with 720575940606573681: 1, confidence 1, copy 1
Counts with 720575940614427253: 1, confidence 1, copy 1
Counts

Getting inputs from: 720575940633399213
Counts with 720575940625214986: 30, confidence 1, copy 1
Counts with 720575940649894777: 27, confidence 1, copy 1
Counts with 720575940623861030: 9, confidence 1, copy 1
Counts with 720575940622441174: 7, confidence 1, copy 1
Counts with 720575940621033563: 6, confidence 1, copy 1
Counts with 720575940629689794: 6, confidence 1, copy 1
Counts with 720575940640328280: 5, confidence 1, copy 1
Counts with 720575940637757150: 4, confidence 1, copy 1
Counts with 720575940615089002: 4, confidence 1, copy 1
Counts with 720575940613146218: 4, confidence 1, copy 1
Counts with 720575940610779768: 3, confidence 1, copy 1
Counts with 720575940607335113: 2, confidence 1, copy 1
Counts with 720575940620141737: 2, confidence 1, copy 1
Counts with 720575940606464860: 2, confidence 1, copy 1
Counts with 720575940610908024: 2, confidence 1, copy 1
Counts with 720575940614378500: 2, confidence 1, copy 1
Counts with 720575940618154580: 2, confidence 1, copy 1
Counts

Getting inputs from: 720575940628156932
Counts with 720575940615429343: 24, confidence 1, copy 1
Counts with 720575940629020213: 12, confidence 1, copy 1
Counts with 720575940618527045: 8, confidence 1, copy 1
Counts with 720575940631026519: 7, confidence 1, copy 1
Counts with 720575940607335113: 6, confidence 1, copy 1
Counts with 720575940605757984: 6, confidence 1, copy 1
Counts with 720575940612750322: 9, confidence 1, copy 1
Counts with 720575940631919402: 4, confidence 1, copy 1
Counts with 720575940623861030: 4, confidence 1, copy 1
Counts with 720575940618532635: 4, confidence 0.75, copy 1
Counts with 720575940622620184: 3, confidence 1, copy 1
Counts with 720575940619288929: 3, confidence 1, copy 1
Counts with 720575940621850906: 3, confidence 1, copy 1
Counts with 720575940627558991: 3, confidence 1, copy 1
Counts with 720575940612750322: 9, confidence 1, copy 2
Counts with 720575940623340073: 2, confidence 1, copy 1
Counts with 720575940619452830: 2, confidence 1, copy 1
Cou

In [88]:
# Adding new columns to the main dataframe
df["Updated_counts"] = new_count_ls
df["duplicates"] =  copy_count_ls


### 4. Saving back to the excell file

In [89]:
# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

df.to_excel(writer, sheet_name='Updated_dataframe_'+date_str)
writer.save()
writer.close()

### 3. Or, saving in a new excel file

In [31]:
#Saving in a new file

import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

file_name = f'All_Tm9_neurons_input_count_segments_update_{date_str}.xlsx'
savePath = os.path.join(dataPath, file_name)
df.to_excel(savePath, sheet_name='Segments update')

### Faster update (code in progress...)

In [None]:
#Updating segments (FAST)
#Do all segments as once, but filter out columns with 'INPUTS PROOFREAD' first
filtered_df = df[df["presynaptic_ID"] =! 'INPUTS PROOFREAD'].copy()
segmentIDs = filtered_df["seg_id"]
pre_IDs = filtered_df["presynaptic_ID"]
post_IDs = filtered_df["postsynaptic_ID"]

new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
new_segmentIDs = new_segmentIDs_df["new_id"].tolist()


In [None]:
### Updating the dataframe
# Adding the new url column to the data frame
filtered_df["Updated_seg_id"] = new_segmentIDs
filtered_df["Updated_seg_id"] = filtered_df["Updated_seg_id"].astype(str) 

In [None]:
# Code not finished....

### 1. Loading postsynaptic neuron dataframe

In [32]:
# Choose path and file
dataPath = r'C:\Users\sebas\Downloads'
fileName = 'Tm9 proofreadings_20221229.xlsx'
fileName = 'Tm1 proofreadings_20230105.xlsx'
filePath = os.path.join(dataPath,fileName)

In [33]:
#Loading file as DataFrame
df = pd.read_excel(filePath)
if df["seg_id"][0] == 'asdf': #Dropping the fisrt row ('asdf' was added as a walk-around to set that column values as type str)
    df = df.iloc[1: , :]
    df.reset_index(inplace=True,drop=True)
display(df.head())
segmentIDs = df["seg_id"].tolist()

Unnamed: 0,1.0,XYZ,voxel_raw_x,voxel_raw_y,voxel_raw_z,symbol,hemisphere,lab,author,name,...,lab authorship (Y/N),inputs_proofread (Y/N),notes,annotations_link,Extra notes (see comments),Working on,cluster_id,patch_id,twigs proofread (Y/N),paired_Tm9
0,"45301, 58147, 5917","48257, 57194, 5249",48257.0,57194.0,5249.0,Tm1,R,Marion Silies,Annalena Oswald,"Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?local_id=ea7026658a0da...,merges to check (mi) checked (BG),,2.0,3.0,N,720575940624502013
1,"48378, 75605, 5574","56469, 74237, 5502",56469.0,74237.0,5502.0,Tm1,R,"Greg Jefferis, Marion Silies","Arti Yadav, Annalena Oswald","Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,,Annalena,1.0,2.0,N,720575940613521635
2,"62332, 93823, 5176","68122, 86392, 5321",68122.0,86392.0,5321.0,Tm1,R,"Marion Silies, Mala Murthy, Sebastian Seung","Annalena Oswald, Nash Hadjerol","Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,,,0.0,1.0,N,720575940620703936
3,"46174, 62535, 5673","53993, 60454, 5460",53993.0,60454.0,5460.0,Tm1,R,,,"Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,two merges to check (mi); done (LL),,2.0,3.0,N,720575940628205800
4,"45236, 57783, 5504","54511, 56401, 5424",54511.0,56401.0,5424.0,Tm1,R,,,"Transmedullary neuron 1, Tm1, Tm1_R, [FBbt_00...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,merges to check (mi); checked but still 2 that...,,2.0,3.0,N,720575940612306650


### 2. Update with CAVE (not preferred)

In [None]:
#Update IDs witth chunkedgraph module of CAVE

# For "segmentsIDs"
#Empty spaces are type float and will be filled with "0"
segmentsIDs_int = list(map(lambda x: 0 if type(x) == float else int(x),segmentIDs)) # From str to int
#to create a np.zeros array is important for the next step
new_segmentsIDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),segmentsIDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_segmentsIDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_segmentsIDs_int)) # From int to str

In [None]:
# For "pre_IDs"
#Empty spaces are type float and will be filled with "0"
pre_IDs_int = list(map(lambda x: 0 if type(x) == float or x == 'INPUTS PROOFREAD'  else int(x),pre_IDs)) # From str to int
#to create a np.zeros array is important for the next step
new_pre_IDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),pre_IDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_pre_IDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_pre_IDs_int)) # From int to str

In [None]:
# For "post_IDs"
#Empty spaces are type float and will be filled with "0"
post_IDs_int = list(map(lambda x: 0 if type(x) == float else int(x),post_IDs)) # From str to int
#to create a np.zeros array is important for the next step
new_post_IDs_int = list(map(lambda x: np.zeros(1) if x == 0 else client.chunkedgraph.get_latest_roots(x),post_IDs_int))
#Updated IDs leading to more than one ID a single ID will be kept inside [] brakets.
new_post_IDs_str = list(map(lambda x: str(x[0]) if x.size == 1 else x,new_post_IDs_int)) # From int to str

In [None]:
### Seleting the right pre_ID if the update gaves more than one
#Getting the correct pre_IDs than contact each post_ID

from functools import reduce

correct_IDs = {}
curr_post_ID = 'Start'
for idx,pre_IDs in  enumerate(new_pre_IDs_str):
        
    #If there are multiple IDs in an array
    if type(pre_IDs) != str and type(pre_IDs) == np.ndarray:
        #Creatting synapses dataframe only once per each post_ID
        if curr_post_ID != new_post_IDs_str[idx]:
            synapses = flywire.synapses.fetch_synapses(new_post_IDs_str[idx], pre=False, post=True, attach=True,
                                          min_score=50, clean=True, transmitters=False,
                                          neuropils=True, batch_size=30,
                                          dataset='production', progress=True,mat="live")
            #Update post_ID
            print(f"Looking at post_ID: {new_post_IDs_str[idx]}") 
            curr_post_ID = new_post_IDs_str[idx]
            
        #Proof connectivity to the respective post_ID for each of them
        for ID in pre_IDs:
            if synapses[synapses['pre'] == ID].empty:
                continue
            else: # Only add the pre_ID (and its index) which has valid synapses with the post_IDs
                if idx in correct_IDs.keys(): # If there is already a valid ID, add other valid IDs 
                    curr_value =correct_IDs[idx]
                    new_value = curr_value+"_"+str(ID)
                    correct_IDs[idx] =new_value
                else:
                    correct_IDs[idx]=str(ID)

# Fixing the updated pre_IDs_str_list
for key, value in correct_IDs.items():
    new_pre_IDs_str[key] = value
    


In [None]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_pre_IDs"] = new_pre_IDs_str
df["Updated_post_IDs"] = new_post_IDs_str

### 2. Update with FAFB (predered)

In [34]:
# Updating the segmentIDs
new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
new_segmentIDs = new_segmentIDs_df["new_id"].tolist()

Updating:   0%|          | 0/7 [00:00<?, ?it/s]

In [35]:
new_segmentIDs_df["confidence"].unique()

array([1], dtype=int64)

In [36]:
### Updating the dataframe
# Adding the new url column to the data frame
df["Updated_seg_id"] = new_segmentIDs
df["Updated_seg_id"] = df["Updated_seg_id"].astype(str) 

In [37]:
df[df["Updated_seg_id"].duplicated() == True]

Unnamed: 0,1.0,XYZ,voxel_raw_x,voxel_raw_y,voxel_raw_z,symbol,hemisphere,lab,author,name,...,inputs_proofread (Y/N),notes,annotations_link,Extra notes (see comments),Working on,cluster_id,patch_id,twigs proofread (Y/N),paired_Tm9,Updated_seg_id


In [43]:
new_segmentIDs_df

Unnamed: 0,old_id,new_id,confidence,changed
0,720575940613143574,720575940623515597,1,True
1,720575940627285447,720575940627285447,1,False
2,720575940630078330,720575940630078330,1,False
3,720575940633718041,720575940633718041,1,False
4,720575940629908730,720575940629908730,1,False
5,720575940612397226,720575940612397226,1,False
6,720575940631366968,720575940660576385,1,True


### 3. Reorder rows base don condition

In [None]:
sorted_df = df.sort_values(by = 'cluster_id').copy()

In [39]:
sorted_df = df.copy()

In [44]:
df["Updated_seg_id"]

0    720575940623515597
1    720575940627285447
2    720575940630078330
3    720575940633718041
4    720575940629908730
5    720575940612397226
6    720575940660576385
Name: Updated_seg_id, dtype: object

### 4. Saving back to excel file

In [None]:

# Creating string for the date
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d") + x.strftime("%b") + x.strftime("%Y")

# Writting in an existing excel file
from openpyxl import load_workbook
book = load_workbook(filePath)
writer = pd.ExcelWriter(filePath, engine = 'openpyxl')
writer.book = book

sorted_df.to_excel(writer, sheet_name='Updated_IDs_'+date_str)
writer.save()
writer.close()