# Updating a proofreading table

This notebook contains example scripts to update the segment IDs of a proofreading table stored in an Excel file.

In [None]:
import os
import pandas as pd
import numpy as np
from fafbseg import flywire
from caveclient import CAVEclient

# CAVE client for 'flywire_fafb_production'; used by the CAVE-based update cells
# further down (client.chunkedgraph.get_latest_roots).
client = CAVEclient('flywire_fafb_production')

## A) Updating IDs

### 1. Loading Tm9 input neuron table

In [None]:
# Choose path and file
# NOTE(review): absolute, user-specific Windows path - adjust before running on another machine.
dataPath = r'C:\Users\sebas\Downloads'
fileName = 'All_Tm9_neurons_input_count_ME_R_20230102.xlsx'
filePath = os.path.join(dataPath,fileName)  # full path of the workbook that is read (and later re-written)

In [None]:
# Read the workbook into a DataFrame
df = pd.read_excel(filePath)

# The sheet may start with a dummy row whose seg_id is 'asdf' (added as a workaround
# so that the column is read as type str); strip it and renumber the rows from 0.
if df["seg_id"][0] == 'asdf':
    df = df.iloc[1:, :].reset_index(drop=True)

display(df.head(1))

# Column handles used by the update cells below
segmentIDs, pre_IDs, post_IDs = df["seg_id"], df["presynaptic_ID"], df["postsynaptic_ID"]

### 2. Updating IDs in a for loop to account for the Excel file structure

In [None]:
#Updating segments (SLOW)
# Updates the presynaptic IDs one at a time so that the two output lists stay aligned
# row-by-row with the table, including the 'INPUTS PROOFREAD' marker rows, which are
# copied through unchanged.
new_segmentIDs_column = []
confidence_of_update = []

# Row counter for the progress print. The original initialised it to 1 and bumped it
# before printing, so the first row is reported as 2 (presumably the spreadsheet row
# below the header - TODO confirm). The bump was written `count =+ 1`, which reset the
# counter to 1 on every iteration; enumerate() makes it actually advance.
# The loop variable is named `pre_id` so that the builtin `id` is not shadowed.
for count, pre_id in enumerate(pre_IDs, start=2):
    if pre_id == 'INPUTS PROOFREAD':
        new_segmentIDs_column.append('INPUTS PROOFREAD')
        confidence_of_update.append('INPUTS PROOFREAD')
    else:
        temp_segmentIDs_df = flywire.update_ids(pre_id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        # A single ID was requested, so only the first row of the result is relevant;
        # .iloc[0] is positional and does not depend on the index labels of the result.
        new_segmentIDs_column.append(str(temp_segmentIDs_df["new_id"].iloc[0]))
        confidence_of_update.append(temp_segmentIDs_df["confidence"].iloc[0])
    print(f'row: {count} {new_segmentIDs_column[-1]}')
    

In [None]:
# Distinct confidence values collected by the update loop (marker rows contribute 'INPUTS PROOFREAD')
print(set(confidence_of_update))

In [None]:
### Updating the dataframe
# Attach the updated IDs (stored as strings) and the confidence of each update as new columns
df["Updated_seg_id"] = [str(updated_id) for updated_id in new_segmentIDs_column]
df["Update_confidence"] = confidence_of_update

In [None]:
# Display the first row to check that the new columns were added
df.head(1)

In [None]:
# Display the path of the workbook that the next section writes to
filePath

### 3. Saving back to the Excel file

In [None]:
# Build a date stamp (day, abbreviated month, year - e.g. '05Jan2023') for the sheet name
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d%b%Y")

# Add the updated table as an extra sheet of the existing Excel file.
# mode='a' opens the workbook in append mode, so the sheets already in the file are kept.
# This replaces the former load_workbook() / `writer.book = book` / writer.save()
# workaround, which relies on pandas internals and breaks on recent pandas
# (writer.save() is removed in pandas 2.0).
# if_sheet_exists='replace' (requires pandas >= 1.3) makes a re-run on the same day
# overwrite that day's sheet instead of raising an error.
# The `with` block saves and closes the file even if to_excel() raises.
with pd.ExcelWriter(filePath, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df.to_excel(writer, sheet_name='Updated_IDs_'+date_str)

### 3. Or, saving in a new excel file

In [31]:
#Saving in a new file
import datetime

# Date stamp made of day, abbreviated month name and year, concatenated without separators
now = datetime.datetime.now()
date_str = "".join(now.strftime(part) for part in ("%d", "%b", "%Y"))

# Write the whole table to a fresh workbook next to the input file
file_name = f'All_Tm9_neurons_input_count_segments_update_{date_str}.xlsx'
savePath = os.path.join(dataPath, file_name)
df.to_excel(savePath, sheet_name='Segments update')

### Faster update (code in progress...)

In [None]:
#Updating segments (FAST)
# Update all segments in a single request. Rows whose presynaptic_ID is the
# 'INPUTS PROOFREAD' marker are filtered out first.
# (The comparison was written `=!`, which is a SyntaxError; the operator is `!=`.)
filtered_df = df[df["presynaptic_ID"] != 'INPUTS PROOFREAD'].copy()  # .copy(): columns are added to this frame later
segmentIDs = filtered_df["seg_id"]
pre_IDs = filtered_df["presynaptic_ID"]
post_IDs = filtered_df["postsynaptic_ID"]

# One batched call for every remaining seg_id; the result has a "new_id" column
new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
new_segmentIDs = new_segmentIDs_df["new_id"].tolist()


In [None]:
### Updating the dataframe
# Attach the updated segment IDs to the filtered table, stored as strings
filtered_df["Updated_seg_id"] = [str(updated_id) for updated_id in new_segmentIDs]

### 1. Loading another dataframe

In [32]:
# Choose path and file
# NOTE(review): absolute, user-specific Windows path - adjust before running on another machine.
dataPath = r'C:\Users\sebas\Downloads'
# Only one input file is processed per run. The alternative is kept as a comment instead
# of a live assignment that was immediately overwritten:
# fileName = 'Tm9 proofreadings_20221229.xlsx'
fileName = 'Tm1 proofreadings_20230105.xlsx'
filePath = os.path.join(dataPath,fileName)

In [33]:
# Read the workbook into a DataFrame
df = pd.read_excel(filePath)

# The sheet may start with a dummy row whose seg_id is 'asdf' (added as a workaround
# so that the column is read as type str); strip it and renumber the rows from 0.
if df["seg_id"][0] == 'asdf':
    df = df.iloc[1:, :].reset_index(drop=True)

display(df.head())

# Only the segment IDs are refreshed here (as a plain list); pre_IDs / post_IDs are not reassigned in this cell
segmentIDs = df["seg_id"].tolist()

Unnamed: 0,1.0,XYZ,voxel_raw_x,voxel_raw_y,voxel_raw_z,symbol,hemisphere,lab,author,name,...,lab authorship (Y/N),inputs_proofread (Y/N),notes,annotations_link,Extra notes (see comments),Working on,cluster_id,patch_id,twigs proofread (Y/N),paired_Tm9
0,"45301, 58147, 5917","48257, 57194, 5249",48257.0,57194.0,5249.0,Tm1,R,Marion Silies,Annalena Oswald,"Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?local_id=ea7026658a0da...,merges to check (mi) checked (BG),,2.0,3.0,N,720575940624502013
1,"48378, 75605, 5574","56469, 74237, 5502",56469.0,74237.0,5502.0,Tm1,R,"Greg Jefferis, Marion Silies","Arti Yadav, Annalena Oswald","Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,,Annalena,1.0,2.0,N,720575940613521635
2,"62332, 93823, 5176","68122, 86392, 5321",68122.0,86392.0,5321.0,Tm1,R,"Marion Silies, Mala Murthy, Sebastian Seung","Annalena Oswald, Nash Hadjerol","Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,,,0.0,1.0,N,720575940620703936
3,"46174, 62535, 5673","53993, 60454, 5460",53993.0,60454.0,5460.0,Tm1,R,,,"Transmedullary neuron 1, Tm1, Tm1_R, FBbt_000...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,two merges to check (mi); done (LL),,2.0,3.0,N,720575940628205800
4,"45236, 57783, 5504","54511, 56401, 5424",54511.0,56401.0,5424.0,Tm1,R,,,"Transmedullary neuron 1, Tm1, Tm1_R, [FBbt_00...",...,Y,,,https://ngl.flywire.ai/?json_url=https://globa...,merges to check (mi); checked but still 2 that...,,2.0,3.0,N,720575940612306650


### 2. Update with CAVE (not preferred)

In [None]:
#Update IDs with the chunkedgraph module of CAVE

# For "segmentIDs"
# Empty cells are of type float and are replaced by the placeholder 0; every other entry is cast to int
segmentsIDs_int = [0 if type(raw_id) == float else int(raw_id) for raw_id in segmentIDs]

# Placeholders become a one-element zeros array, so that every entry has a `.size` in the next step;
# real IDs are sent to the chunkedgraph to get their latest root(s)
new_segmentsIDs_int = [
    np.zeros(1) if root_id == 0 else client.chunkedgraph.get_latest_roots(root_id)
    for root_id in segmentsIDs_int
]

# A single result is unwrapped and stored as a string (the float placeholder therefore becomes '0.0');
# when the update leads to more than one ID, the array is kept as is
new_segmentsIDs_str = [
    str(roots[0]) if roots.size == 1 else roots
    for roots in new_segmentsIDs_int
]

In [None]:
# For "pre_IDs"
# Empty cells (type float) and 'INPUTS PROOFREAD' marker rows are replaced by the placeholder 0;
# every other entry is cast to int
pre_IDs_int = [
    0 if type(raw_id) == float or raw_id == 'INPUTS PROOFREAD' else int(raw_id)
    for raw_id in pre_IDs
]

# Placeholders become a one-element zeros array, so that every entry has a `.size` in the next step;
# real IDs are sent to the chunkedgraph to get their latest root(s)
new_pre_IDs_int = [
    np.zeros(1) if root_id == 0 else client.chunkedgraph.get_latest_roots(root_id)
    for root_id in pre_IDs_int
]

# A single result is unwrapped and stored as a string (the float placeholder therefore becomes '0.0');
# when the update leads to more than one ID, the array is kept as is
new_pre_IDs_str = [
    str(roots[0]) if roots.size == 1 else roots
    for roots in new_pre_IDs_int
]

In [None]:
# For "post_IDs"
# Empty cells are of type float and are replaced by the placeholder 0; every other entry is cast to int
post_IDs_int = [0 if type(raw_id) == float else int(raw_id) for raw_id in post_IDs]

# Placeholders become a one-element zeros array, so that every entry has a `.size` in the next step;
# real IDs are sent to the chunkedgraph to get their latest root(s)
new_post_IDs_int = [
    np.zeros(1) if root_id == 0 else client.chunkedgraph.get_latest_roots(root_id)
    for root_id in post_IDs_int
]

# A single result is unwrapped and stored as a string (the float placeholder therefore becomes '0.0');
# when the update leads to more than one ID, the array is kept as is
new_post_IDs_str = [
    str(roots[0]) if roots.size == 1 else roots
    for roots in new_post_IDs_int
]

In [None]:
### Selecting the right pre_ID if the update gave more than one
# For every row whose updated pre ID is still an array of several candidates, keep only
# the candidates that appear as presynaptic partner in the synapse table of that row's
# post ID. Several valid candidates are joined with "_".

from functools import reduce  # NOTE(review): `reduce` is not used in this cell

correct_IDs = {}          # row index -> resolved ID string
curr_post_ID = 'Start'    # sentinel: no synapse table has been fetched yet

# The loop variable is deliberately NOT called `pre_IDs`: that name holds the presynaptic
# ID column of the table, and rebinding it here would break a re-run of the "pre_IDs" cell.
for idx, candidate_IDs in enumerate(new_pre_IDs_str):

    # Rows with a single ID were already converted to str; only arrays need resolving
    if not isinstance(candidate_IDs, np.ndarray):
        continue

    # NOTE(review): assumes the post ID of this row is a plain string; if it were itself
    # an array of several IDs, the comparison below would be element-wise - TODO confirm.
    post_ID = new_post_IDs_str[idx]

    # Fetch the synapse table only when the post ID differs from the one fetched last
    if curr_post_ID != post_ID:
        synapses = flywire.synapses.fetch_synapses(post_ID, pre=False, post=True, attach=True,
                                      min_score=50, clean=True, transmitters=False,
                                      neuropils=True, batch_size=30,
                                      dataset='production', progress=True,mat="live")
        print(f"Looking at post_ID: {post_ID}")
        curr_post_ID = post_ID

    # Keep the candidates that have at least one synapse row with this post ID
    valid_IDs = [str(ID) for ID in candidate_IDs if (synapses['pre'] == ID).any()]
    if valid_IDs:
        # Rows without any valid candidate get no entry and keep their array
        correct_IDs[idx] = "_".join(valid_IDs)

# Write the resolved IDs back into the list of updated pre IDs
for key, value in correct_IDs.items():
    new_pre_IDs_str[key] = value
    


In [None]:
### Updating the dataframe
# Adding the updated pre- and postsynaptic IDs as new columns of the data frame.
# Entries are strings, except where an update returned several IDs and the array was kept.
df["Updated_pre_IDs"] = new_pre_IDs_str
df["Updated_post_IDs"] = new_post_IDs_str

### 2. Update with FAFB (preferred)

In [34]:
# Updating the segmentIDs
# One batched request for the whole list; the result holds a "new_id" per requested ID
new_segmentIDs_df = flywire.update_ids(
    segmentIDs,
    stop_layer=2,
    supervoxels=None,
    timestamp=None,
    dataset='production',
    progress=True,
)
# Plain Python list of the updated IDs, in the row order of the result
new_segmentIDs = new_segmentIDs_df["new_id"].tolist()

Updating:   0%|          | 0/7 [00:00<?, ?it/s]

In [35]:
# Distinct confidence values reported for the updates
new_segmentIDs_df["confidence"].unique()

array([1], dtype=int64)

In [36]:
### Updating the dataframe
# Attach the updated segment IDs to the table, stored as strings
df["Updated_seg_id"] = [str(updated_id) for updated_id in new_segmentIDs]

In [37]:
# Rows whose updated ID already occurred in an earlier row (empty result = no duplicates)
df[df["Updated_seg_id"].duplicated()]

Unnamed: 0,1.0,XYZ,voxel_raw_x,voxel_raw_y,voxel_raw_z,symbol,hemisphere,lab,author,name,...,inputs_proofread (Y/N),notes,annotations_link,Extra notes (see comments),Working on,cluster_id,patch_id,twigs proofread (Y/N),paired_Tm9,Updated_seg_id


In [43]:
# Display the full update table (old_id, new_id, confidence, changed)
new_segmentIDs_df

Unnamed: 0,old_id,new_id,confidence,changed
0,720575940613143574,720575940623515597,1,True
1,720575940627285447,720575940627285447,1,False
2,720575940630078330,720575940630078330,1,False
3,720575940633718041,720575940633718041,1,False
4,720575940629908730,720575940629908730,1,False
5,720575940612397226,720575940612397226,1,False
6,720575940631366968,720575940660576385,1,True


### 3. Reorder rows based on a condition

In [None]:
# Option A: order the rows by their cluster_id before saving
sorted_df = df.sort_values(by = 'cluster_id').copy()

In [39]:
# Option B: keep the original row order.
# NOTE(review): when the notebook is run top to bottom, this assignment replaces the
# cluster_id-sorted frame from the previous cell - run only the option that is wanted.
sorted_df = df.copy()

In [44]:
# Display the updated IDs column (values are stored as strings)
df["Updated_seg_id"]

0    720575940623515597
1    720575940627285447
2    720575940630078330
3    720575940633718041
4    720575940629908730
5    720575940612397226
6    720575940660576385
Name: Updated_seg_id, dtype: object

### 4. Saving back to excel file

In [40]:

# Build a date stamp (day, abbreviated month, year - e.g. '05Jan2023') for the sheet name
import datetime
x = datetime.datetime.now()
date_str = x.strftime("%d%b%Y")

# Add the updated table as an extra sheet of the existing Excel file.
# mode='a' opens the workbook in append mode, so the sheets already in the file are kept.
# This replaces the former load_workbook() / `writer.book = book` / writer.save()
# workaround, which relies on pandas internals and breaks on recent pandas
# (writer.save() is removed in pandas 2.0); the recorded run of the old version of this
# cell ended in "BadZipFile: File is not a zip file".
# if_sheet_exists='replace' (requires pandas >= 1.3) makes a re-run on the same day
# overwrite that day's sheet instead of raising an error.
# The `with` block saves and closes the file even if to_excel() raises.
with pd.ExcelWriter(filePath, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    sorted_df.to_excel(writer, sheet_name='Updated_IDs_'+date_str)

BadZipFile: File is not a zip file