# Updating a proofreading table or a file containing a list of IDs

This notebook contains functions and example scripts to update a proofreading table, or lists of IDs stored in an Excel or txt file.

Content per section:
<br>A) Updating IDs and presynaptic counts
<br>B) Updating main postsynaptic neurons database (table)
<br>B2) Updating all postsynaptic neurons databases (loop)
<br>C) Updating a list of IDs from a txt file
<br>D) Updating a list of IDs from any Excel file (located inside section C below)
<br>E) Making a list of unique values

In [None]:
import datetime
import itertools
import math
import os
from glob import glob

import fafbseg
import numpy as np
import pandas as pd
from caveclient import CAVEclient
from fafbseg import flywire

from helpers.synapse_queries import combine_xyz, separate_xyz, synapse_count, filter_points, calculate_distance
client = CAVEclient('flywire_fafb_production')

# A) Updating IDs and presynaptic counts

## 1. Loading X neuron input neuron table

In [None]:
# Folder holding the input-count workbooks (adapt to your own machine)
dataPath = r'Z:\Further projects\Heterogeneity across cell types\data\Excels\drive-data-sets'
# Workbook that will be updated
fileName = 'Mi1_neurons_input_count_R_20240610.xlsx'
filePath = os.path.join(dataPath, fileName)

In [None]:
# Load the workbook selected above (pandas reads the first sheet by default)
# into a DataFrame and preview its first rows.
df = pd.read_excel(filePath)
df.head()

In [None]:
# Drop the first row when it is the 'asdf' placeholder
# (it was added as a workaround to force that column to be read as str).
if df["postsynaptic_ID"][0] == 'asdf':
    df = df.iloc[1:].reset_index(drop=True)

#df = df[df['counts'] >= 3].copy() # Removing rows based on absolute count filter

# Independent copies of the three ID columns; these are the lists that get updated below.
segmentIDs, pre_IDs, post_IDs = (
    df[column].copy() for column in ("seg_id", "presynaptic_ID", "postsynaptic_ID")
)
df.head()


In [None]:
# Replace the 'INPUTS PROOFREAD' rows of the three ID lists with the previous
# postsynaptic cell ID.
# An old format of the files in the Tm9 project had such "INPUTS PROOFREAD"
# separator lines; they must not be sent to the ID update.
#
# The loop is sequential on purpose: a marker row copies the value of the row
# just before it, which may itself have been rewritten in the previous step.
# The loop variable is called `pre_id` (not `id`) so that the builtin `id()`
# is not shadowed for the rest of the kernel session.
# NOTE(review): a marker on the very first row has no previous row to copy from.
for i, pre_id in enumerate(pre_IDs):
    if pre_id == 'INPUTS PROOFREAD':
        previous_post_ID = post_IDs[i-1] # The previous postsynaptic cell ID
        segmentIDs[i] = previous_post_ID
        pre_IDs[i] = previous_post_ID
        post_IDs[i] = previous_post_ID

## 2. Updating IDs considering our Excel file structure

In [None]:
# Update the segment IDs in batches of `rounds_of` so that the server is not
# flooded with one huge request.

confidence_of_update_pre = []
confidence_of_update_post = []
updated_presynaptic_ID_column = []
updated_postsynaptic_ID_column = []
rounds_of = 100
total_rounds = math.ceil(len(pre_IDs) / rounds_of)
print(f'Total rounds to perform: {total_rounds}')

for curr_round in range(1, total_rounds + 1):
    # Positional window of the current batch
    batch = slice((curr_round - 1) * rounds_of, curr_round * rounds_of)

    # presynaptic IDs of this batch
    pre_update_df = flywire.update_ids(pre_IDs[batch].tolist(), stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
    updated_presynaptic_ID_column.extend(pre_update_df["new_id"].tolist())
    confidence_of_update_pre.extend(pre_update_df["confidence"].tolist())

    # postsynaptic IDs of this batch
    post_update_df = flywire.update_ids(post_IDs[batch].tolist(), stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
    updated_postsynaptic_ID_column.extend(post_update_df["new_id"].tolist())
    confidence_of_update_post.extend(post_update_df["confidence"].tolist())

    if curr_round % 5 == 0: # progress report every 5 rounds
        print(curr_round)

### 2.1 Adding important columns for next steps

In [None]:
# Store the original ID columns as text.
for id_column in ("presynaptic_ID", "postsynaptic_ID", "seg_id"):
    df[id_column] = df[id_column].astype(str)

In [None]:
# Attach the updated IDs (stored as text) and the update confidences to the dataframe.
for id_column, updated_ids in (
    ("Updated_presynaptic_ID", updated_presynaptic_ID_column),
    ("Updated_postsynaptic_ID", updated_postsynaptic_ID_column),
):
    df[id_column] = updated_ids
    df[id_column] = df[id_column].astype(str)

df["Update_confidence_pre"] = confidence_of_update_pre
df["Update_confidence_post"] = confidence_of_update_post

In [None]:
# Sanity check: number of presynaptic confidence values collected by the batched update.
len(confidence_of_update_pre)

## 3. Updating counts between pre- and post synaptic partners

Strategy to save time:
1. (Step 1) Create a dictionary with the postsynaptic neurons' IDs as KEYS and their input-neuron dataframes as VALUES, and then (step 2) loop across presynaptic IDs to get the exact count from the input-neuron dataframe of each postsynaptic neuron, loading the correct input-neuron dataframe from the dictionary each time.

    Or, all in one single step: loop across the unique postsynaptic IDs (make sure they are in the same order as in the Excel file; print them to check), get the input-neuron dataframe of each one in each round of the loop, and add a second loop across presynaptic IDs to get the exact count.
    

2. Save the new counts, in the order in which they were produced, in the same dataframe, together with a column flagging duplicated rows.

In [None]:
#Implementing the "single step" option

unique_post_IDs = df.loc[df['presynaptic_ID'] != 'INPUTS PROOFREAD', 'postsynaptic_ID'].unique()

# One slot per row of `df`, filled by row position. This keeps both lists
# aligned with `df` even when the rows of one postsynaptic neuron are not
# contiguous in the table, and guarantees they always have len(df) entries.
# Rows for which no synapses could be retrieved keep `None`.
new_count_ls = [None] * len(df)  # For storing the new counts
copy_count_ls = [None] * len(df) # For storing the copy counts

for post_id in unique_post_IDs: # loop across postsynaptic ids.
    if flywire.is_latest_root([post_id])[0]:
        curr_id = post_id
        print(f'Getting inputs from: {curr_id}')
    else:
        updated_ID_df = flywire.update_ids(post_id, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
        curr_id = updated_ID_df["new_id"][0]
        print(f'Getting inputs from: {curr_id} (updated)')

    # Rows of the table that belong to this postsynaptic neuron
    row_mask = (df['postsynaptic_ID'] == post_id).to_numpy()
    row_positions = np.flatnonzero(row_mask)
    curr_df = df[row_mask].copy()

    curr_neurons_inputs = flywire.synapses.fetch_synapses(curr_id, pre=False, post=True, attach=True,
                                             min_score=50, clean=True, transmitters=False,
                                             neuropils=True, batch_size=30,
                                             dataset='production', progress=True,mat= "live")

    if curr_neurons_inputs.empty: # Nothing retrieved: leave None for these rows
        continue

    #Filtering redundant / duplicated counts (threshold = 100 nm)
    combine_xyz(curr_neurons_inputs)
    points = curr_neurons_inputs['pre_pt_position'].tolist()
    points_no_duplicates = filter_points(points, threshold_distance = 100)
    # NOTE(review): membership test against the output of the filter_points helper;
    # its return type is not visible here, so the original `in` test is kept as is.
    curr_neurons_inputs_no_duplicates = curr_neurons_inputs[curr_neurons_inputs['pre_pt_position'].apply(lambda x: x in points_no_duplicates)].copy()
    pre_roots = curr_neurons_inputs_no_duplicates['pre_pt_root_id']

    pre_id_copies_dict = {} # For checking ID duplicates, triplicates, etc
    for row_pos, pre_id in zip(row_positions, curr_df["Updated_presynaptic_ID"]): # loop across presynaptic ids
        # Running number of times this presynaptic ID has appeared for this neuron (1 = first occurrence)
        pre_id_copies_dict[pre_id] = pre_id_copies_dict.get(pre_id, 0) + 1

        # Count between this specific pre- and postsynaptic pair
        new_count_ls[row_pos] = int((pre_roots == int(pre_id)).sum())
        copy_count_ls[row_pos] = pre_id_copies_dict[pre_id]
        


In [None]:
# Adding new columns to the main dataframe
df["Updated_counts"] = new_count_ls
# Occurrence number of each presynaptic ID within its postsynaptic neuron
# (1 = first occurrence, 2 = duplicate, 3 = triplicate, ...).
# The original line had no right-hand side, which is a SyntaxError.
df["duplicates"] = copy_count_ls

df.head()

## 4. Saving back to the Excel file

In [None]:
# Date tag used in the sheet name, e.g. "10Jun2024"
date_str = datetime.datetime.now().strftime("%d%b%Y")

# Append a new sheet to the existing workbook.
# mode="a" keeps the sheets already stored in the file; if_sheet_exists="new"
# lets the engine pick a fresh sheet name when one with this name already exists.
# The context manager saves and closes the writer even if writing fails.
# (`writer.book = ...` and `writer.save()` no longer work on pandas >= 2.0.)
with pd.ExcelWriter(filePath, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
    df.to_excel(writer, sheet_name='Updated_dataframe_'+date_str)

### 4. Or, saving in a new Excel file

In [None]:
# Save the updated table in a brand-new workbook.
# Adjust the file name below to the data set being processed.

import datetime
date_str = datetime.datetime.now().strftime("%d%b%Y")  # e.g. "10Jun2024"

file_name = f'Mi1_neurons_input_count_segments_update_{date_str}.xlsx'
savePath = os.path.join(dataPath, file_name)
df.to_excel(savePath, sheet_name='Segments update')

In [None]:
# Show where the new workbook was written.
savePath

# B) Updating main postsynaptic neurons database

## 1. Loading postsynaptic neuron dataframe

In [None]:
# Choose path and file
PC_disc = 'D'  # drive letter of the data disk
# Raw f-string: the backslashes are path separators, not escape sequences
# (the non-raw form relies on invalid escapes such as "\C" being kept literally).
dataPath = rf'{PC_disc}:\Connectomics-Data\FlyWire\Excels\drive-data-sets\database'
fileName = 'Tm9 proofreadings.xlsx'
filePath = os.path.join(dataPath,fileName)

In [None]:
# Load the proofreading table as a DataFrame
df = pd.read_excel(filePath)

# Drop the first row when it is the 'asdf' placeholder
# (it was added as a workaround to force that column to be read as str).
if df["seg_id"][0] == 'asdf':
    df = df.iloc[1:].reset_index(drop=True)

# Every column is handled as text from here on
df = df.astype(str)
display(df.head())
segmentIDs = df["Updated_seg_id"].tolist()

In [None]:
# Quick look at the first five IDs that will be sent to the update.
print(segmentIDs[0:5])

## 2. Update with FAFB 

In [None]:
# Updating the segmentIDs
# The returned table is read below through its "new_id" and "confidence" columns.
new_segmentIDs_df = flywire.update_ids(segmentIDs, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

In [None]:
# Distinct confidence values returned by the update.
new_segmentIDs_df["confidence"].unique()

In [None]:
### Updating the dataframe
# Store the update confidence and overwrite "Updated_seg_id" with the new IDs
# (both as text; values are assigned by position, so row order must be unchanged).
df["Update_confidence"] = new_segmentIDs_df["confidence"].astype(str).tolist()
df["Updated_seg_id"] = new_segmentIDs_df["new_id"].astype(str).tolist()
df.head()

In [None]:
# Rows whose updated ID already appeared in an earlier row
df[df["Updated_seg_id"].duplicated()]

In [None]:
# Rows whose update confidence is below 1
# (the column was stored as text above, hence the cast back to float).
df[df["Update_confidence"].astype(float) < 1.]

## 3. If wished: Reorder rows based on condition

In [None]:
# Option 1: reorder the rows by the 'cluster_id' column.
sorted_df = df.sort_values(by = 'cluster_id').copy()

In [None]:
# Option 2: keep the current row order (running this cell replaces the sorted copy).
sorted_df = df.copy()

In [None]:
# Display the original (non-updated) segment IDs.
df["seg_id"]

## 3. If wished: Add specific column status based on another file

### 3.1 Based on a txt with list of ids

In [None]:
# Column to (re)compute and the txt file that lists the IDs to be flagged with "Y".
# Other columns used so far: 'inputs_proofread (Y/N)', 'detached_lamina (Y/N)', 'healthy_L3 (Y/N)', 'rim_area (Y/N)'
column_to_update = 'rim_area (Y/N)'

update_file_path = r'D:\Connectomics-Data\FlyWire\Txts\cell_type_proofread'
update_file_with = 'root_ids_Tm9_outer_rim_20231018.txt' # list of ids
update_filePath = os.path.join(update_file_path, update_file_with)

# The IDs are taken from the header row that read_csv parses out of the file
update_file_with_df = pd.read_csv(update_filePath)
update_file_with_ids_list = list(update_file_with_df.columns)

# "Y" where the original seg_id appears in the list, "N" otherwise
is_listed = df['seg_id'].isin(update_file_with_ids_list)
df[f'{column_to_update}_updated'] = np.where(is_listed, "Y", "N")
df.head()

### 3.2 Or, Based on excel files with ids in a column and extra information in other columns

In [None]:
# Select the Excel file that provides the information to transfer
# (here: the file holding the XYZ columns).
# NOTE(review): `date` is not defined anywhere in the visible notebook; it has to be
# set to the date tag of the XYZ file before this cell can run (otherwise NameError).
update_file_path = r'E:\Connectomics-Data\FlyWire\Excels'
update_file_with = f'XYZ_df_{date}.xlsx'
update_filePath =os.path.join(update_file_path,update_file_with)

# Load the file and preview it
update_file_with_df = pd.read_excel(update_filePath)
update_file_with_df.head()


In [None]:
# Transfer information from one dataframe to another based on a reference column (here seg_id)
def update_dataframe_single_column(source_df, target_df, reference_column):
    """
    Overwrite rows of `target_df` with the matching rows of `source_df`.

    Rows are matched on `reference_column`. When a reference value occurs several
    times in `source_df`, the first entry per column (as returned by
    ``groupby(...).first()``) is used. Target rows without a match are left untouched.

    Args:
        source_df: dataframe providing the new values
        target_df: dataframe to be updated; it is modified in place
        reference_column: name of the column present in both dataframes used for matching

    Returns:
        the (modified) `target_df`
    """
    # One record (dict of column -> value) per distinct reference value
    first_per_reference = source_df.groupby(reference_column).first().reset_index()
    lookup = {
        record[reference_column]: record
        for record in first_per_reference.to_dict(orient='records')
    }

    # Replace every target row whose reference value is known in the source
    for row_label, reference_value in target_df[reference_column].items():
        replacement = lookup.get(reference_value)
        if replacement is not None:
            target_df.loc[row_label] = replacement

    return target_df

In [None]:
# Columns taken from each dataframe and the column used to match rows
source_cols = ['XYZ-ME', 'XYZ-LO','seg_id']
target_cols = ['XYZ-ME', 'XYZ-LO','seg_id']
reference_column = 'seg_id'

# Work on text-only copies with a fresh 0..n-1 index
source_df = update_file_with_df[source_cols].reset_index(drop=True).astype(str)
target_df = df[target_cols].reset_index(drop=True).astype(str)

# Transfer the values and preview the completed table
result_df = update_dataframe_single_column(source_df, target_df, reference_column)
result_df.head()

In [None]:
# Date tag used in the sheet name, e.g. "10Jun2024"
date_str = datetime.datetime.now().strftime("%d%b%Y")

# Everything is written as text
result_df = result_df.astype(str)

# Append a new sheet to the existing workbook.
# mode="a" keeps the sheets already stored in the file; if_sheet_exists="new"
# lets the engine pick a fresh sheet name when one with this name already exists.
# The context manager saves and closes the writer even if writing fails.
# (`writer.book = ...` and `writer.save()` no longer work on pandas >= 2.0.)
with pd.ExcelWriter(filePath, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
    result_df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df

## 3. If wished: Add the center of mass of postsynaptic sites

In [None]:
#Function
def find_center_point(points, threshold):
    """
    Find the center of a point cloud after discarding outlying points.

    A point is kept when its average distance to all points of the cloud
    (itself included) is below `threshold`. The center is the mean of the kept
    points, rounded to one decimal.

    Args:
        points: sequence or array of coordinates, one point per row
        threshold: maximum average distance for a point to be kept

    Returns:
        (center_point, closest_point) as lists, where `closest_point` is the kept
        point nearest to the center. Both are [0, 0, 0] when no point is kept,
        including when `points` is empty.
    """
    points = np.asarray(points)

    # An empty cloud used to fail inside np.linalg.norm; return the same
    # sentinel as the "no valid points" case instead.
    if points.size == 0:
        return [0, 0, 0], [0, 0, 0]

    # Pairwise distances between all points, then the average per point
    distances = np.linalg.norm(points[:, np.newaxis] - points, axis=2)
    avg_distances = np.mean(distances, axis=1)

    # Points close enough (on average) to the rest of the cloud
    valid_points = points[avg_distances < threshold]
    if len(valid_points) == 0:
        return [0, 0, 0], [0, 0, 0]

    # Geometric center of the valid points, rounded
    center_point = np.round(np.mean(valid_points, axis=0), decimals=1)

    # Valid point closest to that center
    closest_point_index = np.argmin(np.linalg.norm(valid_points - center_point, axis=1))
    closest_point = valid_points[closest_point_index]

    return center_point.tolist(), closest_point.tolist()

In [None]:
def combine_xyz(df):
    """
    Merge the separate x, y and z synapse columns into single position columns.

    Adds 'post_pt_position' and 'pre_pt_position', each holding [x/4, y/4, z/40]
    lists, and renames 'pre' / 'post' to 'pre_pt_root_id' / 'post_pt_root_id'
    (the column names expected by
    nglui.statebuilder.helpers.make_synapse_neuroglancer_link).

    Note: this definition replaces the `combine_xyz` imported from
    helpers.synapse_queries at the top of the notebook once this cell is run.

    Args:
        df: pandas data frame with 'pre_x', 'pre_y', 'pre_z', 'post_x', 'post_y'
            and 'post_z' columns of the same length

    Returns:
        None; `df` is modified in place
    """
    def _scaled_positions(x_col, y_col, z_col):
        # One [x/4, y/4, z/40] list per row
        coordinates = zip(df[x_col].tolist(), df[y_col].tolist(), df[z_col].tolist())
        return [[x / 4, y / 4, z / 40] for x, y, z in coordinates]

    post_positions = _scaled_positions('post_x', 'post_y', 'post_z')
    pre_positions = _scaled_positions('pre_x', 'pre_y', 'pre_z')

    # New position columns
    df['post_pt_position'] = post_positions
    df['pre_pt_position'] = pre_positions
    # Root-id columns get their new names
    df.rename(columns={'pre': 'pre_pt_root_id', 'post': 'post_pt_root_id'}, inplace=True)

In [None]:
# Load the postsynaptic coordinates of each neuron in a specific neuropil and get the center point.
# The loop can take quite a lot of time depending on the amount of ids!

# The rows are processed in batches of `rounds_of` (e.g. 100)
rounds_of = 100
loop_number = 1 # Must start at "1" if not run before
start_point = rounds_of * (loop_number - 1)  # first row of batch `loop_number`

# Restrict the table to the rows that still need coordinates
# ('nan' is the text left in the column after casting the table to str).
#short_df = df[(df['detached_lamina (Y/N)'] == 'N') &(df['inputs_proofread (Y/N)'] == 'Y')].copy()
short_df = df[df['XYZ-ME'] == 'nan'].copy()

In [None]:
# Number of rows that still need coordinates.
len(short_df)

In [None]:
# Approximate number of batches of `rounds_of` rows (rounded to the nearest integer).
round(len(short_df)/rounds_of)

In [None]:
# Looping across chosen rows, one batch of `rounds_of` rows at a time.
# Each batch is written to its own Excel file. `loop_number` and `start_point`
# are advanced after every batch, so re-running this cell resumes after the
# last finished batch.

date_str = datetime.datetime.now().strftime("%d%b%Y")  # e.g. "10Jun2024"

threshold = 5000  # distance threshold handed to find_center_point
# ceil() covers a final partial batch without adding an empty trailing batch
n_batches = math.ceil(len(short_df) / rounds_of)


def _closest_point_in_neuropil(synapses, neuropil_label, threshold):
    """Postsynaptic point closest to the center of the synapses labelled `neuropil_label`.

    Returns [0, 0, 0] when the point cannot be computed (e.g. no synapse carries the label).
    """
    # NOTE(review): this is a substring match, so 'LO' would also match a label
    # such as 'LOP' if the table contains one - confirm this is intended.
    try:
        in_neuropil = synapses[synapses['neuropil'].str.contains(neuropil_label)]
        points = in_neuropil['post_pt_position'].tolist()
        _, closest_point = find_center_point(points, threshold)
        return closest_point
    except Exception:
        # `Exception` (not a bare except) so that interrupting the kernel still stops the loop
        return [0, 0, 0]


for _ in range(loop_number, n_batches + 1):
    print(f'Loop #: {loop_number}')
    curr_df = short_df[start_point:start_point+rounds_of].reset_index(drop=True)

    XYZ_ME = []
    XYZ_LO = []
    for seg_id in curr_df["Updated_seg_id"]:

        print(f'currently at: {seg_id}')
        # Getting coordinates of synaptic contacts
        neurons_inputs = flywire.synapses.fetch_synapses(seg_id, pre=False, post=True, attach=True,
                                                     min_score=50, clean=True, transmitters=False,
                                                     neuropils=True, batch_size=30,
                                                     dataset='production', progress=True,mat= "live")
        combine_xyz(neurons_inputs)

        # Center point with medulla ('ME') and lobula ('LO') coordinates
        XYZ_ME.append(_closest_point_in_neuropil(neurons_inputs, 'ME', threshold))
        XYZ_LO.append(_closest_point_in_neuropil(neurons_inputs, 'LO', threshold))

    # Coordinates are stored as "x,y,z" text
    XYZ_LO_strings = [','.join(map(str, sublist)) for sublist in XYZ_LO]
    XYZ_ME_strings = [','.join(map(str, sublist)) for sublist in XYZ_ME]

    #saving
    XYZ_df = pd.DataFrame(XYZ_ME_strings, columns=['XYZ-ME'])
    XYZ_df['XYZ-LO'] = XYZ_LO_strings
    XYZ_df['Updated_seg_id'] =  curr_df['Updated_seg_id']
    XYZ_df['seg_id'] =  curr_df['seg_id']
    # Raw f-string: the backslashes are path separators, not escape sequences
    XYZ_df.to_excel(rf'D:\Connectomics-Data\FlyWire\Excels\drive-data-sets\XYZ_df_{loop_number}_{date_str}.xlsx', index=False)
    start_point += rounds_of
    loop_number += 1


### 4. Saving back to excel file

In [None]:

# Date tag used in the sheet name, e.g. "10Jun2024"
date_str = datetime.datetime.now().strftime("%d%b%Y")

# Append a new sheet to the existing workbook.
# mode="a" keeps the sheets already stored in the file; if_sheet_exists="new"
# lets the engine pick a fresh sheet name when one with this name already exists.
# The context manager saves and closes the writer even if writing fails.
# (`writer.book = ...` and `writer.save()` no longer work on pandas >= 2.0.)
with pd.ExcelWriter(filePath, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
    df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df

# B2) Updating all postsynaptic neurons databases (loop)

## 1. Loading data bases of interest in a loop 

In [None]:
# Data paths
# Choose path and file
dataPath = r'Z:\Further projects\Heterogeneity across cell types\data\Excels\drive-data-sets\database' # write your path

# Every workbook in the folder. glob returns each match with `dataPath` already
# prepended, so the paths can be used directly; sorting makes the order of
# `df_ls` reproducible.
fileName_ls = sorted(glob(os.path.join(dataPath, "*.xlsx")))


#Creating the database in a loop
df_ls = []
for filePath in fileName_ls:
    print(f'Importing: {filePath}')
    df = pd.read_excel(filePath)
    # Drop the first row when it is the 'asdf' placeholder
    # (it was added as a workaround to force that column to be read as str)
    if df["seg_id"][0] == 'asdf':
        df = df.iloc[1:].reset_index(drop=True)
    df_ls.append(df)

# C) Updating a list of IDs from a txt file or a plain Excel file


## 1. Loading the data from a txt file

In [None]:
# Choose path and file
dataPath = r'Z:\Further projects\Heterogeneity across cell types\data\Txts\cell_type_poofread' # write your path
fileDate = '20231106'
fileName = f'root_ids_T4d_R_{fileDate}.txt'
filePath = os.path.join(dataPath, fileName)

# The comma-separated IDs end up as the column names of the parsed file
ids_df = pd.read_csv(filePath, sep = ",")
# Drop every name containing ".1" (the suffix that appears on repeated column names)
curr_ID_ls = [column for column in ids_df.columns if ".1" not in column]
print(curr_ID_ls)

In [None]:
# Updating the IDs via fafbseg (all IDs of the list in one call)
updated_ID_df = fafbseg.flywire.update_ids(curr_ID_ls, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

In [None]:
updated_ID_df[updated_ID_df['confidence'] < 0.8]# rows updated with a confidence below 0.8

In [None]:
# (number of updated IDs, number of distinct updated IDs)
# Both values are shown together: a notebook cell only displays its last
# expression, so a separate first `len(...)` line is silently discarded.
# A difference between the two means several entries now share the same ID.
new_ids = updated_ID_df['new_id']
len(new_ids.tolist()), len(new_ids.unique().tolist())


## 2. Saving data in txt file

In [None]:
# Saving the distinct updated IDs next to the original file, with the prefix "Updated_"

updated_fileName = f'Updated_{fileName}'
updated_filePath = os.path.join(dataPath,updated_fileName)

# NOTE(review): the file receives the Python text form of the list (square brackets,
# comma + space separators), which differs from the plain comma-separated form that
# the loading cell of this section parses - confirm this is the intended format.
id_list = list(set(updated_ID_df['new_id'].unique().tolist()))
with open(updated_filePath , "w") as output:
    output.write(str(id_list))


## 1. Or, loading the data from an Excel file

In [None]:
# Choose path and file
dataPath = r'Z:\Further projects\Heterogeneity across cell types\data\Excels\drive-data-sets\database' # write your path
fileName = 'Mi1 proofreadings.xlsx'
filePath = os.path.join(dataPath, fileName)

# Load the workbook as a DataFrame
df = pd.read_excel(filePath)

# Drop the first row when it is the 'asdf' placeholder
# (it was added as a workaround to force that column to be read as str)
if df["seg_id"][0] == 'asdf':
    df = df.iloc[1:].reset_index(drop=True)

# Keep only the first row of every "Updated_seg_id"
result_df = df.drop_duplicates(subset=["Updated_seg_id"], keep='first').copy()

# Quick look at the dataframe
display(result_df.head())

# IDs that will be sent to the update
curr_ID_ls = result_df["Updated_seg_id"].tolist()

In [None]:
# Updating all IDs at once (single request; see the batched alternative for long lists)
updated_ID_df = fafbseg.flywire.update_ids(curr_ID_ls, stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)

In [None]:
# Or, updating the IDs via fafbseg in batches of `_steps`

_steps = 100
# ceil() gives exactly the number of non-empty batches
# (round(...) + 1 could send one extra request with an empty list).
_rounds = math.ceil(len(curr_ID_ls) / _steps)

print(f'Rounds to perform: {_rounds}')
batch_frames = []
for _start in range(0, len(curr_ID_ls), _steps):
    curr_ID_df = fafbseg.flywire.update_ids(curr_ID_ls[_start:_start + _steps], stop_layer=2, supervoxels=None, timestamp=None, dataset='production', progress=True)
    batch_frames.append(curr_ID_df)

# Concatenate once at the end instead of growing a DataFrame inside the loop
updated_ID_df = pd.concat(batch_frames) if batch_frames else pd.DataFrame()


In [None]:
# Fresh 0..n-1 index, then add the updated IDs and their confidence as text columns
# (values are assigned by position, so the row order must match `curr_ID_ls`).
result_df = result_df.reset_index(drop=True).assign(
    Updated_seg_ids=updated_ID_df['new_id'].astype(str).tolist(),
    Updated_confidence=updated_ID_df['confidence'].astype(str).tolist(),
)

In [None]:
# Last rows of the table, to check that the new columns were filled to the end.
result_df.tail()

In [None]:
# Preview the update table and list the distinct confidence values it contains.
display(updated_ID_df.head())
print('Update confidences: ')
print(set(updated_ID_df['confidence'].tolist()))

In [None]:
# Rows updated with a confidence below 1.
updated_ID_df[updated_ID_df['confidence'] < 1]

## 2. Saving back in the excel file

In [None]:
# Date tag used in the sheet name, e.g. "10Jun2024"
date_str = datetime.datetime.now().strftime("%d%b%Y")

# Append a new sheet to the existing workbook.
# mode="a" keeps the sheets already stored in the file; if_sheet_exists="new"
# lets the engine pick a fresh sheet name when one with this name already exists.
# The context manager saves and closes the writer even if writing fails.
# (`writer.book = ...` and `writer.save()` no longer work on pandas >= 2.0.)
with pd.ExcelWriter(filePath, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
    result_df.to_excel(writer, sheet_name='Updated_table_'+date_str) #sorted_df

# E) Making a list of unique values

In [None]:
PC_disc = 'D'  # drive letter of the data disk
# Raw f-string: the backslashes are path separators, not escape sequences
# (the non-raw form relies on invalid escapes such as "\C" being kept literally).
dataPath =  rf'{PC_disc}:\Connectomics-Data\FlyWire\Txts'

#File one
fileName_1 = 'temp.txt'
filePath = os.path.join(dataPath,fileName_1)
# Tab-delimited file without header; the values of interest are in the first column
df_1 = pd.read_csv(filePath, delimiter = "\t",header=None)
# Distinct values of that column
list1 = list(set(df_1[0].tolist()))

# Saving function
def save_list_to_file(file_path, input_list):
    """
    Write the items of `input_list` to `file_path`, one item per line.

    The file is written as CSV without header and without index column.

    Args:
        file_path: destination path
        input_list: list of values to store
    """
    pd.DataFrame(input_list, columns=['Items']).to_csv(file_path, header=False, index=False)
    
    
# Same value as assigned at the top of this cell
PC_disc = 'D'

# Store the distinct values next to the input file, with the prefix "Unique_"
file_path_1 = os.path.join(dataPath,f'Unique_{fileName_1}')
save_list_to_file(file_path_1, list1)