In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pymaid
import logging
import random
from graspologic.match import graph_match
from graspologic.simulations import er_np
from graspologic.utils import is_unweighted
from pkg.platy import _get_folder, load_connectome_normal_lcc_annotations_v2, load_connectome_normal_lcc_annotations_v3, load_connectome_lcc_normal_adj, load_left_adj, load_right_adj, load_left_adj_labels_with_class_v2, load_right_adj_labels_with_class_v2, load_head_adj_labels_with_class_v2, load_pygidium_adj_labels_with_class_v2, load_0_adj, load_1_adj_labels_with_class_v2, load_2_adj_labels_with_class_v2, load_3_adj_labels_with_class_v2, load_left_adj_labels_with_class_v3, load_right_adj_labels_with_class_v3

967
967
919
919
724
724
79
79
317
317
395
395
323
323
        skids   side    class   segment   type  group
0     2015233   left  sensory      head  100.0    NaN
1     1548290   left   muscle         1    NaN    NaN
2     1318919   left  sensory      head   88.0   15.0
3     2015241   left  sensory      head  100.0    NaN
4     1769485  right  sensory       NaN    NaN    NaN
...       ...    ...      ...       ...    ...    ...
2028  1597423  right   muscle  pygidium    NaN    NaN
2029  1302513   left  sensory      head    NaN    NaN
2030  1671147  right    motor         1  165.0    NaN
2031  1376251   left    inter      head    NaN    NaN
2032  1048573   left    inter         3    NaN    NaN

[2033 rows x 6 columns]


In [2]:
# Toy fixture for exercising the pair bookkeeping.
#   df1 -> stands in for the true (Nadine) pairs, one left/right pair per row
#   df2 -> its two columns stand in for the left and right indexes extracted
#          from the left and right adjacencies
df1 = pd.DataFrame(
    {
        "L": [1, 2, 4, 3, 7, 6, 5, 8],
        "R": list("abdcgfeh"),
    }
)
df2 = pd.DataFrame(
    {
        "L": [1, 3, 12, 9, 10, 11, 2, 4, 6, 5],
        "R": list("adhrfubwqc"),
    }
)

# Show both frames, with a blank line between them
print("df1")
print(df1, "\n")
print("df2")
print(df2)

df1
   L  R
0  1  a
1  2  b
2  4  d
3  3  c
4  7  g
5  6  f
6  5  e
7  8  h 

df2
    L  R
0   1  a
1   3  d
2  12  h
3   9  r
4  10  f
5  11  u
6   2  b
7   4  w
8   6  q
9   5  c


In [3]:
#format df2 by initializing as I have done previously

# Keep the df1 pairs whose left member appears in df2's L column AND whose
# right member appears in df2's R column.
L2_list = list(df2.L)
R2_list = list(df2.R)

# One vectorized membership test per column instead of a Python loop over
# range(len(df1)) with .loc[i, ...]: the loop only worked while df1 carried a
# default 0..n-1 index and rescanned both lists for every row.
both_present = df1["L"].isin(L2_list) & df1["R"].isin(R2_list)

# Boolean selection preserves df1's row order, so the two lists stay aligned
# element-by-element; .tolist() yields plain Python scalars.
L2_in_pair = df1.loc[both_present, "L"].tolist()
R2_in_pair = df1.loc[both_present, "R"].tolist()

print(L2_in_pair)
print(R2_in_pair)

[1, 2, 4, 3, 6]
['a', 'b', 'd', 'c', 'f']


In [4]:
from numpy.random import default_rng

# Fixed seed so that Restart & Run All reproduces the same shuffle (and hence
# the same `pairs` table downstream). Change SEED to draw a different shuffle.
SEED = 0
rng = default_rng(SEED)

# Everything in df2's L / R columns that is not part of a known pair.
# Sets give constant-time membership checks while the comprehension keeps the
# original column order.
paired_left = set(L2_in_pair)
paired_right = set(R2_in_pair)
L2_not_in_pair = np.array([val for val in L2_list if val not in paired_left])
R2_not_in_pair = np.array([val for val in R2_list if val not in paired_right])

# Shuffle each side independently: draw a random permutation of the positions
# and reorder the array with it.
rand_ind_left = rng.permutation(len(L2_not_in_pair))
L2_not_in_pair_rand = L2_not_in_pair[rand_ind_left]

rand_ind_right = rng.permutation(len(R2_not_in_pair))
R2_not_in_pair_rand = R2_not_in_pair[rand_ind_right]

print(L2_not_in_pair_rand)
print(R2_not_in_pair_rand)

[ 9 11  5 12 10]
['r' 'w' 'q' 'u' 'h']


In [5]:
# Stack the known pairs on top of the shuffled leftovers, one vector per side,
# so that position i on the left lines up with position i on the right.
L2_to_match = np.concatenate((L2_in_pair, L2_not_in_pair_rand), axis=0)
R2_to_match = np.concatenate((R2_in_pair, R2_not_in_pair_rand), axis=0)

# One (left, right) tuple per row of the resulting table
pair_columns = ["skid-ID LEFT", "skid-ID RIGHT"]
pair_rows = [(left, right) for left, right in zip(L2_to_match, R2_to_match)]
pairs = pd.DataFrame(pair_rows, columns=pair_columns)
pairs

Unnamed: 0,skid-ID LEFT,skid-ID RIGHT
0,1,a
1,2,b
2,4,d
3,3,c
4,6,f
5,9,r
6,11,w
7,5,q
8,12,u
9,10,h


In [9]:
# Stand-in for a graph-matching result: a hand-written ordering of the right-hand
# ids in which the second and third rows keep their partner and the other
# known-pair rows do not.
right_list_after_matching = ["r", "b", "d", "a", "c", "f", "w", "u", "h", "q"]

# assign() hands back a new frame with the column replaced, leaving `pairs` untouched
updated_pairs = pairs.assign(**{"skid-ID RIGHT": right_list_after_matching})
print(updated_pairs)

# Expected accuracy for this simulated result: 0.4

   skid-ID LEFT skid-ID RIGHT
0             1             r
1             2             b
2             4             d
3             3             a
4             6             c
5             9             f
6            11             w
7             5             u
8            12             h
9            10             q


In [10]:
# Calculate accuracy over the known pairs only.
# `pairs` was assembled with the known pairs first, so the leading
# len(L2_in_pair) rows are the ones that have a ground-truth partner.
# The mask is built positionally so it does not depend on the index labels.
mask = np.arange(len(updated_pairs)) < len(L2_in_pair)
L2_to_consider = updated_pairs["skid-ID LEFT"][mask]
R2_to_consider = updated_pairs["skid-ID RIGHT"][mask]

matched_pairs_test = pd.Series(index = L2_to_consider.values, data = R2_to_consider.values)

# Look up the true right partner of every left id *by key*.
# Filtering df1 with .isin() would return the partners in df1's row order,
# which lines up with L2_to_consider only by coincidence of how the pairs
# were collected; an explicit left -> right mapping keeps each partner
# attached to its own left id regardless of ordering.
# A left id missing from df1 maps to NaN and therefore counts as a mismatch.
true_right_by_left = dict(zip(df1["L"], df1["R"]))
R1_to_consider = L2_to_consider.map(true_right_by_left)

true_pairs_test = pd.Series(index = L2_to_consider.values, data = R1_to_consider.values)

print(true_pairs_test)
print(matched_pairs_test)
# Fraction of known pairs whose matched right id equals the true right id
(matched_pairs_test == true_pairs_test).mean()

1    a
2    b
4    d
3    c
6    f
dtype: object
1    r
2    b
4    d
3    a
6    c
dtype: object


0.4