In [1]:
import collections, random
import numpy as np
import pickle as pk
import pymania as mn
import matplotlib.pyplot as plt

from collections import Counter
from py2neo import Node, Relationship, Graph
from IPython.display import display, HTML

# Open the py2neo connection that every query cell below runs against.
# NOTE(review): the host and password are hard-coded in the notebook; consider reading
# them from environment variables or a local config file so they are not committed.
graph = Graph(host="canopus.cc.gatech.edu",password='1234')
# IDs of the ten subjects analysed throughout this notebook.
subjects = {
    126426, 137431, 144125, 146735, 152427,
    153227, 177140, 180533, 186545, 188145,
}

# Select Inconsistent links where the Reverse Connections are consistent

We start by selecting the links that are inconsistent across subjects. Here, a link is called inconsistent when the connection from a specific source node to a specific target node is present in exactly half of the subjects. With the 10 subjects chosen, these are the links that are present in 5 subjects and absent in the other 5.

In [2]:
# Select the directed links (source name starting with 'L') that are connected in
# exactly half of the selected subjects.
# The subject list and the "half" threshold are passed as Cypher parameters derived
# from `subjects`, so the query cannot drift from the subject set defined above.
query = '''MATCH (n:ROI)-[r:MANIA2]->(m:ROI)
WHERE r.SUBJECT IN $subjects AND n.name STARTS WITH 'L' AND r.is_connected=TRUE
WITH n.name AS source, m.name AS target, r.SUBJECT AS subject, r.correction_type as correction_type
WITH source, target, COUNT(*) AS connections, COLLECT(subject) AS subjects, COLLECT(correction_type) as correction_types WHERE connections=$half
RETURN source, target, subjects, correction_types order by source, target'''
inconsistent_links = graph.run(
    query,
    subjects=sorted(subjects),
    half=len(subjects) // 2,
).data()

# Number of inconsistent links; the reporting cell that follows reads this value.
l = len(inconsistent_links)

In [3]:
# For every inconsistent link (source -> target), count in how many of the selected
# subjects the reverse link (target -> source) is connected, then report the links whose
# reverse direction is consistent.
#
# Changes compared with the previous version of this cell:
#   * ROI names and the subject list are passed as Cypher parameters instead of being
#     interpolated into the query text.
#   * The separate `is_connected=FALSE` query was removed. It could only fire when the
#     reverse link was connected in 0 subjects, a case the threshold test below already
#     selects, so it produced a duplicate row / double count and doubled the round trips.
#   * The total is taken from `inconsistent_links` directly rather than from the
#     notebook-global `l`, which a later cell rebinds.
subject_ids = sorted(subjects)
consistent_absent_max = 2   # reverse link connected in <= 2 subjects: consistently absent
consistent_present_min = 8  # reverse link connected in >= 8 subjects: consistently present

reverse_query = (
    "MATCH (n:ROI)-[r:MANIA2]->(m:ROI) "
    "WHERE n.name=$source AND m.name=$target AND r.is_connected=TRUE "
    "AND r.SUBJECT IN $subjects "
    "RETURN r.SUBJECT as subject, r.correction_type as correction_type"
)

selected_rois = []    # links whose reverse direction is consistent
roi_counts = dict()   # number of subjects with reverse link connected -> number of links
html_inc = '<h4>Inconsistent Connections with consistent reverse connections</h4>'
html_inc += f'Total inconsistent connections: {len(inconsistent_links)}'
html_inc += '<table><tr><th>Source</th><th>Target</th><th># Reverse Connections</th></tr>'
for link in inconsistent_links:
    source, target = link['source'], link['target']
    # The reverse link runs from the original target back to the original source.
    rev = graph.run(reverse_query, source=target, target=source, subjects=subject_ids).data()
    num_rev = len(rev)
    roi_counts[num_rev] = roi_counts.get(num_rev, 0) + 1
    if num_rev <= consistent_absent_max or num_rev >= consistent_present_min:
        selected_rois.append({'source':source, 'target':target, 'num_rev':num_rev})
        html_inc += f'<tr><td>{source}</td><td>{target}</td><td>{num_rev}</td></tr>'
html_inc += f'</table>Total consistent reverse connections: {len(selected_rois)}'
display(HTML(html_inc))

Source,Target,# Reverse Connections
L1,L167,9
L119,L155,10
L120,L126,8
L142,L45,8
L173,L115,8
L62,L72,2
L88,L61,8


We see that although there are 852 inconsistent connections, only 7 of them have a consistent reverse connection. Consistency here means that the reverse link is either present in 8 or more subjects or absent in 8 or more subjects.

Given that very few inconsistent links have a consistent reverse link, we next look at the distribution of the reverse links over all the inconsistent links.

In [4]:
# Render the distribution of reverse-link counts as an HTML table, one row per
# count, in ascending order of the count.
table_rows = ''.join(
    f'<tr><td>{k}</td><td>{v}</td></tr>'
    for k, v in sorted(roi_counts.items())
)
html = '<table><tr><th>#Subjects Connected</th><th># Links</th></tr>' + table_rows + '</table>'
display(HTML(html))

#Subjects Connected,# Links
2,1
3,9
4,48
5,722
6,50
7,16
8,4
9,1
10,1


This table should be read as follows: of the 852 inconsistent connections, 722 have a reverse connection that is connected in 5 subjects, and so on.

We see that most of the reverse connections corresponding to the _inconsistent connections_ are also inconsistent. There are only very few links which are inconsistent in one direction and consistent in the other.

# Inconsistency within a Subject

Having verified that the inconsistent links have mostly inconsistent reverse links, we next investigate the inconsistency of links within a subject.

We start with the hypothesis that, within a subject, the reverse connection should be present for every connection detected. We begin by counting, for each subject, the links that are present in one direction but not in the reverse direction.

In [5]:
# Fetch every connected directed link (source name starting with 'L') for the selected
# subjects and group them per subject as a set of (source, target) pairs.
# The subject list is passed as a Cypher parameter derived from `subjects` rather than
# being repeated in the query text.
query = '''MATCH (n:ROI)-[r:MANIA2]->(m:ROI)
WHERE r.SUBJECT IN $subjects AND n.name STARTS WITH 'L' AND r.is_connected=TRUE
RETURN n.name AS source, m.name AS target, r.SUBJECT AS subject'''
data = graph.run(query, subjects=sorted(subjects)).data()

# One (initially empty) set per subject, so subjects without any link still get an entry.
links = {sub: set() for sub in subjects}
# Iterate over the rows directly: the previous index loop rebound the notebook-global
# `l`, which an earlier reporting cell reads, so re-running that cell gave a wrong total.
for row in data:
    links[row['subject']].add((row['source'], row['target']))

In [6]:
# Per subject: how many directed links exist, and how many of them have no
# counterpart in the opposite direction within the same subject.
# NOTE(review): `links` is built from a query that only keeps sources whose name starts
# with 'L'; if a target name does not start with 'L', its reverse link can never be in
# the set and is always counted as a mismatch - confirm this is intended.
total_links = {sub: 0 for sub in subjects}
mismatch = {sub: 0 for sub in subjects}
for sub, edges in links.items():
    total_links[sub] += len(edges)
    mismatch[sub] += sum(1 for src, dst in edges if (dst, src) not in edges)

In [7]:
# Build the per-subject summary table: total links, links without a reverse link,
# and the latter as a percentage of the former.
table_parts = ['<table><tr><th>Subject ID</th><th>Total Links</th><th># Links with no reverse links</th><th>Percentage of links with no reverse links</th></tr>']
for sub in subjects:
    per = '%3.2f' % (mismatch[sub]*100./total_links[sub])
    table_parts.append(f'<tr><td>{sub}</td><td>{total_links[sub]}</td><td>{mismatch[sub]}</td><td>{per}%</td></tr>')
table_parts.append('</table>')
html = ''.join(table_parts)
display(HTML(html))

Subject ID,Total Links,# Links with no reverse links,Percentage of links with no reverse links
152427,5957,69,1.16%
153227,6437,75,1.17%
146735,6444,62,0.96%
186545,6169,81,1.31%
188145,6157,85,1.38%
177140,6379,93,1.46%
180533,5463,81,1.48%
137431,6461,91,1.41%
126426,6623,65,0.98%
144125,6203,79,1.27%


We see that, for each subject, fewer than 1.5% of the connections lack a reverse connection. Although this number is small, we want to compare it with the overall inconsistency across subjects.

# Inconsistency across Subjects

In [8]:
# Histogram of links by the number of subjects in which they are connected:
#   connections[k]    -> number of (source, target) pairs connected in exactly k subjects
#   total_connections -> number of pairs connected in at least one subject
# The subject list and subject count are passed as Cypher parameters derived from
# `subjects`. Query rows are consumed directly instead of being stored in `links`,
# because rebinding `links` here (from a dict of sets to a list of rows) broke any
# re-run of the per-subject mismatch cell.
connections = dict()
total_connections = 0
subject_ids = sorted(subjects)
n_subjects = len(subject_ids)

# Pairs connected in k >= 1 subjects, grouped by k.
query = """MATCH (n:ROI)-[r:MANIA2]->(m:ROI)
WHERE r.SUBJECT IN $subjects AND n.name STARTS WITH 'L' AND r.is_connected=TRUE
WITH n.name AS source, m.name AS target, COUNT(*) AS connections
RETURN connections, COUNT(*) AS links"""
for row in graph.run(query, subjects=subject_ids).data():
    connections[row['connections']] = row['links']
    total_connections += row['links']

# Pairs that are disconnected in every selected subject; stored under key 0.
query = """MATCH (n:ROI)-[r:MANIA2]->(m:ROI)
WHERE r.SUBJECT IN $subjects AND n.name STARTS WITH 'L' AND r.is_connected=FALSE
WITH n.name AS source, m.name AS target, COUNT(*) AS connections WHERE connections=$n_subjects
RETURN connections, COUNT(*) AS links"""
for row in graph.run(query, subjects=subject_ids, n_subjects=n_subjects).data():
    connections[n_subjects - row['connections']] = row['links']

In [9]:
# Render the histogram of links by number of connected subjects.
# "% of Links" is relative to `total_connections` (pairs connected in at least one
# subject), so it is undefined for the 0-subject row; that row shows a bare "--".
# The previous version rendered "--%" there and computed the percentage even for that
# row, which raised ZeroDivisionError when no pair was connected at all.
html = '<table><tr><th>#Subjects Connected</th><th># Links</th><th>Total Links</th><th>% of Links</th></tr>'
for k, v in sorted(connections.items()):
    if k == 0 or total_connections == 0:
        pct = '--'
    else:
        pct = '%4.2f%%' % (v*100./total_connections)
    html += f'<tr><td>{k}</td><td>{v}</td><td>{k*v}</td><td>{pct}</td></tr>'
html +='</table>'
display(HTML(html))

#Subjects Connected,# Links,Total Links,% of Links
0,18719,0,--%
1,3428,3428,25.39%
2,1859,3718,13.77%
3,1291,3873,9.56%
4,1022,4088,7.57%
5,852,4260,6.31%
6,816,4896,6.04%
7,719,5033,5.33%
8,713,5704,5.28%
9,717,6453,5.31%


From this table, we see that around 5-7% of the links are inconsistent across subjects. This is higher than the inconsistency within a subject, which we observed above to be below 1.5%.

So, we should investigate the inconsistency of connections across subjects further.