# Identify overlap between COVID-19 KG and neurodegeneration KGs

In [1]:
import time
import sys
import os

from collections import defaultdict
import csv
import pybel
import networkx as nx
import pybel_tools
from networkx import connected_components

  from tqdm.autonotebook import tqdm


In [2]:
sys.version

'3.8.3 (default, Jul  2 2020, 11:26:31) \n[Clang 10.0.0 ]'

In [3]:
time.asctime()

'Tue Oct 19 10:45:44 2021'

In [4]:
pybel.get_version()

'0.15.4'

In [5]:
pybel_tools.utils.get_version()

'0.7.2'

## Knowledge assemblies June 2018 version (AD: 5.0.5 & PD: 1.1.1)

In [6]:
# https://raw.githubusercontent.com/neurommsig/neurommsig-knowledge/master/neurommsig_knowledge/alzheimers.bel

In [7]:
# Compile the locally downloaded Alzheimer's disease BEL script. The two
# flags ask the parser to skip identifier validation and to tolerate
# definition failures.
alzheimers = pybel.from_bel_script(
    '/Users/danieldomingo/Downloads/alzheimers.bel',
    allow_definition_failures=True,
    no_identifier_validation=True,
)



In [8]:
alzheimers.number_of_edges()

14519

In [None]:
# https://raw.githubusercontent.com/neurommsig/neurommsig-knowledge/master/neurommsig_knowledge/parkinsons.bel

In [9]:
# Compile the locally downloaded Parkinson's disease BEL script with the same
# parser flags as the Alzheimer's disease one.
parkinsons = pybel.from_bel_script(
    '/Users/danieldomingo/Downloads/parkinsons.bel',
    allow_definition_failures=True,
    no_identifier_validation=True,
)

In [10]:
parkinsons.number_of_edges()

5279

In [11]:
# https://raw.githubusercontent.com/covid19kg/covid19kg/master/covid19kg/_cache.bel.nodelink.json

In [12]:
# Load the COVID-19 graph from its locally downloaded node-link JSON file.
covid_graph = pybel.from_nodelink_file(
    '/Users/danieldomingo/Downloads/_cache.bel.nodelink.json',
)

In [13]:
covid_graph.number_of_edges()

10223

Approaches
=====

1. Common edges when (subject) - (specific relationship) - (object)
2. Common edges when (subject) - (any relationship) - (object)

Define method to extract common relationships

In [14]:
# Combine the Alzheimer's and Parkinson's disease graphs; the result is what
# gets compared against the COVID-19 graph below.
dementia = pybel.union([alzheimers, parkinsons])

Collapse central dogma nodes 

In [15]:
# Apply the collapse to both graphs, dementia first. The return value is not
# used, so the call is relied on for its effect on the graph it receives.
for _graph in (dementia, covid_graph):
    pybel.struct.mutation.collapse_to_genes(_graph)

In [16]:
def find_common_relationships(edges_1, edges_2, edge):
    """Compare the relations two edge mappings hold for the same node pair.

    :param edges_1: mapping from a node pair to its relations (first graph)
    :param edges_2: mapping from a node pair to its relations (second graph)
    :param edge: the node pair looked up in both mappings
    :return: ``(shared relations, True)`` when at least one relation occurs
        in both mappings, otherwise ``(relations from either mapping, False)``
    """
    first_relations = set(edges_1[edge])
    second_relations = set(edges_2[edge])

    shared = first_relations & second_relations
    if not shared:
        # No overlap: report everything seen on either side, flagged False.
        return first_relations | second_relations, False

    return shared, True
        

def get_triples(graph):
    """Group the relations of a graph by (subject, object) node pair.

    Only edges whose two endpoints are both ``pybel.dsl.BaseAbundance``
    instances are considered; every other edge is skipped.

    :param graph: object whose ``edges(data=True)`` yields
        ``(subject, object, edge data)`` triples, where the edge data has a
        ``'relation'`` entry
    :return: ``defaultdict`` mapping each ``(subject node, object node)`` pair
        to the set of relations found between the two nodes
    """
    triples = defaultdict(set)

    for subject_node, object_node, edge_properties in graph.edges(data=True):
        both_abundances = (
            isinstance(subject_node, pybel.dsl.BaseAbundance)
            and isinstance(object_node, pybel.dsl.BaseAbundance)
        )
        if not both_abundances:
            continue

        # The node objects themselves are the keys. The former per-namespace
        # branches all assigned the node unchanged, so they are collapsed.
        triples[(subject_node, object_node)].add(edge_properties['relation'])

    return triples

def find_common_edges_between_two_graphs(graph_1, graph_2):
    """Collect the node pairs that are connected in both graphs.

    :param graph_1: first graph; its node pairs determine the result order
    :param graph_2: second graph
    :return: dict mapping every ``(source, target)`` pair present in both
        graphs to what ``find_common_relationships`` returns for that pair
    """
    first_triples = get_triples(graph_1)
    second_triples = get_triples(graph_2)

    return {
        pair: find_common_relationships(first_triples, second_triples, pair)
        for pair in first_triples
        if pair in second_triples
    }
    

Search for specific COVID-19 genes (https://www.nature.com/articles/d41586-021-01827-w) among the node names of the combined AD/PD (dementia) graph

In [17]:
# COVID-19 gene symbols to look for among the dementia graph's node names.
genes = [
    'SLC6A20',
    'IFNAR1',
    'IFNAR2',
    'OAS1',
    'OAS2',
    'OAS3',
    'FOXP4',
    'DPP9',
    'TYK2',
]

In [18]:
# Names of all nodes in the dementia graph that are BaseAbundance instances.
names = {
    abundance.name
    for abundance in filter(
        lambda candidate: isinstance(candidate, pybel.dsl.BaseAbundance),
        dementia.nodes(),
    )
}

In [19]:
len(names)

3670

In [20]:
# Print every dementia node name that is one of the listed COVID-19 genes.
for name in names:
    if name not in genes:
        continue
    print(name)
    

In [21]:
# Print the dementia node names that share the 'SLC6A' prefix.
for name in names:
    if not name.startswith('SLC6A'):
        continue
    print(name)

SLC6A3
SLC6A11


Calculate the edges shared by the two graphs

In [22]:
# Node pairs connected in both the dementia graph and the COVID-19 graph; each
# value is the (relations, flag) tuple from find_common_relationships.
common_edges = find_common_edges_between_two_graphs(dementia, covid_graph)

In [23]:
filter_common_edges = {}

# Each value of ``common_edges`` is the ``(relations, flag)`` tuple returned by
# ``find_common_relationships``. The checks below must inspect the relation
# set: the tuple itself is always truthy and always has length 2, so testing
# it directly never filters anything out.
for (_subject, _object), relationships in common_edges.items():
    relations = relationships[0]

    # No relation recorded for the pair: nothing to report.
    if not relations:
        continue

    # Skip pairs whose only relation is one of these two.
    if len(relations) == 1 and next(iter(relations)) in {'transcribedTo', 'translatedTo'}:
        continue

    # Keep the full tuple so later cells can still read relations and flag.
    filter_common_edges[(_subject, _object)] = relationships

    print('{} {}: {}'.format(_subject.name, _object.name, relationships))

angiotensin II vasoconstriction: ({'association', 'increases'}, False)
losartan angiotensin receptor antagonist: ({'isA'}, True)
losartan AGTR1: ({'association', 'decreases'}, False)
inflammasome complex CASP1: ({'association', 'increases'}, False)
IL10 IL6: ({'positiveCorrelation', 'increases'}, False)
CASP1 IL1B: ({'increases'}, True)
DDIT3 BCL2L11: ({'increases'}, True)
DDX58 MAVS: ({'increases'}, True)
IFIH1 MAVS: ({'increases'}, True)


Export to CSV

In [24]:
def list_or_tuple(x):
    """Tell whether ``x`` is a list or a tuple (subclasses included)."""
    return isinstance(x, list) or isinstance(x, tuple)


def flatten(sequence, to_expand=list_or_tuple):
    """Lazily yield the leaves of an arbitrarily nested sequence.

    :param sequence: iterable whose items may themselves be nested containers
    :param to_expand: predicate deciding whether an item is descended into;
        by default lists and tuples are expanded
    :return: generator over the non-expanded items, in depth-first order
    """
    for element in sequence:
        if not to_expand(element):
            yield element
            continue
        # Recurse with the same predicate so nesting of any depth is handled.
        yield from flatten(element, to_expand)
            
def node_to_string(node):
    """Return whatever ``node.as_bel()`` produces, e.g. to write it to a CSV cell."""
    bel_string = node.as_bel()
    return bel_string

def export_results_to_csv(file_name, results, full_match=None):
    """Export the results to csv.

    :param str file_name: path of the CSV file to (over)write
    :param dict results: mapping from ``(subject, object)`` node pairs to a
        ``(relations, is_match)`` tuple; the nodes must expose ``.name``
    :param bool full_match: when truthy, export only the pairs whose tuple is
        flagged ``True``, i.e. the same relationship was found on both sides
    """
    # The csv module requires ``newline=''`` so that it alone controls the
    # line endings (otherwise blank rows appear on some platforms).
    with open(file_name, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)

        writer.writerow(['Subject', 'Object', 'Common Relationships (if empty means they do not match in both diseases)'])

        for (_subject, _object), relationships in results.items():
            relations, is_match = relationships[0], relationships[1]

            # The tuple itself is always truthy, so the match flag is what
            # has to be tested to honour ``full_match``.
            if full_match and not is_match:
                continue

            # Sorted so the file content does not depend on set ordering.
            writer.writerow([_subject.name, _object.name, ' '.join(sorted(relations))])


In [25]:
# '__file__' is a quoted string here, not the module attribute, so realpath
# resolves it against the current working directory and DIR_PATH ends up
# being that directory.
DIR_PATH = os.path.dirname(os.path.realpath('__file__'))

# Output files go one level above DIR_PATH.
RESULTS = os.path.abspath(os.path.join(DIR_PATH, os.pardir))

# CSV targets for the two export calls below (Approach #1 and Approach #2).
COMMON_EDGES_CSV = os.path.join(RESULTS, 'common_edges_ad_covid.csv')
COMMON_EDGES__WITHOUT_RELATION_CSV = os.path.join(RESULTS, 'common_edges_ad_covid_without_relation_check.csv')

# NOTE(review): not referenced anywhere in the visible part of the file.
COMMON_EDGE_GRAPHML = os.path.join(RESULTS, 'common_edges.graphml')

Approach #1

In [26]:
# Approach #1: request only the fully matching pairs (full_match=True).
export_results_to_csv(COMMON_EDGES_CSV, filter_common_edges, full_match=True)

Approach #2

In [27]:
# Approach #2: same input, exported without the full_match restriction.
export_results_to_csv(COMMON_EDGES__WITHOUT_RELATION_CSV, filter_common_edges)