# Create summary files for diseases, compounds, and indications

In [1]:
import json

import py2neo
import pandas

In [2]:
# Load the list of configured Neo4j server instances. Each entry is expected
# to carry at least a 'name' and a 'port' key (both are read below).
with open('../all-features/servers.json') as read_file:
    instances = json.load(read_file)

# Connect to the instance named 'rephetio-v2.0' on localhost at its port.
for instance in instances:
    if instance['name'] == 'rephetio-v2.0':
        uri = 'http://localhost:{}/db/data/'.format(instance['port'])
        neo = py2neo.Graph(uri)
        break
else:
    # Without this guard a missing entry leaves `neo` unbound (or, in a
    # reused kernel, pointing at a stale graph) and the failure surfaces
    # later as an unrelated error.
    raise LookupError(
        "no server named 'rephetio-v2.0' in ../all-features/servers.json"
    )
# Bare expression so the notebook displays the connected graph.
neo

<Graph uri='http://localhost:7500/db/data/'>

In [3]:
def to_df(record_list):
    """
    Build a pandas DataFrame from a py2neo RecordList.

    The rows are taken from ``record_list.records`` and the column labels
    from ``record_list.columns``.
    """
    rows = record_list.records
    header = record_list.columns
    frame = pandas.DataFrame(rows, columns=header)
    return frame

## Retrieve compounds and diseases that are connected

In [4]:
# Summarize every Disease node that has at least one relationship: its
# identifier and name, how many TREATS_CtD and PALLIATES_CpD edges touch it
# (in either direction), and its total edge count, sorted by disease name.
query = '''
MATCH (disease:Disease)
WHERE exists((disease)-[]-())
RETURN
  disease.identifier AS disease_id,
  disease.name AS disease_name,
  size((disease)-[:TREATS_CtD]-()) AS treats,
  size((disease)-[:PALLIATES_CpD]-()) AS palliates,
  size((disease)-[]-()) AS total_edges
ORDER BY disease_name
'''

# Run the query and tabulate the result; the last line previews two rows.
disease_df = to_df(
    record_list=neo.cypher.execute(query),
)
disease_df.head(n=2)

Unnamed: 0,disease_id,disease_name,treats,palliates,total_edges
0,DOID:10652,Alzheimer's disease,3,0,199
1,DOID:9206,Barrett's esophagus,0,0,24


In [5]:
# Summarize every Compound node that has at least one relationship: its
# identifier and name, how many TREATS_CtD and PALLIATES_CpD edges touch it
# (in either direction), and its total edge count, sorted by compound name.
query = '''
MATCH (compound:Compound)
WHERE exists((compound)-[]-())
RETURN
  compound.identifier AS compound_id,
  compound.name AS compound_name,
  size((compound)-[:TREATS_CtD]-()) AS treats,
  size((compound)-[:PALLIATES_CpD]-()) AS palliates,
  size((compound)-[]-()) AS total_edges
ORDER BY compound_name
'''

# Run the query and tabulate the result; the last line previews two rows.
compound_df = to_df(
    record_list=neo.cypher.execute(query),
)
compound_df.head(n=2)

Unnamed: 0,compound_id,compound_name,treats,palliates,total_edges
0,DB01048,Abacavir,1,0,4
1,DB05812,Abiraterone,1,0,9


In [6]:
# Save the compound and disease summaries as tab-separated files in the
# current directory, omitting the DataFrame index column.
compound_df.to_csv(
    path_or_buf='compounds.tsv',
    sep='\t',
    index=False,
)
disease_df.to_csv(
    path_or_buf='diseases.tsv',
    sep='\t',
    index=False,
)

## Retrieve indications from hetnet

In [7]:
# List every relationship directed from a Compound to a Disease, whatever
# its type, with both endpoints' identifiers and names plus the relationship
# type. Rows are ordered by compound name, then relationship type
# (descending), then disease name.
indication_query = '''
MATCH (compound:Compound)-[rel]->(disease:Disease)
RETURN
  compound.identifier AS compound_id,
  compound.name AS compound_name,
  disease.identifier AS disease_id,
  disease.name AS disease_name,
  type(rel) AS rel_type
ORDER BY
  compound_name, rel_type DESC, disease_name
'''
# Run the query and tabulate the result; the last line previews two rows.
indication_df = to_df(
    record_list=neo.cypher.execute(indication_query),
)
indication_df.head(n=2)

Unnamed: 0,compound_id,compound_name,disease_id,disease_name,rel_type
0,DB01048,Abacavir,DOID:635,acquired immunodeficiency syndrome,TREATS_CtD
1,DB05812,Abiraterone,DOID:10283,prostate cancer,TREATS_CtD


In [8]:
# Save the compound–disease relationships as a tab-separated file in the
# current directory, omitting the DataFrame index column.
indication_df.to_csv(
    path_or_buf='indications.tsv',
    sep='\t',
    index=False,
)