In [1]:
from neo4j import GraphDatabase 
import pandas as pd
from tqdm import tqdm
import ast

url = "bolt://localhost:7687" 

driver = GraphDatabase.driver(url, auth=("neo4j", "heart"))

# Node Sim

Creating in-memory graph with cypher projection:

@cypher:

    CALL gds.graph.create(
    'OS KG',
        ["MeSH", "Protein", "Document", "Drug"],
        {
            MENTIONS: {orientation: 'UNDIRECTED'},
            TARGET: {orientation: 'UNDIRECTED'},
            ASSIGNS: {orientation: 'UNDIRECTED'}
        }
    )

In [2]:
def count_nodes():
    query = "MATCH (n) RETURN COUNT(n)"
    with driver.session() as session:
        info = session.run(query)
    return info

count_nodes()


<neo4j.work.result.Result at 0x7f80d0c5bfd0>

# Node Similarity for OS and CVD

@cypher:

    CALL gds.graph.create(
    'stuff' ,
        ["MeSH", "Document", "Protein", "Drug", "Pathway"],
        {
            MENTIONS: {orientation: 'UNDIRECTED'},
            TARGET: {orientation: 'UNDIRECTED'},
            ASSIGNS: {orientation: 'UNDIRECTED'}, 
            CANDIDATE: {orientation: 'UNDIRECTED'}
        }
    )

In [5]:
class nodesim():
    """Class to run LinkPred"""
    def __init__(self) -> None:
        self.driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "heart"))

    def close(self) -> None:
        self.driver.close()

    @classmethod
    def algo(cls, tx) -> any:
        """
        @param cls is the class
        @param tx is the transaction
        @return result.data() is the data of the cluster
        """
        query = ("""
        CALL gds.nodeSimilarity.stream('stuff')
        YIELD node1, node2, similarity
        RETURN gds.util.asNode(node1).name AS MeSH_Term1, gds.util.asNode(node1).type AS Type1, similarity, gds.util.asNode(node2).type AS Type2, gds.util.asNode(node2).name AS MeSH_Term2
        ORDER BY similarity DESCENDING, MeSH_Term1, MeSH_Term2
        """)
        result = tx.run(query)
        return result.data()


    def run_algo(self) -> any:
        """
        @param self
        @return result is the dataframe from the pagerank
        """
        result = self.driver.session().write_transaction(self.algo)
        result = pd.DataFrame(result)
        return result

In [6]:
#3 min runtime
algo = nodesim()
result = algo.run_algo()
#extract nodes whose similarity is 1
types = ["OS", "CVD"]
CVD_OS = result[result["Type1"].isin(types)]
CVD_OS = CVD_OS[result["Type2"].isin(types)]
#result.dropna()
CVD_OS = CVD_OS.reset_index(drop=True)
CVD_OS

KeyboardInterrupt: 

In [None]:
#filtering out CVD and OS to compare them:
compare = CVD_OS[CVD_OS["Type1"] != CVD_OS["Type2"]].reset_index(drop=True)
compare.head()
compare

In [None]:
#compare.to_csv("OS_CVD_Nodesim.csv")