Using Graph Data Science Library (GDS) in Neo4j 

In [99]:
import getpass
import os

from neo4j import GraphDatabase

# Connection settings come from the environment so that no secret is stored in
# the notebook. Set NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD before starting the
# kernel; the URI and the user fall back to the local defaults.
url = os.environ.get("NEO4J_URI", "neo4j://localhost:11003")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")

# The password is never written into the notebook: it is read from the
# environment, or prompted for interactively when the variable is not set.
driver = GraphDatabase.driver(
    url,
    auth=(
        neo4j_user,
        os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: "),
    ),
)

First, create a named graph: a projection that contains only the node labels and relationship types we want to analyze.

This analysis covers the Drug, Pathway, Protein, and Frag nodes connected by the MENTIONS relationship.

@cypher:
    CALL gds.graph.create(
    'drug_analysis',
    ['Drug', 'Protein', 'Pathway', 'Frag'],
    ['Mentions']
    )

In [100]:
import pandas as pd

#confirms the connection
def confirmation_query(tx):
    """
    Transaction function that asks the server for its database info.

    @param tx is the transaction that the session passes to this function
    @return the single record produced by `CALL db.info()`; this is the
            driver's record object, not a string
    """
    return tx.run("CALL db.info()").single()

def server_confirmation():
    """
    Fetches the server's database info through the module-level `driver`.

    @return the record returned by confirmation_query (not a string)
    """
    with driver.session() as session:
        # `CALL db.info()` only reads, so a read transaction is sufficient.
        return session.read_transaction(confirmation_query)

#checks for gds library
def gds_confirmation_query(tx):
    """
    Transaction function that checks that the GDS library is available.

    @param tx is the transaction that the session passes to this function
    @return a (version_record, listing) tuple: the single record of
            `RETURN gds.version()` and the rows of `CALL gds.list()` as
            returned by the result's data() method
    """
    # Each result is fully consumed before the next query is issued, so the
    # function does not depend on the driver buffering an earlier result.
    version_record = tx.run("RETURN gds.version()").single()
    listing = tx.run("CALL gds.list()").data()
    return version_record, listing

def gds_confirmation():
    """
    Checks for the GDS library through the module-level `driver`.

    @return the (version_record, listing) tuple produced by
            gds_confirmation_query (not a string)
    """
    with driver.session() as session:
        # Both queries only read, so a read transaction is sufficient.
        return session.read_transaction(gds_confirmation_query)

server_info = server_confirmation()

# The results get their own names: assigning them to `gds_confirmation` would
# replace the function with its return value for the rest of the session.
gds_version, gds_listing = gds_confirmation()

# Separate name for the DataFrame so `gds_listing` keeps the raw rows.
gds_listing_df = pd.DataFrame(gds_listing)

print("server info: \n", server_info, '\n', type(server_info))
print("\n")
print("gds confirmation: \n", gds_version)
print("list of algorithms: \n", gds_listing_df)



server info: 
 <Record id='91657971FD7D672FB17703D0A41F45126BC1D5102F7BEFFECAEB0914834CB692' name='neo4j' creationDate='2020-11-05T04:33:52.238Z'> 
 <class 'neo4j.data.Record'>


gds confirmation: 
 <Record gds.version()='1.4.0'>
list of algorithms: 
                                     name  ...       type
0      gds.alpha.allShortestPaths.stream  ...  procedure
1           gds.alpha.articleRank.stream  ...  procedure
2            gds.alpha.articleRank.write  ...  procedure
3                   gds.alpha.bfs.stream  ...  procedure
4    gds.alpha.closeness.harmonic.stream  ...  procedure
..                                   ...  ...        ...
197                    gds.util.infinity  ...   function
198                    gds.util.isFinite  ...   function
199                  gds.util.isInfinite  ...   function
200                gds.util.nodeProperty  ...   function
201                          gds.version  ...   function

[202 rows x 4 columns]


PageRank Algorithm: measures the importance of each node within the graph, based on the number of
incoming relationships and the importance of the corresponding source nodes.

Considerations:
-   If there are no relationships from within a group of pages to outside the group, then the group is considered a spider trap.
-   Rank sinks can occur when a network of pages forms an infinite cycle.
-   Dead-ends occur when pages have no outgoing relationship.

In [90]:
from neo4j import GraphDatabase

class pagerank:
    """
    Runs the GDS PageRank procedures against the 'drug_analysis' named graph.

    The instance owns a Neo4j driver. Call close() when finished, or use the
    instance in a `with` statement, which closes it on exit. The procedure
    arguments (graph name, maxIterations, dampingFactor) are fixed in the
    query strings below.
    """

    def __init__(self, uri=None, user=None, password=None) -> None:
        """
        @param uri is the connection URI; defaults to $NEO4J_URI, then
               neo4j://localhost:11003
        @param user is the database user; defaults to $NEO4J_USER, then 'neo4j'
        @param password is the user's password; defaults to $NEO4J_PASSWORD and
               is prompted for interactively when neither is provided
        """
        # Imported here so this cell does not depend on another cell's imports.
        import getpass
        import os

        uri = uri or os.environ.get("NEO4J_URI", "neo4j://localhost:11003")
        user = user or os.environ.get("NEO4J_USER", "neo4j")
        # The password is never stored in the notebook.
        if password is None:
            password = os.environ.get("NEO4J_PASSWORD")
        if password is None:
            password = getpass.getpass("Neo4j password: ")

        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """
        @return self, so the instance can be bound in a `with` statement
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """
        Closes the driver when the `with` block ends; exceptions propagate.
        """
        self.close()

    def close(self) -> None:
        """
        Closes the underlying driver.
        """
        self.driver.close()

    #estimation for memory
    @classmethod
    def memory_estimation(cls, tx):
        """
        Transaction function for the PageRank write-mode memory estimate.

        @param cls is the class
        @param tx is the transaction
        @returns the single record yielded by the estimate procedure
        """
        query = ("Call gds.pageRank.write.estimate('drug_analysis', {writeProperty: 'pageRank', maxIterations: 30, dampingFactor: 0.85}) YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory")
        return tx.run(query).single()

    @classmethod
    def pagerank(cls, tx) -> list:
        """
        Transaction function that streams the PageRank scores.

        @param cls is the class
        @param tx is the transaction
        @return the rows (name, score) as returned by the result's data()
                method, ordered by score descending and then name ascending
        """
        query = ("Call gds.pageRank.stream('drug_analysis') YIELD nodeId, score RETURN gds.util.asNode(nodeId).name AS name, score ORDER BY score DESC, name ASC")
        return tx.run(query).data()

    def estimate_pagerank(self):
        """
        @param self
        @return the record produced by memory_estimation
        """
        # The `with` block closes the session once the transaction finishes;
        # previously the session was opened and never closed.
        # NOTE(review): the estimate looks read-only; the write transaction is
        # kept so that routing stays exactly as before -- confirm before changing.
        with self.driver.session() as session:
            return session.write_transaction(self.memory_estimation)

    def run_pagerank(self) -> "pd.DataFrame":
        """
        @param self
        @return a DataFrame built from the rows returned by pagerank
        """
        # Same session handling as estimate_pagerank: always closed on exit.
        with self.driver.session() as session:
            rows = session.write_transaction(self.pagerank)
        return pd.DataFrame(rows)
            


In [91]:
import pandas as pd


# These names deliberately avoid `driver` (the Neo4j driver used by the
# confirmation helpers) and `pagerank` (the class itself). Rebinding either
# breaks the earlier helpers and makes this cell fail when it is run again.
pagerank_runner = pagerank()
try:
    pagerank_estimate = pagerank_runner.estimate_pagerank()
    print('Pagerank Info: ', pagerank_estimate)
    print('--------------------------------')

    pagerank_scores = pagerank_runner.run_pagerank()
    print(pagerank_scores)
finally:
    # Release the driver even if one of the queries raises.
    pagerank_runner.close()



Pagerank Info:  <Record nodeCount=7151 relationshipCount=1732 bytesMin=230168 bytesMax=230168 requiredMemory='224 KiB'>
--------------------------------
              name      score
0          disease  13.579986
1        apoptosis  11.104366
2     nitric_oxide   9.253497
3          release   6.731126
4       metabolism   6.673752
...            ...        ...
7146          None   0.150000
7147          None   0.150000
7148          None   0.150000
7149          None   0.150000
7150          None   0.150000

[7151 rows x 2 columns]
