# Part D

## Setup

In [1]:
import os

from py2neo import Graph, ClientError

In [2]:
#set connection variables
# Each value can be overridden with an environment variable so that credentials
# do not have to be edited into the notebook. The fallbacks are the previous
# hardcoded local-development values, so behaviour is unchanged when no
# variable is set.
PORT = os.environ.get("NEO4J_BOLT_PORT", "7687") #database running on this port for bolt connections
USER = os.environ.get("NEO4J_USER", "neo4j") #standard user
PASSWORD = os.environ.get("NEO4J_PASSWORD", "publication-graph") #db password

In [3]:
#connect to database
# Builds the bolt URI for localhost from PORT and opens the py2neo Graph handle
# that every later cell uses through the module-level name `graph`.
try:
    bolt_uri = 'bolt://localhost:' + PORT
    graph = Graph(bolt_uri, auth=(USER, PASSWORD))
    print('SUCCESS: Connected to the Neo4j Database.')
except Exception as conn_err:
    # Any exception raised above stops the run: report it, then exit with the
    # original exception as the SystemExit payload.
    print('ERROR: Could not connect to the Neo4j Database. See console for details.')
    raise SystemExit(conn_err)

SUCCESS: Connected to the Neo4j Database.


In [4]:
# query helper function
def run_query(query: str):
    """Run a Cypher query on the module-level ``graph`` connection.

    Parameters
    ----------
    query : str
        Cypher statement, passed unchanged to ``graph.run``.

    Returns
    -------
    The object returned by ``graph.run(query)`` on success. If ``graph.run``
    raises ``ClientError``, the error's message is printed and ``None`` is
    returned, so callers have to be prepared for a ``None`` result.
    """
    try:
        result = graph.run(query)
    except ClientError as client_err:
        # Best-effort: report the database error and let the notebook continue.
        print(client_err.message)
        return None
    return result

## 1. Page Rank

In [5]:
# graph projection
# Cypher call to gds.graph.project that creates a GDS graph named 'paper-rank'
# from nodes labelled 'Paper' and relationships of type 'CITES'.
projection_page_rank = """
CALL gds.graph.project(
  'paper-rank',
  'Paper',
  'CITES'
)"""

In [6]:
# Create the 'paper-rank' projection; the cell output is the row returned by
# gds.graph.project (projection settings, node and relationship counts).
run_query(projection_page_rank)

nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
"{Paper: {label: 'Paper', properties: {}}}","{CITES: {orientation: 'NATURAL', indexInverse: false, aggregation: 'DEFAULT', type: 'CITES', properties: {}}}",paper-rank,27621,269541,4197


In [7]:
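# PageRank computation. Intended parameters: 25 iterations, damping factor 0.85 (standard value).
# NOTE: the gds.pageRank.stream call below passes no configuration map, so the GDS defaults apply
# (maxIterations = 20, dampingFactor = 0.85); pass {maxIterations: 25} explicitly if 25 iterations are required.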

In [8]:
# Cypher that streams PageRank scores from the 'paper-rank' projection and
# returns one row per node: the node's `title` property and its score, sorted
# by score (descending) and then by title (ascending).
# No configuration map is passed to gds.pageRank.stream.
page_rank_stream = """
CALL gds.pageRank.stream('paper-rank')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).title AS title, score
ORDER BY score DESC, title ASC
"""

In [9]:
# Run the PageRank stream query and load the result into a DataFrame.
cursor = run_query(page_rank_stream)
if cursor is None:
    # run_query prints the ClientError message and returns None; stop with a
    # clear error instead of an AttributeError on `None.to_data_frame()`.
    raise RuntimeError("PageRank query failed; see the error message printed above.")
df = cursor.to_data_frame()

In [10]:
# Show the first ten rows; the query already orders rows by score (descending),
# so these are the ten highest-scoring papers.
df.head(10)

Unnamed: 0,title,score
0,Handling shared resources in a temporal data b...,135.675282
1,Can entity-based information systems live with...,128.706558
2,An environment for development of decision sup...,127.707872
3,Classification as a paradigm for computing (ab...,112.368444
4,A reduced dimension branch-and-bound algorithm...,101.967724
5,An inclusive and extensible architecture for e...,101.967724
6,Seven Challenges.,101.967724
7,"The Expressive Power of Urgent, Lazy and Busy-...",101.967724
8,Top-secret Multi-Agent Systems.,101.967724
9,Information structuring and its implementation...,97.527169


## 2. Node similarity (Jaccard similarity)

In [19]:
# Cypher call to gds.graph.project that creates a GDS graph named
# "similarity_of_papers2" from nodes labelled "Paper" or "Keyword" and
# relationships of type "MAIN_TOPIC", yielding the graph name as `graph`.
# NOTE: the call fails when a graph with this name already exists (see the
# error output of the cell that runs it), so it is not safe to re-run as-is.
projection_jaccard = """
CALL gds.graph.project(
    "similarity_of_papers2",
    ["Paper", "Keyword"],
    "MAIN_TOPIC")
YIELD
    graphName as graph
"""

# Cypher that streams node-similarity results from the "similarity_of_papers2"
# projection, resolves both node ids to nodes, drops rows where both sides are
# the same node, and returns both titles, both node ids and the similarity.
# Rows are ordered by similarity (descending), then by the returned paper1 and
# paper2 columns.
# NOTE(review): the output further down lists each pair in both directions
# (e.g. rows 0 and 1) — confirm whether one row per pair is wanted.
jaccard_stream = """
CALL gds.nodeSimilarity.stream("similarity_of_papers2")
YIELD node1, node2, similarity
WITH gds.util.asNode(node1) AS paper1, gds.util.asNode(node2) AS paper2, similarity
WHERE paper1 <> paper2
RETURN paper1.title as paper1, paper2.title AS paper2, id(paper1), id(paper2), similarity
ORDER BY similarity DESC, paper1, paper2
"""

In [20]:
# Create the "similarity_of_papers2" projection. If it already exists, the
# database raises a ClientError; run_query prints its message (as in the output
# below) and returns None, so the notebook continues with the existing graph.
run_query(projection_jaccard)

Failed to invoke procedure `gds.graph.project`: Caused by: java.lang.IllegalArgumentException: A graph with name 'similarity_of_papers2' already exists.


---

In [21]:
# Run the node-similarity stream query and load the result into a DataFrame.
cursor_jaccard = run_query(jaccard_stream)
if cursor_jaccard is None:
    # run_query prints the ClientError message and returns None; stop with a
    # clear error instead of an AttributeError on `None.to_data_frame()`.
    raise RuntimeError("Node similarity query failed; see the error message printed above.")
df_jaccard = cursor_jaccard.to_data_frame()

In [22]:
# Show the first ten rows; the query already orders rows by similarity
# (descending), so these are among the most similar pairs.
df_jaccard.head(10)

Unnamed: 0,paper1,paper2,id(paper1),id(paper2),similarity
0,Abstract Interpretation Based Verification of ...,Abstract Interpretation based Verification of ...,247621,264772,1.0
1,Abstract Interpretation based Verification of ...,Abstract Interpretation Based Verification of ...,264772,247621,1.0
2,Advances in Information Science.,Advances in Information Science.,228641,233835,1.0
3,Advances in Information Science.,Advances in Information Science.,233835,228641,1.0
4,An Overview of C,An overview of C++.,250404,240450,1.0
5,An overview of C++.,An Overview of C,240450,250404,1.0
6,Applying Active Methodologies for Teaching Sof...,Applying Active Methodologies for Teaching Sof...,206404,207652,1.0
7,Applying Active Methodologies for Teaching Sof...,Applying Active Methodologies for Teaching Sof...,207652,206404,1.0
8,Book Review.,Book review.,230606,230530,1.0
9,Book Review.,Book review.,230606,233721,1.0
