# **Environment Setup**

In [None]:
try:
    from graphdatascience import GraphDataScience
except:
    !pip install graphdatascience
    from graphdatascience import GraphDataScience

# **Connect to GDS**

Enter your VM IP and database password

In [None]:
import os

# Connection settings. Either edit the defaults below, or set NEO4J_URI and
# NEO4J_PASSWORD in the environment so real credentials are never saved in the notebook.
bolt_url = os.environ.get("NEO4J_URI", "neo4j://<your-vm-ip>:8443")
password = os.environ.get("NEO4J_PASSWORD", "DS_Training")

Connect to Neo4j with the GDS Client:

In [None]:
# Create the GDS client for bolt_url, authenticating as user 'neo4j' with `password`.
gds = GraphDataScience(bolt_url, auth=('neo4j', password))

# **Exercise 1: Native projection**

### 1a: Use native projection to create an in-memory graph that contains Article nodes and CITES relationships
Native Projection documentation: https://neo4j.com/docs/graph-data-science/current/management-ops/graph-creation/graph-project/

Explore the graph with Neo4j Browser in a separate tab if you don't recall the data model

In [None]:
# Exercise placeholder: complete the right-hand side; as written this line is a syntax error.
g_citations, result = #your code here

In [None]:
#@title Solution:
# Native projection arguments: catalog graph name, node label, relationship type.
g_citations, result = gds.graph.project(
    "citations",
    "Article",
    "CITES",
)
result

nodeProjection            {'Article': {'label': 'Article', 'properties':...
relationshipProjection    {'CITES': {'aggregation': 'DEFAULT', 'orientat...
graphName                                                         citations
nodeCount                                                            450000
relationshipCount                                                    134778
projectMillis                                                           652
Name: 0, dtype: object

Validate that the memory usage, node count, relationship count, and included properties of the projected graph are as expected:

In [None]:
# Memory used by the projected graph, returned as a human-readable string.
g_citations.memory_usage()

'12555 KiB'

In [None]:
# Should equal the nodeCount reported in the projection result.
g_citations.node_count()

450000

In [None]:
# Should equal the relationshipCount reported in the projection result.
g_citations.relationship_count()

134778

In [None]:
# The projection requested no node properties, so an empty list is expected here.
g_citations.node_properties("Article")

[]

### 1b: Use native projection to create a projection that contains Author nodes, Article nodes, and WROTE relationships.
Native Projection documentation: https://neo4j.com/docs/graph-data-science/current/management-ops/graph-creation/graph-project/

In [None]:
# Exercise placeholder: complete the right-hand side; as written this line is a syntax error.
g_author_article, result = #your code here

In [None]:
#@title Solution:
# Passing a list of labels projects both node types into the same in-memory graph.
g_author_article, result = gds.graph.project(
    "author_article",
    ["Author", "Article"],
    "WROTE",
)
result

nodeProjection            {'Author': {'label': 'Author', 'properties': {...
relationshipProjection    {'WROTE': {'aggregation': 'DEFAULT', 'orientat...
graphName                                                    author_article
nodeCount                                                            875119
relationshipCount                                                   1118301
projectMillis                                                           788
Name: 0, dtype: object

Validate that the memory usage, node count, relationship count, and included properties of the projected graph are as expected:

In [None]:
# Memory used by the projected graph, returned as a human-readable string.
g_author_article.memory_usage()

'15191 KiB'

In [None]:
# Should equal the nodeCount reported in the projection result (Author + Article nodes).
g_author_article.node_count()

875119

In [None]:
# Should equal the relationshipCount reported in the projection result.
g_author_article.relationship_count()

1118301

# **Exercise 1.1: Native projection with maps**

### 1.1a: Create a projection that includes Journal nodes, Author nodes, and PUBLISHED_IN relationships.
If there are multiple relationships between an Author and a Journal, project them as a single relationship with a *count* property representing the number of relationships.

Native Projection documentation: https://neo4j.com/docs/graph-data-science/current/management-ops/graph-creation/graph-project/

In [None]:
# Exercise placeholder: complete the right-hand side; as written this line is a syntax error.
g_author_journal, result = #your code here

In [None]:
#@title Solution:
# The relationship projection adds a "count" property computed with the COUNT
# aggregation over "*", i.e. the number of PUBLISHED_IN relationships merged per pair.
g_author_journal, result = gds.graph.project(
    "author_journal",
    ["Author", "Journal"],
    {
        "PUBLISHED_IN": {
            "properties": {
                "count": {"property": "*", "aggregation": "COUNT"},
            },
        },
    },
)
result

nodeProjection            {'Journal': {'label': 'Journal', 'properties':...
relationshipProjection    {'PUBLISHED_IN': {'aggregation': 'DEFAULT', 'o...
graphName                                                    author_journal
nodeCount                                                            428535
relationshipCount                                                    857934
projectMillis                                                          1491
Name: 0, dtype: object

### Check the minimum, maximum, and average number of times an author has published in each journal, as well as the total number of PUBLISHED_IN relationships after aggregation.

In [None]:
# Stream the "count" property of the projected PUBLISHED_IN relationships from the
# "author_journal" graph and summarise it in Cypher: min, max and average value,
# plus the number of streamed relationships.
gds.run_cypher("""CALL gds.graph.relationshipProperty.stream(
      "author_journal",
      "count",
      ["PUBLISHED_IN"])
    YIELD
      sourceNodeId,
      targetNodeId,
      propertyValue
    RETURN min(propertyValue) AS minCount,
      max(propertyValue) AS maxCount,
      avg(propertyValue) AS avgCount,
      count(*) as relationshipCount
      """
      )

Unnamed: 0,minCount,maxCount,avgCount,relationshipCount
0,1.0,87.0,1.303481,857934


### 1.1b: Create a new projected graph which includes Article and Author nodes. Change the label of the Author nodes to Person. Also include the WROTE relationships, but make them undirected.
Native Projection documentation: https://neo4j.com/docs/graph-data-science/current/management-ops/graph-creation/graph-project/

In [None]:
#your code here

In [None]:
#@title Solution:
# Node projection keys become the labels of the projected graph, so nodes stored as
# Author are projected as "Person"; WROTE is projected with UNDIRECTED orientation.
g_article_person, result = gds.graph.project(
    "article_person",
    {"Article": {}, "Person": {"label": "Author"}},
    {"WROTE": {"orientation": "UNDIRECTED"}},
)
result

nodeProjection            {'Person': {'label': 'Author', 'properties': {...
relationshipProjection    {'WROTE': {'aggregation': 'DEFAULT', 'orientat...
graphName                                                    article_person
nodeCount                                                            875119
relationshipCount                                                   2236602
projectMillis                                                           497
Name: 0, dtype: object

# **Exercise 2: Cypher projection**
### Create a graph that includes Authors and Articles with WROTE relationships, but only for articles that were published in the journal with title *The Journal of biological chemistry*

Cypher Projection documentation here: https://neo4j.com/docs/graph-data-science/current/management-ops/graph-creation/graph-project-cypher-projection/

### 2a: Write a Cypher query that fetches all of the desired elements

In [None]:
# Exercise placeholder: replace the marker line inside the string with your Cypher query.
gds.run_cypher("""
    #your cypher here
""")

In [None]:
 #@title Solution:
 # Match every Article linked by IN_JOURNAL to the journal with the given title,
 # together with each Author that WROTE it, and return one row per (author, article) pair.
 gds.run_cypher("""
    MATCH (j:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a:Article),
      (au:Author)-[:WROTE]->(a)
    RETURN au.fullName, a.title, j.title
""")

Unnamed: 0,au.fullName,a.title,j.title
0,D L Brautigan,Correlation of the kinetics of electron transf...,The Journal of biological chemistry
1,E Margoliash,Correlation of the kinetics of electron transf...,The Journal of biological chemistry
2,S Ferguson-Miller,Correlation of the kinetics of electron transf...,The Journal of biological chemistry
3,R Warren,Transfer of the hepatocyte receptor for serum ...,The Journal of biological chemistry
4,D Doyle,Transfer of the hepatocyte receptor for serum ...,The Journal of biological chemistry
...,...,...,...
11742,J H Mulligan,Transport and metabolism of vitamin B6 in Salm...,The Journal of biological chemistry
11743,T Tsuchiya,Calcium transport driven by a proton gradient ...,The Journal of biological chemistry
11744,B P Rosen,Calcium transport driven by a proton gradient ...,The Journal of biological chemistry
11745,B Shane,Transport and metabolism of vitamin B6 in the ...,The Journal of biological chemistry


### 2b: Modify the query to add a Cypher projection step

In [None]:
 # Exercise placeholder: replace the marker line inside the string with your Cypher projection query.
 g_biochem_authors, result = gds.graph.cypher.project("""
    #your cypher here
""")

In [None]:
 #@title Solution:
 # Same MATCH as in 2a, but each matched (author, article) pair is passed to
 # gds.graph.project as source and target node of the "biochem_authors" graph
 # instead of being returned as rows.
 g_biochem_authors, result = gds.graph.cypher.project("""
    MATCH (:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a:Article),
      (au:Author)-[:WROTE]->(a)
    RETURN gds.graph.project("biochem_authors", au, a)
""")
result

{'relationshipCount': 11747,
 'graphName': 'biochem_authors',
 'query': '\n    MATCH (:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a:Article),\n      (au:Author)-[:WROTE]->(a)\n    RETURN gds.graph.project("biochem_authors", au, a)\n',
 'projectMillis': 38,
 'configuration': {'readConcurrency': 4,
  'undirectedRelationshipTypes': [],
  'jobId': 'e9374a4b-2ef7-43ac-9629-a61333aa6f20',
  'logProgress': True,
  'query': '\n    MATCH (:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a:Article),\n      (au:Author)-[:WROTE]->(a)\n    RETURN gds.graph.project("biochem_authors", au, a)\n',
  'inverseIndexedRelationshipTypes': [],
  'creationTime': neo4j.time.DateTime(2024, 5, 29, 14, 59, 18, 183799253, tzinfo=<UTC>)},
 'nodeCount': 11518}

# **Exercise 3: Cleaning up the catalog**

### List all the graphs we have created thus far

In [None]:
# Exercise placeholder: complete the right-hand side; as written this line is a syntax error.
graph_list = #your code here
graph_list

In [None]:
#@title Solution:
# List the graphs currently in the catalog, one row per graph.
graph_list = gds.graph.list()
graph_list

Unnamed: 0,degreeDistribution,graphName,database,databaseLocation,memoryUsage,sizeInBytes,nodeCount,relationshipCount,configuration,density,creationTime,modificationTime,schema,schemaWithOrientation
0,"{'min': 0, 'max': 183, 'p90': 3, 'p999': 34, '...",author_article,neo4j,local,15191 KiB,15555856,875119,1118301,{'relationshipProjection': {'WROTE': {'aggrega...,1.460242e-06,2024-05-23T13:57:41.255936165+00:00,2024-05-23T13:57:42.046223964+00:00,"{'graphProperties': {}, 'nodes': {'Author': {}...","{'graphProperties': {}, 'nodes': {'Author': {}..."
1,"{'min': 0, 'max': 96, 'p90': 4, 'p999': 25, 'p...",author_journal,neo4j,local,69 MiB,72497424,428535,857934,{'relationshipProjection': {'PUBLISHED_IN': {'...,4.671779e-06,2024-05-23T14:01:12.471218062+00:00,2024-05-23T14:01:13.967505174+00:00,"{'graphProperties': {}, 'nodes': {'Journal': {...","{'graphProperties': {}, 'nodes': {'Journal': {..."
2,"{'min': 0, 'max': 20, 'p90': 2, 'p999': 14, 'p...",biochem_cypher_agg,neo4j,local,4837 KiB,4953792,11518,11747,"{'readConcurrency': 4, 'undirectedRelationship...",8.855448e-05,2024-05-23T14:10:14.358389107+00:00,2024-05-23T14:10:14.533616274+00:00,"{'graphProperties': {}, 'nodes': {'__ALL__': {...","{'graphProperties': {}, 'nodes': {'__ALL__': {..."
3,"{'min': 0, 'max': 146, 'p90': 0, 'p999': 20, '...",citations,neo4j,local,12555 KiB,12856848,450000,134778,{'relationshipProjection': {'CITES': {'aggrega...,6.655718e-07,2024-05-23T13:55:55.952433852+00:00,2024-05-23T13:55:56.611220238+00:00,"{'graphProperties': {}, 'nodes': {'Article': {...","{'graphProperties': {}, 'nodes': {'Article': {..."
4,"{'min': 0, 'max': 366, 'p90': 6, 'p999': 68, '...",all_citations,neo4j,local,19 MiB,20620880,878535,2821380,{'relationshipProjection': {'__ALL__': {'aggre...,3.655476e-06,2024-05-23T13:50:20.534540167+00:00,2024-05-23T13:50:24.399682794+00:00,"{'graphProperties': {}, 'nodes': {'__ALL__': {...","{'graphProperties': {}, 'nodes': {'__ALL__': {..."


### Drop the *author_journal* graph from the catalog.

In [None]:
#your code here

In [None]:
#@title Solution:
# Drop the graph by its catalog name; the call returns the details of the dropped graph.
gds.graph.drop("author_journal")

graphName                                                   author_journal
database                                                             neo4j
databaseLocation                                                     local
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                           428535
relationshipCount                                                   857934
configuration            {'relationshipProjection': {'PUBLISHED_IN': {'...
density                                                           0.000005
creationTime                           2024-05-23T14:01:12.471218062+00:00
modificationTime                       2024-05-23T14:01:13.967505174+00:00
schema                   {'graphProperties': {}, 'nodes': {'Journal': {...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Journal': {...
Name: 0, dtype: object

### Use a python loop to return the names of all graphs in the graph catalog as a list, and then drop them.

In [None]:
#your code here

In [None]:
#@title Solution:
# Snapshot the catalog, pull the graph names out as a plain list, then drop each by name.
graph_list = gds.graph.list()
for graph_name in graph_list["graphName"].tolist():
    gds.graph.drop(graph_name)

Validate that all catalog graphs have been dropped

In [None]:
# After dropping every graph, the catalog listing should contain no rows.
gds.graph.list()