# **Environment Setup**

In [None]:
try:
    from graphdatascience import GraphDataScience
except:
    !pip install graphdatascience
    from graphdatascience import GraphDataScience

from graphdatascience.session import SessionMemory, DbmsConnectionInfo, GdsSessions, AuraAPICredentials, AlgorithmCategory
from datetime import timedelta
from getpass import getpass

# **Connect to GDS**

Enter your Aura API credentials to be used for creating the compute for your GDS session.

In [None]:
client_id = getpass("Aura client id")
client_secret = getpass("Aura client secret")

Aura client id··········
Aura client secret··········


Create the sessions object.


In [None]:
sessions = GdsSessions(api_credentials=AuraAPICredentials(client_id, client_secret))

Enter the connection information for your source database.

In [None]:
db_uri = "neo4j+s://81c642ac.databases.neo4j.io"
db_user = getpass("database user")
db_password = getpass("database password")

database user··········
database password··········


Estimate the RAM for the session that we will create.

In [None]:
# Ask the sessions API for a memory size based on rough graph dimensions and
# the algorithm family we plan to run. The value is passed as `memory` when
# the session is created below.
memory = sessions.estimate(
    node_count=900000,
    relationship_count=4000000,
    algorithm_categories=[AlgorithmCategory.CENTRALITY],
)

Create the session.

In [None]:
# Bundle the source database URI and credentials entered above.
db_connection = DbmsConnectionInfo(db_uri, db_user, db_password)
# Going by the method name, this reuses an existing session called
# "graph_catalog_lab" or creates a new one -- confirm exact semantics in the
# client docs. `ttl` is presumably the session's time-to-live (45 minutes
# here); verify whether it counts idle time or total lifetime.
gds = sessions.get_or_create(
    session_name="graph_catalog_lab",
    memory=memory,
    ttl=timedelta(minutes=45),
    db_connection=db_connection
)

# **Exercise 1: Simple projections**

### 1a: Create an in-memory graph projection that contains Article nodes and CITES relationships
Projection to GDS session documentation: https://neo4j.com/docs/graph-data-science-client/current/gds-session/#_syntax_2

Explore the graph with Neo4j Browser in a separate tab if you don't recall the data model

In [None]:
query = """your code here"""

g_citations, result = gds.graph.project("citations", query)

In [None]:
#@title Solution:
# Each (Article)-[:CITES]->(Article) match supplies one source/target pair to
# the remote projection. No data configuration map is passed in this query.
query = """MATCH (a1:Article)-[:CITES]->(a2:Article) RETURN gds.graph.project.remote(a1, a2)"""

# The call yields the projected graph object and a summary of the projection.
g_citations, result = gds.graph.project("citations", query)

result

 Graph creation from Triplets:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,0
nodeCount,73810
relationshipCount,134778
status,DONE
host,p-81c642ac-dfef-0006.production-orch-0996.neo4...


Validate that the memory usage, node count, relationship count, and included properties of the projected graph are as expected:

In [None]:
g_citations.memory_usage()

'9646 KiB'

In [None]:
g_citations.node_count()

np.int64(73810)

In [None]:
g_citations.relationship_count()

np.int64(134778)

### 1b: Create a projection that contains Author nodes, Article nodes, and WROTE relationships. Use a data configuration map in your projection Cypher query to include the Author and Article node labels as part of the projection.

See this documentation for data configuration syntax: https://neo4j.com/docs/graph-data-science/current/management-ops/graph-creation/graph-project-cypher-projection/#graph-project-cypher-projection-syntax

In [None]:
query = """your code here"""

g_author_article, result = #your code here

In [None]:
#@title Solution:
# The third argument to gds.graph.project.remote is the data configuration
# map; here it labels source nodes "Author" and target nodes "Article" in the
# projection. The query is prefixed with `cypher runtime=parallel`.
query = """cypher runtime=parallel
           MATCH (au:Author)-[:WROTE]->(ar:Article)
           RETURN gds.graph.project.remote(
                au, ar,
                {
                  sourceNodeLabels: "Author",
                  targetNodeLabels: "Article"
                })"""

# The call yields the projected graph object and a summary of the projection.
g_author_article, result = gds.graph.project("author_article", query)
result

 Graph creation from Triplets:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,0
nodeCount,860743
relationshipCount,1118301
status,DONE
host,p-81c642ac-dfef-0006.production-orch-0996.neo4...


Validate that the memory usage, node count, relationship count, and included properties of the projected graph are as expected:

In [None]:
g_author_article.memory_usage()

'52 MiB'

In [None]:
g_author_article.node_count()

np.int64(860743)

In [None]:
g_author_article.relationship_count()

np.int64(1118301)

# **Exercise 2: Projection with calculated properties and undirected relationships**

### 2a: Create a projection that includes Journal nodes, Author nodes, and PUBLISHED_IN relationships.
If there are multiple relationships between an Author and a Journal, project them as a single relationship with a *publicationCount* property representing the number of relationships. Remember that you can use a Cypher `WITH` clause to calculate values before passing them to `gds.graph.project.remote()`.

See this documentation for the Cypher `WITH` clause:
https://neo4j.com/docs/cypher-manual/current/clauses/with/

In [None]:
query = """your code here"""

g_author_journal, result = #your code here

In [None]:
#@title Solution:
# Collapse parallel PUBLISHED_IN relationships: grouping by (a, j) in the WITH
# clause yields one row per author/journal pair, and count(*) is the number of
# relationships merged into that row.
# The target nodes are matched as :Journal, so they are labeled "Journal" in
# the projection (not "Article").
query = """cypher runtime=parallel
           MATCH (a:Author)-[:PUBLISHED_IN]->(j:Journal)
           WITH a, j, count(*) AS publicationCount
           RETURN gds.graph.project.remote(
                a, j,
                {
                  sourceNodeLabels: "Author",
                  targetNodeLabels: "Journal",
                  relationshipType: "PUBLISHED_IN",
                  relationshipProperties: {publicationCount: publicationCount}
                })"""


g_author_journal, result = gds.graph.project("author_journal", query)

result

 Graph creation from Triplets:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,0
nodeCount,428507
relationshipCount,857934
status,DONE
host,p-81c642ac-dfef-0006.production-orch-0996.neo4...


### Check the minimum, maximum, and average number of times an author has published in each journal, as well as the total number of PUBLISHED_IN relationships after aggregation.

In [None]:
# Stream the publicationCount value of the PUBLISHED_IN relationships from the
# projected graph; the values are read from the `propertyValue` column below.
pub_count_df = gds.graph.relationshipProperty.stream(g_author_journal, "publicationCount", ["PUBLISHED_IN"])

In [None]:
pub_count_df['propertyValue'].describe()

Unnamed: 0,propertyValue
count,857934.0
mean,1.303481
std,1.00338
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,87.0


### 2b: Create a new projected graph which includes Article and Author nodes. Change the label of the Author nodes to Person. Also include the WROTE relationships, but make them undirected.
Remote Projection documentation: https://neo4j.com/docs/graph-data-science-client/current/gds-session/#_syntax_2

In [None]:
#your code here

In [None]:
#@title Solution:
# Relabeling happens in the data configuration map: nodes matched as :Author
# in the database are given the label "Person" in the projection.
query = """cypher runtime=parallel
           MATCH (au:Author)-[:WROTE]->(ar:Article)
           RETURN gds.graph.project.remote(
              au, ar,
              {
                sourceNodeLabels: "Person",
                targetNodeLabels: "Article",
                relationshipType: "WROTE"
              }
           )"""

# Undirectedness is requested on the client call, not in the Cypher query.
# The type listed here must match the relationshipType set in the query.
g_article_person, result = gds.graph.project(
    "article_person",
    query,
    undirected_relationship_types=["WROTE"]
)

result

 Graph creation from Triplets:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,0
nodeCount,860743
relationshipCount,2236602
status,DONE
host,p-81c642ac-dfef-0006.production-orch-0996.neo4...


# **Exercise 3: Filtered Cypher projections**


### 3a: Create a graph that includes Authors and Articles with WROTE relationships, but only for articles that were published in the journal with title *The Journal of biological chemistry*.

Start by writing a Cypher query that returns the data as a pandas data frame to validate your Cypher syntax.

In [None]:
gds.run_cypher("""your code here""")

In [None]:
 #@title Solution:
 # Plain Cypher check before projecting: one row per (author, article) pair
 # for articles linked to the named journal via IN_JOURNAL.
 gds.run_cypher("""
    MATCH (j:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a:Article),
      (au:Author)-[:WROTE]->(a)
    RETURN au.fullName, a.title, j.title
""")

Unnamed: 0,au.fullName,a.title,j.title
0,D L Brautigan,Correlation of the kinetics of electron transf...,The Journal of biological chemistry
1,E Margoliash,Correlation of the kinetics of electron transf...,The Journal of biological chemistry
2,S Ferguson-Miller,Correlation of the kinetics of electron transf...,The Journal of biological chemistry
3,R Warren,Transfer of the hepatocyte receptor for serum ...,The Journal of biological chemistry
4,D Doyle,Transfer of the hepatocyte receptor for serum ...,The Journal of biological chemistry
...,...,...,...
11742,J H Mulligan,Transport and metabolism of vitamin B6 in Salm...,The Journal of biological chemistry
11743,T Tsuchiya,Calcium transport driven by a proton gradient ...,The Journal of biological chemistry
11744,B P Rosen,Calcium transport driven by a proton gradient ...,The Journal of biological chemistry
11745,B Shane,Transport and metabolism of vitamin B6 in the ...,The Journal of biological chemistry


### 3b: Modify the query to add a Cypher projection step

In [None]:
query = """your code here"""

# Remote projections in this notebook go through gds.graph.project(name, query),
# the same call used in exercise 1a and in the solution cell below.
g_biochem_authors, result = gds.graph.project("biochem_authors", query)

result

In [None]:
#@title Solution:
# Same MATCH as the validation query in 3a, but the journal node is left
# unnamed and the RETURN projects each (author, article) pair instead of
# returning display columns.
query = """
    MATCH (:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a:Article),
      (au:Author)-[:WROTE]->(a)
    RETURN gds.graph.project.remote(au, a)
"""
g_biochem_authors, result = gds.graph.project("biochem_authors", query)

result

Unnamed: 0,0
nodeCount,11518
relationshipCount,11747
status,DONE
host,p-81c642ac-dfef-0006.production-orch-0996.neo4...


# Exercise 4 (bonus): Implied relationships

## 4a: The (:Author)-[:WROTE]->(:Article) graph is a bipartite graph. Transform it into a monopartite graph projection that includes only Article nodes. Include only articles in the journal *The Journal of biological chemistry*.

Include an inferred `SHARES_AUTHOR` relationship between articles that have at least one author in common.

Because this relationship is logically undirected, filter your results so that the source of the `SHARES_AUTHOR` relationship has a `pmid` property lower than the target of the relationship. This will avoid duplicate relationships. Remember to indicate that the `SHARES_AUTHOR` relationship type is undirected when you create the projection.

Include a `sharedAuthorCount` property that indicates the number of shared authors between the papers.

In [None]:
query = """your code here"""

# Use a graph name of its own: "biochem_authors" would collide with the graph
# projected in 3b. Remember to mark SHARES_AUTHOR as undirected by passing
# undirected_relationship_types to this call.
g_shared_authors, result = gds.graph.project("shared_authors", query)

result

In [None]:
#@title Solution:
# Both articles must be IN_JOURNAL the same journal node `j`, and must be
# connected through a common node with WROTE relationships to each of them.
# `a1.pmid < a2.pmid` keeps a single ordering of every article pair, and
# count(*) per (a1, a2) is the number of matched connecting paths, i.e. the
# shared-author count (assuming one WROTE relationship per author/article pair).
query = """
    cypher runtime=parallel
    MATCH (j:Journal {title: "The Journal of biological chemistry"})<-[:IN_JOURNAL]-(a1:Article),
    (a1)<-[:WROTE]-()-[:WROTE]->(a2:Article)-[:IN_JOURNAL]->(j)
    WHERE a1.pmid < a2.pmid
    WITH a1, a2, count(*) AS sharedAuthorCount
    RETURN gds.graph.project.remote(
      a1, a2,
      {
        sourceNodeLabels: "Article",
        targetNodeLabels: "Article",
        relationshipType: "SHARES_AUTHOR",
        relationshipProperties: {sharedAuthorCount: sharedAuthorCount}
      }
    )
"""

# The undirected type listed here must match the relationshipType in the query.
g_shared_authors, result = gds.graph.project("shared_authors", query, undirected_relationship_types=["SHARES_AUTHOR"])

result

 Graph creation from Triplets:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,0
nodeCount,3287
relationshipCount,15430
status,DONE
host,p-81c642ac-dfef-0006.production-orch-0996.neo4...


# Exercise 5: Cleaning up the catalog and session

### 5a: List all the graphs we have created thus far

In [None]:
graph_list = #your code here
graph_list

In [None]:
#@title Solution:
graph_list = gds.graph.list()
graph_list

Unnamed: 0,degreeDistribution,graphName,database,databaseLocation,memoryUsage,sizeInBytes,nodeCount,relationshipCount,configuration,density,creationTime,modificationTime,schema,schemaWithOrientation
0,"{'min': 0, 'max': 96, 'p90': 4, 'p999': 25, 'p...",author_journal,neo4j,remote,63 MiB,66692296,428507,857934,"{'readConcurrency': 4, 'jobId': '1fc08d71-e338...",5e-06,2025-05-05T22:35:11.174827653+00:00,2025-05-05T22:35:11.174827653+00:00,"{'graphProperties': {}, 'nodes': {'Author': {}...","{'graphProperties': {}, 'nodes': {'Author': {}..."
1,"{'min': 1, 'max': 183, 'p90': 5, 'p999': 34, '...",article_person,neo4j,remote,56 MiB,59489456,860743,2236602,"{'readConcurrency': 4, 'jobId': '86566ee1-309d...",3e-06,2025-05-05T22:35:23.338753288+00:00,2025-05-05T22:35:23.338753288+00:00,"{'graphProperties': {}, 'nodes': {'Person': {}...","{'graphProperties': {}, 'nodes': {'Person': {}..."
2,"{'min': 0, 'max': 20, 'p90': 2, 'p999': 14, 'p...",biochem_authors,neo4j,remote,4838 KiB,4954120,11518,11747,"{'readConcurrency': 4, 'jobId': 'ff3ee726-bdc1...",8.9e-05,2025-05-05T22:35:34.830329848+00:00,2025-05-05T22:35:34.830329848+00:00,"{'graphProperties': {}, 'nodes': {'__ALL__': {...","{'graphProperties': {}, 'nodes': {'__ALL__': {..."
3,"{'min': 0, 'max': 183, 'p90': 3, 'p999': 34, '...",author_article,neo4j,remote,52 MiB,55294920,860743,1118301,"{'readConcurrency': 4, 'jobId': '71fc73eb-bdf2...",2e-06,2025-05-05T22:34:35.339661484+00:00,2025-05-05T22:34:35.339661484+00:00,"{'graphProperties': {}, 'nodes': {'Author': {}...","{'graphProperties': {}, 'nodes': {'Author': {}..."
4,"{'min': 1, 'max': 35, 'p90': 11, 'p999': 22, '...",shared_authors,neo4j,remote,36 MiB,37977216,3287,15430,"{'readConcurrency': 4, 'jobId': 'f958baeb-0136...",0.001429,2025-05-05T22:35:49.700963864+00:00,2025-05-05T22:35:49.700963864+00:00,"{'graphProperties': {}, 'nodes': {'Article': {...","{'graphProperties': {}, 'nodes': {'Article': {..."
5,"{'min': 0, 'max': 146, 'p90': 6, 'p999': 34, '...",citations,neo4j,remote,9646 KiB,9878192,73810,134778,"{'readConcurrency': 4, 'jobId': '1eed88e4-7b00...",2.5e-05,2025-05-05T22:34:21.776084311+00:00,2025-05-05T22:34:21.776084311+00:00,"{'graphProperties': {}, 'nodes': {'__ALL__': {...","{'graphProperties': {}, 'nodes': {'__ALL__': {..."


## 5b Drop the *author_journal* graph from the catalog.

In [None]:
#your code here

In [None]:
#@title Solution:
gds.graph.drop("author_journal")

Unnamed: 0,0
graphName,author_journal
database,neo4j
databaseLocation,remote
memoryUsage,
sizeInBytes,-1
nodeCount,428507
relationshipCount,857934
configuration,"{'readConcurrency': 4, 'jobId': '1fc08d71-e338..."
density,0.000005
creationTime,2025-05-05T22:35:11.174827653+00:00


## 5c Delete the session. When the session is destroyed, graphs still in the catalog will also be deleted.

See the documentation for deleting a session. https://neo4j.com/docs/graph-data-science-client/current/gds-session/#_deleting_a_gds_session

In [None]:
#your code here

In [None]:
#@title Solution:
# Delete the session by the same name it was created with ("graph_catalog_lab").
sessions.delete(session_name="graph_catalog_lab")

True