
## Importing Libraries


In [58]:
import os
from dotenv import load_dotenv
from graphdatascience import GraphDataScience

## Load and retreive neo4j variables from .env file

In [59]:
load_dotenv()

#Storing Neo4j connection details in variables
URI = os.getenv("NEO4J_URI")
USER = os.getenv("NEO4J_USERNAME")
PASSWORD = os.getenv("NEO4J_PASSWORD")
DB_NAME = os.getenv("NEO4J_DATABASE", "neo4j")

## Initialize the Graph Data Science client

In [60]:
gds = GraphDataScience(
    URI,
    auth=(USER, PASSWORD),
    database=DB_NAME
)

In [61]:

print("Connected to Neo4j GDS server version:", gds.version())

Connected to Neo4j GDS server version: 2.23.0


In [62]:
print("Available GDS algorithms (first few rows):")
print(gds.list().head())

Available GDS algorithms (first few rows):
                                         name  \
0           gds.allShortestPaths.delta.mutate   
1  gds.allShortestPaths.delta.mutate.estimate   
2            gds.allShortestPaths.delta.stats   
3   gds.allShortestPaths.delta.stats.estimate   
4           gds.allShortestPaths.delta.stream   

                                         description  \
0  The Delta Stepping shortest path algorithm com...   
1  Returns an estimation of the memory consumptio...   
2  The Delta Stepping shortest path algorithm com...   
3  Returns an estimation of the memory consumptio...   
4  The Delta Stepping shortest path algorithm com...   

                                           signature       type  
0  gds.allShortestPaths.delta.mutate(graphName ::...  procedure  
1  gds.allShortestPaths.delta.mutate.estimate(gra...  procedure  
2  gds.allShortestPaths.delta.stats(graphName :: ...  procedure  
3  gds.allShortestPaths.delta.stats.estimate(grap...  procedu

## Graph Projections


In [63]:
# Cleanup existing graph projection if it exists
if gds.graph.exists("foodWebGraph").exists:
    gds.graph.drop("foodWebGraph")

In [64]:
# First projection for directed analyses (degree, betweenness, closeness, communities)
G, proj_result = gds.graph.project(
    "foodWebGraph",
    {
        "Taxa": {
            "properties": [
                "taxon_id"
            ]
        }
    },
    {
        "eaten_by": {
            "orientation": "NATURAL",   # direction stays Predator -> Prey
            "properties": [
                "latitude",
                "longitude"
                ]
        }
    }
)


In [65]:
print(f"Projected {G.node_count()} nodes and {G.relationship_count()} relationships for directed analyses.\n")

Projected 3320 nodes and 2890 relationships for directed analyses.



### Degree centrality

In [66]:
# Degree centrality
degree_centrality = gds.degree.write(
    G, 
    writeProperty="degree"
)

print("Degree centrality computed and written to the graph as 'degree' property.\n")

Degree centrality computed and written to the graph as 'degree' property.



### Betweenness centrality

In [67]:
# Betweenness centrality
betweenness_centrality = gds.betweenness.write(
    G, 
    writeProperty="betweenness"
)
print("Betweenness centrality computed and written to the graph as 'betweenness' property.\n")


Betweenness centrality computed and written to the graph as 'betweenness' property.



### Closeness centrality

In [68]:
# Closeness centrality
closeness_centrality = gds.closeness.write(
    G, 
    writeProperty="closeness"
)
print("Closeness centrality computed and written to the graph as 'closeness' property.\n")


Closeness centrality computed and written to the graph as 'closeness' property.



### Detecting communities using louvian method:

What is Louvian Method for reference:

In [69]:
# Using Louvain method for community detection
communities = gds.louvain.write(
    G,
    writeProperty="community"
)
print("Communities detected using Louvain method and written to the graph as 'community' property.\n")

 Louvain: 100%|██████████| 100.0/100 [00:02<00:00, 40.46%/s, status: FINISHED]                                                                         

Communities detected using Louvain method and written to the graph as 'community' property.






In [100]:
#Identify cyclic regions (SCCs)
cycles = gds.scc.write(G, writeProperty='sccId')

## Retriving the calculated metrics

### Statistic Summary

In [70]:
# Statistic summary
stats_query = """
MATCH (n:Taxa)
RETURN 
    'Degree' as metric,
    min(n.degree) as min,
    max(n.degree) as max,
    avg(n.degree) as avg,
    count(n) as count
UNION
MATCH (n:Taxa)
RETURN 
    'Betweenness' as metric,
    min(n.betweenness) as min,
    max(n.betweenness) as max,
    avg(n.betweenness) as avg,
    count(n) as count
UNION
MATCH (n:Taxa)
RETURN 
    'Closeness' as metric,
    min(n.closeness) as min,
    max(n.closeness) as max,
    avg(n.closeness) as avg,
    count(n) as count
"""

In [71]:
# Run the query and get results as a pandas DataFrame
stats_df = gds.run_cypher(stats_query)

print("\nMetric Statistics:")
print(stats_df)


Metric Statistics:
        metric  min    max       avg  count
0       Degree  0.0   38.0  0.870482   3320
1  Betweenness  0.0  649.5  0.688554   3320
2    Closeness  0.0    1.0  0.442194   3320


In [72]:
# Query to retrieve calculated metrics for each node
metrics_query = """
MATCH (n:Taxa)
RETURN n.taxon_id as taxon_id,
       n.common_name as common_name,
       n.scientific_name as scientific_name,
       n.degree as degree,
       n.betweenness as betweenness,
       n.closeness as closeness,
       n.community as community
ORDER BY n.betweenness DESC
LIMIT 10
"""

In [73]:
# Run the query and get results as a pandas DataFrame
metrics_df = gds.run_cypher(metrics_query)


In [74]:

print("\nTop 10 species by betweenness centrality:")
print(metrics_df)


Top 10 species by betweenness centrality:
   taxon_id                       common_name  \
0   47219.0                 Western Honey Bee   
1   61355.0              Western Yellowjacket   
2  199400.0                         Siam weed   
3  676794.0              Common Dotted Border   
4   46017.0             Eastern Gray Squirrel   
5   12727.0                    American Robin   
6   46260.0             American Red Squirrel   
7   61495.0                  Eastern Pondhawk   
8  625887.0  Southern Brown-hooded Kingfisher   
9  483731.0                              None   

                   scientific_name  degree  betweenness  closeness  community  
0                   Apis mellifera    14.0        649.5   0.976190       1515  
1             Vespula pensylvanica     3.0        132.0   0.511628        215  
2              Chromolaena odorata     5.0        130.0   0.490566       2535  
3      Mylothris agathina agathina     2.0        104.0   0.464286       2535  
4             Sci

In [75]:
print("\nTop 10 species with details:")
print(metrics_df)


Top 10 species with details:
   taxon_id                       common_name  \
0   47219.0                 Western Honey Bee   
1   61355.0              Western Yellowjacket   
2  199400.0                         Siam weed   
3  676794.0              Common Dotted Border   
4   46017.0             Eastern Gray Squirrel   
5   12727.0                    American Robin   
6   46260.0             American Red Squirrel   
7   61495.0                  Eastern Pondhawk   
8  625887.0  Southern Brown-hooded Kingfisher   
9  483731.0                              None   

                   scientific_name  degree  betweenness  closeness  community  
0                   Apis mellifera    14.0        649.5   0.976190       1515  
1             Vespula pensylvanica     3.0        132.0   0.511628        215  
2              Chromolaena odorata     5.0        130.0   0.490566       2535  
3      Mylothris agathina agathina     2.0        104.0   0.464286       2535  
4             Sciurus caroline

### Community Statistics

In [76]:

community_stats_query = """
MATCH (n:Taxa)
WITH n.community as community, count(*) as size
RETURN 
    community,
    size
ORDER BY size DESC
"""

In [77]:
# Run the query and get results as a pandas DataFrame
community_df = gds.run_cypher(community_stats_query)

In [78]:
print("\nTop largest communities:")
print(community_df)


Top largest communities:
     community  size
0         1302   303
1          215   162
2         2861   160
3         1515   152
4         2535   141
..         ...   ...
842       3245     1
843       3251     1
844       3261     1
845       3296     1
846       3310     1

[847 rows x 2 columns]


## Finding long chains using APOC path expansion

In [None]:
long_chain = """
MATCH (s:Taxa)
CALL apoc.path.expandConfig(s, {
  relationshipFilter: '<eaten_by',
  minLevel: 4,
  maxLevel: 50,
  uniqueness: 'NODE_GLOBAL',
  bfs: false,
  limit: 100000
}) YIELD path
WITH path, length(path) AS len
RETURN apoc.text.join([n IN nodes(path) | n.scientific_name], ' -> ') AS chain, len 
ORDER BY len DESC
LIMIT 50;

"""

In [96]:
long_chain_df = gds.run_cypher(long_chain)
print("\nTop 10 longest food chains found:")
print(long_chain_df)


Top 10 longest food chains found:
                                                chain  len
0   Eccopsis incultana -> Lippia javanica -> Mylot...    5
1   Eccopsis incultana -> Lippia javanica -> Mylot...    5
2   Eccopsis incultana -> Lippia javanica -> Mylot...    5
3   Eccopsis incultana -> Lippia javanica -> Mylot...    5
4   Eccopsis incultana -> Lippia javanica -> Mylot...    5
5   Eccopsis incultana -> Lippia javanica -> Mylot...    5
6   Eccopsis incultana -> Lippia javanica -> Mylot...    5
7   Eccopsis incultana -> Lippia javanica -> Mylot...    5
8   Eccopsis incultana -> Lippia javanica -> Mylot...    5
9   Eccopsis incultana -> Lippia javanica -> Mylot...    5
10  Pantherophis spiloides -> Strix varia -> Tamia...    5
11  Pantherophis spiloides -> Strix varia -> Tamia...    5
12  Lippia javanica -> Mylothris agathina agathina...    4
13  Lippia javanica -> Mylothris agathina agathina...    4
14  Melanerpes lewis -> Bombus griseocollis -> Asc...    4
15  Neoscona oaxacens

In [103]:
display_cycles = """
MATCH (n:Taxa)
WITH n.sccId AS scc, count(*) AS size
WHERE size > 1
RETURN scc, size
ORDER BY size DESC
LIMIT 20;
"""

In [104]:
display_cycles_df = gds.run_cypher(display_cycles)
print("\nTop 20 cycles:")
print(display_cycles_df)


Top 20 cycles:
     scc  size
0   3002     6
1    173     3
2     11     2
3    243     2
4    333     2
5    381     2
6    443     2
7   1306     2
8   1576     2
9   1703     2
10  2011     2
