In [1]:
# Run the line below if you need to install graphdatascience in a fresh environment.
# Tip: `%pip install graphdatascience` targets this kernel's environment, whereas
# `!pip` may install into a different Python.
#!pip install graphdatascience

In [2]:
import os
from getpass import getpass

from graphdatascience import GraphDataScience

In [3]:
# Connection settings are read from the environment so that no password is stored
# in the notebook (or in version control). Set NEO4J_URI / NEO4J_USER /
# NEO4J_PASSWORD before starting Jupyter; if the password is not set, it is
# prompted for interactively without being echoed.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://3.86.239.25:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD') or getpass('Neo4j password: ')

# Client object used by every later cell to call GDS procedures and run Cypher.
gds = GraphDataScience(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

## Using Louvain Community Detection

In [4]:
# Basic homogeneous directed projection: Airport nodes connected by HAS_ROUTE
# relationships.
GRAPH_NAME = 'proj'

# Drop a projection left over from an earlier run so this cell can be re-executed;
# projecting a graph under a name that already exists in the catalog fails.
if gds.graph.exists(GRAPH_NAME)['exists']:
    gds.graph.get(GRAPH_NAME).drop()

# Bind the projection summary to a descriptive name instead of `_`, which IPython
# uses for the previous cell's output.
g, project_result = gds.graph.project(GRAPH_NAME, 'Airport', 'HAS_ROUTE')
project_result

nodeProjection            {'Airport': {'label': 'Airport', 'properties':...
relationshipProjection    {'HAS_ROUTE': {'orientation': 'NATURAL', 'aggr...
graphName                                                              proj
nodeCount                                                              3503
relationshipCount                                                     46389
projectMillis                                                            21
Name: 0, dtype: object

In [5]:
# Ask GDS how much memory a Louvain stats run on this projection would require
# before actually running it.
louvain_estimate = gds.louvain.stats.estimate(g)
louvain_estimate

requiredMemory                                  [224 KiB ... 1701 KiB]
treeView             Memory Estimation: [224 KiB ... 1701 KiB]\n|--...
mapView              {'components': [{'components': [{'memoryUsage'...
bytesMin                                                        229921
bytesMax                                                       1742472
nodeCount                                                         3503
relationshipCount                                                46389
heapPercentageMin                                                  0.1
heapPercentageMax                                                  0.1
Name: 0, dtype: object

In [6]:
# Stats mode: run Louvain and report summary figures (modularity, community
# count, size distribution) for the projection.
louvain_stats = gds.louvain.stats(g)
louvain_stats

Louvain:   0%|          | 0/100 [00:00<?, ?%/s]

modularity                                                        0.557322
modularities             [0.5236293616663739, 0.5436734881801065, 0.547...
ranLevels                                                               10
communityCount                                                        1094
communityDistribution    {'p99': 13, 'min': 1, 'max': 697, 'mean': 3.20...
postProcessingMillis                                                     3
preProcessingMillis                                                      0
computeMillis                                                         4887
configuration            {'jobId': 'f2c7df0a-3ed9-403d-ba70-8f51779e9b9...
Name: 0, dtype: object

In [7]:
# Stream mode: one row per node with its community id. The
# intermediateCommunityIds column is not needed here, so it is discarded.
louvain_stream = gds.louvain.stream(g)
community_df = louvain_stream.drop(columns='intermediateCommunityIds')
community_df

Louvain:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,nodeId,communityId
0,8627,3321
1,8628,3321
2,8629,3321
3,8630,3321
4,8631,3321
...,...,...
3498,12125,3498
3499,12126,3499
3500,12127,3500
3501,12128,3501


In [8]:
# Count the nodes in each community and list the largest communities first.
community_sizes = community_df.groupby('communityId').count()
community_sizes.sort_values('nodeId', ascending=False)

Unnamed: 0_level_0,nodeId
communityId,Unnamed: 1_level_1
3321,699
3307,524
2265,382
2838,252
2927,187
...,...
2670,1
2671,1
2672,1
2673,1


In [9]:
# Write mode: run Louvain again and store each node's community in the
# `communityId` node property in the database.
# Note: this is a separate run from the stats/stream cells, so its summary figures
# (e.g. communityCount, modularity) can differ slightly from theirs.
write_result = gds.louvain.write(g, writeProperty='communityId')
write_result

Louvain:   0%|          | 0/100 [00:00<?, ?%/s]

writeMillis                                                           1491
nodePropertiesWritten                                                 3503
modularity                                                        0.553671
modularities             [0.5166659771258485, 0.5393207144245495, 0.543...
ranLevels                                                               10
communityCount                                                        1093
communityDistribution    {'p99': 13, 'min': 1, 'max': 695, 'mean': 3.20...
postProcessingMillis                                                     3
preProcessingMillis                                                      0
computeMillis                                                         4400
configuration            {'maxIterations': 10, 'writeConcurrency': 4, '...
Name: 0, dtype: object

In [10]:
# Read the stored community id back from the database alongside other Airport
# properties, ordered by community.
airport_communities_query = '''
    MATCH(a:Airport)
    RETURN a.communityId AS communityId, a.iata AS iata, a.city AS city,
        a.descr AS description
    ORDER BY communityId
'''
gds.run_cypher(airport_communities_query)

Unnamed: 0,communityId,iata,city,description
0,199,TXL,Berlin,"Berlin, Tegel International Airport *Closed*"
1,472,SXF,Berlin,Berlin-Schönefeld International Airport *Closed*
2,941,ILG,Wilmington,New Castle Airport
3,1007,TWB,Toowoomba,Toowoomba Airport
4,1763,HOR,Horta,Horta Airport
...,...,...,...,...
3498,3497,FAC,Faaite,Faaite Airport
3499,3498,RRR,Raroia,Raroia Airport
3500,3499,PKP,Puka Puka,Puka Puka Airport
3501,3500,NAU,Napuka Island,Napuka Island Airport


## Other Community Detection Algorithms

Neo4j Graph Data Science includes many other community detection algorithms. Below are just some of the others; the full list can be found in the [documentation](https://neo4j.com/docs/graph-data-science/current/algorithms/community/).

- __Label Propagation__: Similar in intent to Louvain. A fast algorithm that parallelizes well. Great for large graphs.
- __Weakly Connected Components (WCC)__: Partitions the graph into sets of connected nodes such that
    - Every node is reachable from any other node in the same set when relationship direction is ignored
    - No path exists between nodes from different sets

# Cleanup

In [11]:
# Remove the in-memory projection from the graph catalog and show the summary
# of what was dropped.
drop_result = g.drop()
drop_result

graphName                                                         proj
database                                                         neo4j
memoryUsage                                                           
sizeInBytes                                                         -1
nodeCount                                                         3503
relationshipCount                                                46389
configuration        {'relationshipProjection': {'HAS_ROUTE': {'ori...
density                                                       0.003781
creationTime                       2022-11-15T15:13:06.986981000+00:00
modificationTime                   2022-11-15T15:13:07.006719000+00:00
schema               {'graphProperties': {}, 'relationships': {'HAS...
Name: 0, dtype: object

In [12]:
# Undo the write step: strip the communityId property from every Airport node.
remove_community_query = 'MATCH(a:Airport) REMOVE a.communityId'
gds.run_cypher(remove_community_query)