# Community Detection Algorithm

## Libraries and Set Up

In [None]:
# If needed
!pip install plotly
!pip install pycountry

In [22]:
import neo4j
import pandas as pd
import numpy as np
import plotly.express as px
import pycountry
import copy
from IPython.display import display, HTML
import plotly.io as pio
pio.renderers.default = 'notebook'

In [6]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [8]:
session = driver.session(database="neo4j")

In [9]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

## Louvain Modularity

### Exporter

In [180]:
query = "CALL gds.graph.drop('ds_graph', false) yield graphName"
session.run(query)

query = """CALL gds.graph.project('ds_graph', 'Country',
  {EXPORTS_TO: {orientation: 'NATURAL', properties: {weight_kg: {property: 'weight_kg'}}}}
)"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7f3bd29100a0>

In [181]:
query = """

CALL gds.louvain.stream('ds_graph', {includeIntermediateCommunities: true, relationshipWeightProperty: 'weight_kg'})
YIELD nodeId, communityId, intermediateCommunityIds
RETURN gds.util.asNode(nodeId).name AS name, communityId as community, intermediateCommunityIds as intermediate_community
ORDER BY community, name ASC

"""

exp = my_neo4j_run_query_pandas(query)
exp

Unnamed: 0,name,community,intermediate_community
0,SU,38,[38]
1,KV,55,[55]
2,BV,141,[141]
3,CP,160,[160]
4,BI,166,[166]
...,...,...,...
257,CG,258,[258]
258,EE,258,[258]
259,GE,258,[258]
260,UA,258,[258]


### Importer

In [184]:
query = "CALL gds.graph.drop('ds_graph', false) yield graphName"
session.run(query)

query = """CALL gds.graph.project('ds_graph', 'Country',
  {IMPORTS_FROM: {orientation: 'NATURAL', properties: {weight_kg: {property: 'weight_kg'}}}}
)"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7f3bdb801910>

In [185]:
query = """

CALL gds.louvain.stream('ds_graph', {includeIntermediateCommunities: true, relationshipWeightProperty: 'weight_kg'})
YIELD nodeId, communityId, intermediateCommunityIds
RETURN gds.util.asNode(nodeId).name AS name, communityId as community, intermediateCommunityIds as intermediate_community
ORDER BY community, name ASC

"""

imp = my_neo4j_run_query_pandas(query)
imp

Unnamed: 0,name,community,intermediate_community
0,AF,42,"[24, 42]"
1,AM,42,"[24, 42]"
2,AU,42,"[24, 42]"
3,AW,42,"[24, 42]"
4,BG,42,"[24, 42]"
...,...,...,...
257,XV,258,"[232, 258]"
258,YE,258,"[232, 258]"
259,YT,258,"[169, 258]"
260,YU,258,"[29, 258]"


## Data Wrangling

In [179]:
# Converting Country Code for Plots

def iso2_to_iso3(iso2):
    try:
        return pycountry.countries.get(alpha_2=iso2.upper()).alpha_3
    except:
        return None

In [182]:
# Louvain Communities of Exporters

exp_cleaned = copy.deepcopy(exp)
exp_cleaned['name'] = exp_cleaned['name'].apply(iso2_to_iso3)
exp_cleaned = exp_cleaned.dropna(subset=['name'])

exp_count = exp_cleaned.groupby('community').size().reset_index().rename(columns={0:'Community Size'})
exp_cleaned = pd.merge(exp_cleaned, exp_count, on='community', how='left')
exp_cleaned['Community Size'] = exp_cleaned['Community Size'].astype(str)
exp_cleaned = exp_cleaned.sort_values(by='Community Size')

In [186]:
# Louvain Communities of Importers

imp_cleaned = copy.deepcopy(imp)
imp_cleaned['name'] = imp_cleaned['name'].apply(iso2_to_iso3)
imp_cleaned = imp_cleaned.dropna(subset=['name'])

imp_count = imp_cleaned.groupby('community').size().reset_index().rename(columns={0:'Community Size'})
imp_cleaned = pd.merge(imp_cleaned, imp_count, on='community', how='left')
imp_cleaned['Community Size'] = imp_cleaned['Community Size'].astype(str)
imp_cleaned = imp_cleaned.sort_values(by='Community Size')

In [187]:
# Louvain Intermediate Communities of Importers

imp_sec = copy.deepcopy(imp)
imp_sec['intermediate'] = imp_sec['intermediate_community'].apply(lambda x: x[0])
imp_sec['name'] = imp_sec['name'].apply(iso2_to_iso3)
imp_sec = imp_sec.dropna(subset=['name'])

imp_sec_count = imp_sec.groupby('intermediate').size().reset_index().rename(columns={0:'Community Size'})
imp_sec_cleaned = pd.merge(imp_sec, imp_sec_count, on='intermediate', how='left')
imp_sec_cleaned['Community Size'] = imp_sec_cleaned['Community Size'].astype(str)
imp_sec_cleaned = imp_sec_cleaned.sort_values(by='Community Size')

## Graphs

Interactive graphs will not display in non-interactive environments such as GitLab. Graphs can be found in the slides folder titled the following:
- Louvain Exporter.png
- Louvain Importer.png
- Louvain Importer Intermediate.png

In [190]:
# Exporter Community Clusters

fig = px.choropleth(
    exp_cleaned,
    locations='name',     
    color='Community Size',     
    color_continuous_scale='Viridis',
    projection='robinson',
    title='Louvain Community Detection Algorithm: Exporter Community Clusters',
    category_orders={'Community Size': 
                     sorted(exp_cleaned['Community Size'].unique(), key=lambda x: int(x))}
)

fig.update_layout(width=5000, height=1400,
    geo=dict(showframe=False, showcoastlines=True),
    title=dict(
        text='Louvain Community Detection Algorithm: Exporter Community Clusters',
        x=0.525, xanchor='center'
        , font=dict(size=40), y=0.99),      
    legend=dict(x=0.75, title='Community Size' ,font=dict(size=30), title_font=dict(size=32)
    )
)

fig.show() 



In [191]:
# Importer Community Clusters

fig = px.choropleth(
    imp_cleaned,
    locations='name',     
    color='Community Size',     
    #color_continuous_scale='Viridis',
    projection='robinson',
    title='Louvain Community Detection Algorithm: Importer Community Clusters',
    category_orders={'Community Size': 
                     sorted(imp_cleaned['Community Size'].unique(), key=lambda x: int(x))}
)

fig.update_layout(width=5000, height=1400,
    geo=dict(showframe=False, showcoastlines=True),
                  
    title=dict(
        text='Louvain Community Detection Algorithm: Importer Community Clusters',
        x=0.525, xanchor='center'
        , font=dict(size=40), y=0.99),     
    legend=dict(x=0.75, title='Community Size' ,font=dict(size=30), title_font=dict(size=32)
    )
)

fig.show() 


In [192]:
# Importer Intermediate Community Clusters

fig = px.choropleth(
    imp_sec_cleaned,
    locations='name',     
    color='Community Size',     
    projection='robinson',
    title='Louvain Community Detection Algorithm:<br>Importer Intermediate Community Clusters',
    category_orders={'Community Size': 
                     sorted(imp_sec_cleaned['Community Size'].unique(), key=lambda x: int(x))}
)

fig.update_layout(width=5000, height=1400,
    geo=dict(showframe=False, showcoastlines=True),
    title=dict(
        text='Louvain Community Detection Algorithm: Importer Intermediate Community Clusters',
        x=0.525, xanchor='center'
        , font=dict(size=40), y=0.99),
    legend=dict(x=0.75, title='Community Size' ,font=dict(size=30), title_font=dict(size=32)
   )
)

fig.show() 
