In [None]:
import pandas as pd 

## Node-level (person-level) information

In [None]:
# Per-node statistics. The column names are assigned positionally, so the
# parquet file must contain exactly four columns.
nodestats = pd.read_parquet("data/temp/nodestats.parquet")
nodestats.columns = ["index", "cluster_id", "node_id", "eigen_centrality"]
# Preview the first rows only, as the other loading cells do, rather than
# rendering the whole frame.
nodestats.head(5)

## Edge-level (pairwise-comparison level) information

In [None]:
# Pairwise comparisons, without the term-frequency-adjusted probability column.
df_e = pd.read_parquet('data/graph/df_e.parquet').drop(columns="tf_adjusted_match_prob")
edges = pd.read_parquet("data/temp/edgestats.parquet")

# Attach the comparison columns to each edge (inner join on the two endpoint
# ids) and give the graph statistics descriptive names.
df_e = (
    edges
    .merge(df_e, left_on=['src', 'dst'], right_on=['unique_id_l', 'unique_id_r'])
    .rename(columns={'component': 'cluster_id', 'eb': 'edge_betweenness'})
)
df_e.head(5)

## Subgraph-level (cluster-level) information

In [4]:
# Cluster-level statistics; `component` becomes `cluster_id` so the key has the
# same name as in the node and edge tables.
graphstats = (
    pd.read_parquet("data/temp/graphstats.parquet")
    .rename(columns={'component': 'cluster_id', 'nodes': 'node_ids'})
)
graphstats.head(2)

Unnamed: 0,cluster_id,node_ids,nodecount,edgecount,density,diameter,radius,transitivity,tri_clustcoeff,sq_clustcoeff,graphhash
0,12,"[226, 227, 228]",3,3,1.0,1,1,1.0,1.0,0.0,7d2c307dbd866960fae5a905cc5447de
1,28,"[409, 411]",2,1,1.0,1,1,0.0,0.0,0.0,2148f1da1ac29711e1273e364d4127c4


## Visualisation

#### Node data

In [5]:
def link_data_with_tooltip(df, source_field='src', target_field='dst', cols_to_retain=(), cols_to_drop_from_tooltip=(), cluster_id=None, cluster_field='cluster_id'):
    """Convert an edge DataFrame into a list of link records with tooltips.

    Args:
        df: DataFrame with one row per edge.
        source_field: Column copied into each record's ``source`` key.
        target_field: Column copied into each record's ``target`` key.
        cols_to_retain: Columns copied to the top level of each record.
        cols_to_drop_from_tooltip: Columns left out of the ``tooltip`` dict.
        cluster_id: If not None, keep only rows whose ``cluster_field`` equals
            this value.
        cluster_field: Column holding the cluster identifier.

    Returns:
        A list of dicts, one per row, each containing the retained columns,
        ``source``, ``target`` and a ``tooltip`` dict of the remaining columns.

    Raises:
        KeyError: If a referenced column is missing from ``df``.
    """
    # Compare against None (not truthiness) so a cluster id of 0 still filters.
    if cluster_id is not None:
        df = df[df[cluster_field] == cluster_id]

    links = []
    for rec in df.to_dict(orient='records'):
        link = {col: rec[col] for col in cols_to_retain}
        # The endpoints are always required, even when no extra columns are
        # retained; they are set last so they win over a retained column that
        # happens to be called 'source' or 'target'.
        link['source'] = rec[source_field]
        link['target'] = rec[target_field]
        link['tooltip'] = {
            col: value
            for col, value in rec.items()
            if col not in cols_to_drop_from_tooltip
        }
        links.append(link)
    return links

def node_data_with_tooltip(df, cols_to_retain=('cluster_id',), cols_to_drop_from_tooltip=(), cluster_id=None, cluster_field='cluster_id'):
    """Convert a node DataFrame into a list of node records with tooltips.

    Args:
        df: DataFrame with one row per node.
        cols_to_retain: Columns copied to the top level of each record.
        cols_to_drop_from_tooltip: Columns left out of the ``tooltip`` dict.
        cluster_id: If not None, keep only rows whose ``cluster_field`` equals
            this value.
        cluster_field: Column holding the cluster identifier.

    Returns:
        A list of dicts, one per row, each containing the retained columns and
        a ``tooltip`` dict of the remaining columns.

    Raises:
        KeyError: If a referenced column is missing from ``df``.
    """
    # Compare against None (not truthiness) so a cluster id of 0 still filters.
    if cluster_id is not None:
        df = df[df[cluster_field] == cluster_id]

    nodes = []
    for rec in df.to_dict(orient='records'):
        node = {col: rec[col] for col in cols_to_retain}
        node['tooltip'] = {
            col: value
            for col, value in rec.items()
            if col not in cols_to_drop_from_tooltip
        }
        nodes.append(node)
    return nodes



In [16]:
import json
def display_outputs(cluster_id, df_e, nodestats, graphstats):
    """Display the summary tables and the force-directed graph for one cluster.

    Builds node and link records for ``cluster_id``, injects them into the Vega
    spec loaded from ``data/graph/force_template.vg.json`` and renders, in
    order: the matching ``graphstats`` rows, the matching ``df_e`` rows, and
    the chart (via a Javascript display that loads vega and vega-embed from
    the jsDelivr CDN).

    Args:
        cluster_id: Identifier of the cluster to display.
        df_e: Edge-level DataFrame with a ``cluster_id`` column.
        nodestats: Node-level DataFrame with a ``cluster_id`` column.
        graphstats: Cluster-level DataFrame with a ``cluster_id`` column.

    Returns:
        None. Everything is emitted through ``IPython.display.display``.
    """
    # Imported locally so the function does not depend on a later cell having
    # imported `display` already.
    from IPython.display import Javascript, display

    link_data = link_data_with_tooltip(
        df_e,
        cluster_id=cluster_id,
        cols_to_retain=['match_probability', 'edge_betweenness'],
        cols_to_drop_from_tooltip=['cluster_id', 'src', 'dst', 'group_l', 'group_r'],
    )
    node_data = node_data_with_tooltip(
        nodestats,
        cols_to_retain=['cluster_id', 'eigen_centrality', 'node_id'],
        cluster_id=cluster_id,
    )

    with open('data/graph/force_template.vg.json') as f:
        vl = json.load(f)

    # The template's first two data entries are replaced by this cluster's
    # nodes and links.
    vl['data'][0] = {"name": "node-data", "values": node_data}
    vl['data'][1] = {"name": "link-data", "values": link_data}
    vl['width'] = 400
    vl['height'] = 400

    # The serialised spec is embedded directly as a JavaScript object literal.
    # Wrapping it in a template literal and re-parsing it would corrupt any
    # value containing a quote, backslash, newline, backtick or "${", and
    # JSON.parse would reject the NaN values that pandas data can produce.
    spec_json = json.dumps(vl)

    # The libraries are loaded one after the other and the chart is only
    # rendered once both are available; a library that an earlier render has
    # already loaded is reused instead of appending another <script> tag.
    script = f"""
        function ensureScript(globalName, src) {{
            if (window[globalName]) {{
                return Promise.resolve();
            }}
            return new Promise(function(resolve, reject) {{
                var tag = document.createElement('script');
                tag.type = 'text/javascript';
                tag.src = src;
                tag.onload = resolve;
                tag.onerror = reject;
                document.head.appendChild(tag);
            }});
        }}

        var spec = {spec_json};
        ensureScript('vega', '//cdn.jsdelivr.net/npm/vega@5')
            .then(function() {{
                return ensureScript('vegaEmbed', '//cdn.jsdelivr.net/npm/vega-embed@6');
            }})
            .then(function() {{
                return vegaEmbed(element, spec);
            }})
            .catch(console.error);
    """
    display(graphstats[graphstats['cluster_id'] == cluster_id])
    display(df_e[df_e['cluster_id'] == cluster_id])

    display(Javascript(script))
    

In [26]:
from IPython.display import display, clear_output
import ipywidgets as widgets


In [27]:
# Output widget that on_change clears and then renders the selected cluster into.
output = widgets.Output()

In [28]:
# Distinct cluster ids, in order of first appearance, used as dropdown options.
dd_values = [*nodestats["cluster_id"].unique()]



In [33]:
# Cluster selector: one option per distinct cluster id.
w = widgets.Dropdown(description='Cluster:', options=dd_values)


In [34]:
def on_change(change):
    """Redraw the cluster view for the newly selected dropdown value.

    Args:
        change: Change notification dict; its ``'new'`` entry is passed to
            ``display_outputs`` as the cluster id.
    """
    selected_cluster = change['new']
    # Drop the previous cluster's tables and chart before drawing the new ones.
    output.clear_output()
    with output:
        display_outputs(selected_cluster, df_e, nodestats, graphstats)




In [35]:
# Call on_change for changes to the dropdown's `value` trait only.
w.observe(on_change,names=['value'])



In [36]:
# Show the cluster selector, followed by the output area that on_change writes into.
# NOTE(review): on_change is only registered for value changes, so the output
# area presumably stays empty for the initially selected cluster until another
# one is picked — confirm whether an initial render is wanted.
display(w)
output

Dropdown(description='Cluster:', options=(12, 28, 29, 30, 33, 42, 67, 8589934628, 8589934658, 8589934662, 1717…

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '     cluster_id    node_ids  nodecount…