In [1]:
import pandas as pd 

## Node-level (person-level) information

In [2]:
# Labels assigned positionally to the columns of the node-level statistics table.
# NOTE(review): this relies on the parquet file's column order — confirm it is stable.
node_columns = ["index", "cluster_id", "node_id", "eigen_centrality"]

nodestats = pd.read_parquet("data/temp/nodestats.parquet")
nodestats.columns = node_columns
nodestats

Unnamed: 0,index,cluster_id,node_id,eigen_centrality
0,226,12,226,0.577350
1,227,12,227,0.577350
2,228,12,228,0.577350
3,409,28,409,0.707107
4,411,28,411,0.707107
...,...,...,...,...
421,194,34359738372,194,0.707107
422,879,34359738441,879,0.707107
423,880,34359738441,880,0.707107
424,954,42949673030,954,0.707107


## Edge-level (pairwise-comparison level) information

In [None]:
# Pairwise comparison records, minus the term-frequency-adjusted probability column.
pairwise = pd.read_parquet('data/graph/df_e.parquet').drop(columns="tf_adjusted_match_prob")

# Per-edge graph statistics keyed by (src, dst).
edges = pd.read_parquet("data/temp/edgestats.parquet")

# Attach the comparison columns to each edge (default inner join) and give the
# graph-metric columns the names used by the rest of the notebook.
df_e = (
    edges
    .merge(pairwise, left_on=['src', 'dst'], right_on=['unique_id_l', 'unique_id_r'])
    .rename(columns={'component': 'cluster_id', 'eb': 'edge_betweenness'})
)
df_e.head(5)

## Subgraph-level (cluster-level) information

In [None]:
# Cluster-level statistics, with columns renamed to match the node and edge tables.
graphstats = (
    pd.read_parquet("data/temp/graphstats.parquet")
    .rename(columns={'component': 'cluster_id', 'nodes': 'node_ids'})
)
graphstats.head(2)

## Visualisation

#### Node and link data

In [None]:
def link_data_with_tooltip(df, source_field = 'src', target_field='dst', cols_to_retain=[], cols_to_drop_from_tooltip=[], cluster_id = None, cluster_field='cluster_id'):
    """Convert an edge DataFrame into a list of link records with tooltips.

    Each record contains the columns listed in ``cols_to_retain``, a
    ``'source'`` and ``'target'`` key copied from ``source_field`` and
    ``target_field``, and a nested ``'tooltip'`` dict.

    Args:
        df: DataFrame with one row per edge.
        source_field: Column copied into each record's ``'source'`` key.
        target_field: Column copied into each record's ``'target'`` key.
        cols_to_retain: Columns copied to the top level of each record.
        cols_to_drop_from_tooltip: Columns left out of the ``'tooltip'`` dict.
        cluster_id: If not ``None``, keep only rows whose ``cluster_field``
            equals this value. A cluster id of ``0`` is a valid filter.
        cluster_field: Column compared against ``cluster_id``.

    Returns:
        A list of dicts, one per (filtered) row of ``df``.

    Raises:
        KeyError: If ``source_field``, ``target_field`` or a name in
            ``cols_to_retain`` is not a column of ``df`` (and ``df`` has rows).
    """
    # Compare against None so that a falsy id such as 0 still filters.
    if cluster_id is not None:
        df = df[df[cluster_field] == cluster_id]

    link_records = []
    for rec in df.to_dict(orient='records'):
        link = {col: rec[col] for col in cols_to_retain}

        # Always emit the endpoints, even when no extra columns are retained.
        link['source'] = rec[source_field]
        link['target'] = rec[target_field]

        link['tooltip'] = {
            col: value
            for col, value in rec.items()
            if col not in cols_to_drop_from_tooltip
        }
        link_records.append(link)
    return link_records

def node_data_with_tooltip(df, cols_to_retain=['cluster_id'], cols_to_drop_from_tooltip=[], cluster_id=None, cluster_field='cluster_id' ):
    """Convert a node DataFrame into a list of node records with tooltips.

    Each record contains the columns listed in ``cols_to_retain`` plus a
    nested ``'tooltip'`` dict holding every column of the row that is not
    named in ``cols_to_drop_from_tooltip``.

    Args:
        df: DataFrame with one row per node.
        cols_to_retain: Columns copied to the top level of each record.
        cols_to_drop_from_tooltip: Columns left out of the ``'tooltip'`` dict.
        cluster_id: If not ``None``, keep only rows whose ``cluster_field``
            equals this value. A cluster id of ``0`` is a valid filter.
        cluster_field: Column compared against ``cluster_id``.

    Returns:
        A list of dicts, one per (filtered) row of ``df``.

    Raises:
        KeyError: If a name in ``cols_to_retain`` is not a column of ``df``
            (and ``df`` has rows).
    """
    # Compare against None so that a falsy id such as 0 still filters.
    if cluster_id is not None:
        df = df[df[cluster_field] == cluster_id]

    node_records = []
    for rec in df.to_dict(orient='records'):
        node = {col: rec[col] for col in cols_to_retain}
        node['tooltip'] = {
            col: value
            for col, value in rec.items()
            if col not in cols_to_drop_from_tooltip
        }
        node_records.append(node)
    return node_records



In [240]:
import json
from IPython.display import display, clear_output
import ipywidgets as widgets

def display_outputs(cluster_id, df_e, nodestats, graphstats):
    """Display the stats tables and a Vega force-layout graph for one cluster.

    Builds node and link records for ``cluster_id``, injects them into the
    Vega spec loaded from ``data/graph/force_template.vg.json``, then displays
    the matching ``graphstats`` rows, the matching ``df_e`` rows, and a
    JavaScript snippet that renders the spec with vega-embed.

    Args:
        cluster_id: Cluster to show; compared against the ``cluster_id`` column.
        df_e: Edge-level DataFrame (passed to ``link_data_with_tooltip``).
        nodestats: Node-level DataFrame (passed to ``node_data_with_tooltip``).
        graphstats: Cluster-level DataFrame with a ``cluster_id`` column.

    Raises:
        OSError: If the template file cannot be opened.
    """
    from IPython.display import Javascript

    link_data = link_data_with_tooltip(
        df_e,
        cluster_id=cluster_id,
        cols_to_retain=['match_probability', 'edge_betweenness'],
        cols_to_drop_from_tooltip=['cluster_id', 'src', 'dst', 'group_l', 'group_r'],
    )
    node_data = node_data_with_tooltip(
        nodestats,
        cols_to_retain=['cluster_id', 'eigen_centrality', 'node_id'],
        cluster_id=cluster_id,
    )

    with open('data/graph/force_template.vg.json') as f:
        vl = json.load(f)

    # The first two datasets of the template are replaced with this cluster's data.
    vl['data'][0] = {"name": "node-data", "values": node_data}
    vl['data'][1] = {"name": "link-data", "values": link_data}
    vl['width'] = 400
    vl['height'] = 400

    # Encode twice: the inner dump is the JSON document, the outer dump turns it
    # into a quoted JavaScript string literal. Embedding the JSON in a template
    # literal (backticks) would let JS reinterpret backslash escapes, backticks
    # and "${" inside the data before JSON.parse ever sees it.
    spec_literal = json.dumps(json.dumps(vl))

    # Load vega, then vega-embed, and only render once both have finished
    # loading; libraries already present on the page are not loaded again.
    loader_js = """
        function loadScript(src) {
            return new Promise(function(resolve, reject) {
                var script = document.createElement('script');
                script.type = 'text/javascript';
                script.src = src;
                script.onload = resolve;
                script.onerror = reject;
                document.head.appendChild(script);
            });
        }

        var libsReady = (window.vega && window.vegaEmbed)
            ? Promise.resolve()
            : loadScript('//cdn.jsdelivr.net/npm/vega@5').then(function() {
                  return loadScript('//cdn.jsdelivr.net/npm/vega-embed@6');
              });

        libsReady.then(function() {
            return vegaEmbed(element, spec);
        }).catch(console.error);
    """
    script = f"var spec = JSON.parse({spec_literal});\n" + loader_js

    display(graphstats[graphstats['cluster_id'] == cluster_id])
    display(df_e[df_e['cluster_id'] == cluster_id])

    display(Javascript(script))
    
def on_change(change):
    """Redraw the cluster view for the newly selected dropdown value.

    Args:
        change: Change notification dict; its ``'new'`` entry is used as the
            cluster id to display.
    """
    output.clear_output()
    with output:
        selected_cluster = change['new']
        display_outputs(selected_cluster, df_e, nodestats, graphstats)


# Cluster shown when the viewer is first rendered.
DEFAULT_CLUSTER_ID = 39

# Every cluster id present in the node table, in ascending order.
dd_values = sorted(nodestats["cluster_id"].unique())

# Dropdown rejects a value that is not among its options, so fall back to the
# first available cluster (or None when there are no clusters at all).
if DEFAULT_CLUSTER_ID in dd_values:
    initial_cluster_id = DEFAULT_CLUSTER_ID
else:
    initial_cluster_id = dd_values[0] if dd_values else None

output = widgets.Output()

with output:
    display_outputs(initial_cluster_id, df_e, nodestats, graphstats)

w = widgets.Dropdown(
    options=dd_values,
    value=initial_cluster_id,
    description='Cluster:',
)


def on_any_change(change):
    """Unfinished handler meant to react to a change in any control.

    Not registered on any widget in the visible cells; it currently only
    reads the cluster dropdown's value and does nothing with it.

    Args:
        change: Change notification (unused).
    """
    v1 = w.value
    # The original line here was the incomplete statement ``v2 = ``, a
    # SyntaxError that prevented the whole cell from executing.
    # TODO: presumably this should read a second control (e.g. the
    # commented-out ``w_edge_metric`` radio buttons) — confirm and implement.
    v2 = None

# Re-render the cluster view whenever the dropdown's selected value changes.
w.observe(on_change, names='value')

# w_edge_metric = widgets.RadioButtons(description='Edge metric', options=['edge_betweenness','tf_adjusted_match_prob'])





In [241]:
display(w)
# display(w_edge_metric)
display(output)


Dropdown(description='Cluster:', index=20, options=(1, 3, 4, 6, 7, 12, 13, 18, 20, 22, 24, 27, 28, 29, 30, 32,…

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '     cluster_id              node_ids …

In [193]:


# Clusters ordered from fewest to most nodes.
graphstats.sort_values(by='nodecount')

Unnamed: 0,cluster_id,node_ids,nodecount,edgecount,density,diameter,radius,transitivity,tri_clustcoeff,sq_clustcoeff,graphhash
149,42949673030,"[954, 956]",2,1,1.000,1,1,0.000,0.000,0.000,2148f1da1ac29711e1273e364d4127c4
57,34359738449,"[947, 949]",2,1,1.000,1,1,0.000,0.000,0.000,2148f1da1ac29711e1273e364d4127c4
111,34359738396,"[440, 443]",2,1,1.000,1,1,0.000,0.000,0.000,2148f1da1ac29711e1273e364d4127c4
55,25769803854,"[997, 999]",2,1,1.000,1,1,0.000,0.000,0.000,2148f1da1ac29711e1273e364d4127c4
112,34359738420,"[679, 681]",2,1,1.000,1,1,0.000,0.000,0.000,2148f1da1ac29711e1273e364d4127c4
...,...,...,...,...,...,...,...,...,...,...,...
69,8589934616,"[398, 401, 400, 402, 403]",5,10,1.000,1,1,1.000,1.000,1.000,b0df24c18b6c07ad010a355fc94f5803
77,25769803812,"[442, 441, 439, 444, 445]",5,7,0.700,2,1,0.600,0.800,1.000,4afb3964482c6dc217ecd87e3da42564
82,61,"[798, 800, 804, 802, 799]",5,4,0.400,2,1,0.000,0.000,0.000,97181c87a84cb6c7002c600e3d3ab98b
118,38,"[495, 493, 496, 494, 497]",5,8,0.800,2,1,0.789,0.867,1.000,0d5397367d579892566a67a3602dada1


In [None]:
# Scratch example: two dropdowns sharing one handler that redraws a single
# output area from the current value of both controls.
dd1 = widgets.Dropdown(options=['dd1_a', 'dd1_b'])
dd2 = widgets.Dropdown(options=['dd2_x', 'dd2_y'])

# Use a dedicated Output here. Rebinding the global ``output`` would redirect
# the cluster viewer's ``on_change`` handler (which looks ``output`` up when it
# is called) into this demo's output area.
demo_output = widgets.Output()

def on_change_all(change=None):
    """Redraw the demo output from the current values of both dropdowns.

    Args:
        change: Change notification from either dropdown. It is not used, so
            the handler can also be called directly with no argument.
    """
    d1 = dd1.value
    d2 = dd2.value

    demo_output.clear_output()
    with demo_output:
        display(d1)
        display(d2)

dd1.observe(on_change_all, names='value')
dd2.observe(on_change_all, names='value')

display(dd1)
display(dd2)
display(demo_output)

# Populate the output once so it is not empty before the first change.
on_change_all()