In [None]:
import pandas as pd
import polars as pl
import numpy as np
import rtsvg
import networkx as nx
rt = rtsvg.RACETrack()

# Example edge list: one row per (fm -> to) edge plus a numeric 'ct' column
df = pl.DataFrame({'fm':'a b c d e f g g h h i i i f e'.split(),
                   'to':'b c d a a g f i i i a b c h h'.split(),
                   'ct':[1,9,2,3,4,1,2,7,3,4,2,3,9,2,3]})
relates  = [('fm','to')]
# cluster name -> member nodes that get collapsed into that cluster
clusters = {'__cluster_abc__':['a','b','c'],
            '__cluster_de__' :['d','e']}
g        = rt.createNetworkXGraph(df, relates)

# Node positions are hard-coded so the rendering is identical on every run.
# (A previous unseeded nx.spring_layout(g) call was removed: its result was
#  overwritten by this dictionary before ever being used.)
pos = {'c': (np.float64(-0.250), np.float64(-0.251)),
       'd': (np.float64(-0.250), np.float64(-0.108)),
       'e': (np.float64(-0.009), np.float64( 0.172)),
       'h': (np.float64( 0.455), np.float64( 0.172)),
       'b': (np.float64(-0.427), np.float64(-0.627)),
       'g': (np.float64( 0.455), np.float64(-0.627)),
       'f': (np.float64( 0.455), np.float64(-0.108)),
       'i': (np.float64(-0.009), np.float64(-0.251)),
       'a': (np.float64(-0.427), np.float64( 0.172)),}

# Give every cluster a position: the centroid (mean x, mean y) of its member nodes
for _cluster_ in clusters:
    x_sum, y_sum = 0.0, 0.0
    for _node_ in clusters[_cluster_]:
        x_sum += pos[_node_][0]
        y_sum += pos[_node_][1]
    pos[_cluster_] = [x_sum/len(clusters[_cluster_]), y_sum/len(clusters[_cluster_])]

# Shared rendering parameters; 'pos' is passed by reference, so positions added
# to it in later cells are visible through 'params' as well
params = {'pos':pos, 'link_shape':'curve', 'draw_labels':True, 'w':256, 'h':256}
_link_   = rt.linkNode(df, relates, **params)

In [None]:
#
# collapseGraph()
# - only works with polars at the moment
# - limitations -- only single field from/tos can be used (i.e., no multifield nodes)
# - multiple relationships get collapsed down to a single relationship
# - resulting columns: __fm__, __to__, __count__, __color__
#
def INTEGRATED_INTO_BASE_collapseGraph(df, relationships, node_clusters, count_by=None, count_by_set=False, color_by=None):
    """Collapse clustered nodes into single nodes and aggregate the resulting edges.

    Every node listed in node_clusters is replaced by the name of its cluster;
    nodes that appear in the data but in no cluster keep their own name.  The
    remapped endpoints are written to '__fm__' / '__to__' and the edges are then
    aggregated per ('__fm__', '__to__') pair.

    Parameters
    ----------
    df            : polars DataFrame holding the edges
    relationships : iterable of (from_field, to_field) pairs; only the first two
                    elements of each entry are used and each must be a single column name
    node_clusters : dict mapping cluster name -> iterable of member nodes
    count_by      : passed unchanged to rt.polarsCounter()
    count_by_set  : passed unchanged to rt.polarsCounter()
    color_by      : optional column name.  When None every edge gets the default
                    data color.  Otherwise a collapsed edge whose underlying rows
                    all share one color_by value is colored with
                    rt.co_mgr.getColor(<that value>); edges mixing several values
                    get the default data color.  The values inspected are the
                    original (un-collapsed) ones from df.

    Returns
    -------
    polars DataFrame: the per-relationship results of rt.polarsCounter() (grouped
    on '__fm__' and '__to__') with a '__color__' column added, concatenated in
    relationship order.

    NOTE(review): replace_strict() builds the output column from the mapping's
    values, so cluster names presumably need to be type-compatible with the node
    values -- confirm before using non-string nodes.
    """
    default_color = rt.co_mgr.getTVColor('data','default')

    # Create the reverse map (member node -> cluster name)
    rev_map = {}
    for k, v in node_clusters.items():
        for v_ in v: rev_map[v_] = k

    # Per relationship
    __dfs__ = []
    for _relates_ in relationships:
        _fm_, _to_ = _relates_[0], _relates_[1]
        # nodes that belong to no cluster map to themselves (replace_strict needs a total mapping)
        for n in set(df[_fm_]) | set(df[_to_]):
            if n not in rev_map: rev_map[n] = n
        # remap the nodes in the dataframe
        df_tmp     = df.with_columns(pl.col(_fm_).replace_strict(rev_map).alias('__fm__'),
                                     pl.col(_to_).replace_strict(rev_map).alias('__to__'))
        df_counter = rt.polarsCounter(df_tmp, ['__fm__','__to__'], count_by, count_by_set)

        if color_by is None:
            df_counter = df_counter.with_columns(pl.lit(default_color).alias('__color__'))
        else:
            # n_unique() (not len()): several rows sharing one color value must still count as a single color
            df_colors  = df_tmp.group_by(['__fm__','__to__']).agg(pl.col(color_by).n_unique().alias('__color_nuniq__'),
                                                                  pl.col(color_by).first()   .alias('__color_first_item__'))
            df_colors  = df_colors.with_columns(pl.col('__color_first_item__').map_elements(rt.co_mgr.getColor, return_dtype=pl.String).alias('__color_first__'))
            df_colors  = df_colors.with_columns(pl.when(pl.col('__color_nuniq__')==1).then(pl.col('__color_first__')).otherwise(pl.lit(default_color)).alias('__color__'))
            # keep only the join keys and the final color
            df_colors  = df_colors.select(['__fm__','__to__','__color__'])

            df_counter = df_counter.join(df_colors, on=['__fm__','__to__'])
        __dfs__.append(df_counter)

    return pl.concat(__dfs__)

# Collapse the example graph with the library method, requesting edge colors by the 'to' field
df_collapsed     = rt.collapseDataFrameGraphByClusters(df, relates, clusters, color_by='to')
# Render the collapsed edges with the same parameters as the original graph
# (params['pos'] already holds a position for every cluster name)
_link_collapsed_ = rt.linkNode(df_collapsed, [('__fm__','__to__')], **params)
# Cell output: original and collapsed renderings combined by rt.tile()
# NOTE(review): the meaning of the second argument (20) is not visible here -- presumably spacing; confirm
rt.tile([_link_, _link_collapsed_], 20)

In [None]:
# Display the collapsed edge table returned by rt.collapseDataFrameGraphByClusters()
df_collapsed

In [None]:
#
# collapseDataFrameGraphByClustersDirectional()
# - same as collapseDataFrameGraphByClusters() but uses separate cluster maps for from-nodes and to-nodes
# - only works with polars at the moment
# - limitations -- only single field from/tos can be used (i.e., no multifield nodes)
# - multiple relationships get collapsed down to a single relationship
# - resulting columns: __fm__, __to__, __count__, __color__
#
def INTEGRATED_INTO_BASE_collapseDataFrameGraphByClustersDirectional(df, relationships, node_fm_clusters, node_to_clusters, count_by=None, count_by_set=False, color_by=None):
    """Collapse clustered nodes and aggregate edges, with separate clusterings per edge direction.

    Values in a relationship's from-field are remapped with node_fm_clusters and
    values in its to-field with node_to_clusters, so the same node may collapse
    into different clusters depending on which end of the edge it is on.  Nodes
    not listed in the relevant clustering keep their own name.  The remapped
    endpoints are written to '__fm__' / '__to__' and the edges are aggregated per
    ('__fm__', '__to__') pair.

    Parameters
    ----------
    df               : polars DataFrame holding the edges
    relationships    : iterable of (from_field, to_field) pairs; only the first two
                       elements of each entry are used and each must be a single column name
    node_fm_clusters : dict mapping cluster name -> iterable of member nodes (from side)
    node_to_clusters : dict mapping cluster name -> iterable of member nodes (to side)
    count_by         : passed unchanged to rt.polarsCounter()
    count_by_set     : passed unchanged to rt.polarsCounter()
    color_by         : optional column name.  When None every edge gets the default
                       data color.  Otherwise a collapsed edge whose underlying rows
                       all share one color_by value is colored with
                       rt.co_mgr.getColor(<that value>); edges mixing several values
                       get the default data color.  The values inspected are the
                       original (un-collapsed) ones from df.

    Returns
    -------
    polars DataFrame: the per-relationship results of rt.polarsCounter() (grouped
    on '__fm__' and '__to__') with a '__color__' column added, concatenated in
    relationship order.
    """
    default_color = rt.co_mgr.getTVColor('data','default')

    # Create the reverse maps (member node -> cluster name), one per direction
    rev_fm_map = {}
    for k, v in node_fm_clusters.items():
        for v_ in v: rev_fm_map[v_] = k
    rev_to_map = {}
    for k, v in node_to_clusters.items():
        for v_ in v: rev_to_map[v_] = k

    # Per relationship
    __dfs__ = []
    for _relates_ in relationships:
        _fm_, _to_ = _relates_[0], _relates_[1]
        # nodes that belong to no cluster map to themselves (replace_strict needs a total mapping)
        for n in set(df[_fm_]) | set(df[_to_]):
            if n not in rev_fm_map: rev_fm_map[n] = n # this will fail if the types don't match
            if n not in rev_to_map: rev_to_map[n] = n # this will fail if the types don't match
        # remap the nodes in the dataframe
        df_tmp     = df.with_columns(pl.col(_fm_).replace_strict(rev_fm_map).alias('__fm__'),
                                     pl.col(_to_).replace_strict(rev_to_map).alias('__to__'))
        df_counter = rt.polarsCounter(df_tmp, ['__fm__','__to__'], count_by, count_by_set)

        if color_by is None:
            df_counter = df_counter.with_columns(pl.lit(default_color).alias('__color__'))
        else:
            # n_unique() (not len()): several rows sharing one color value must still count as a single color
            df_colors  = df_tmp.group_by(['__fm__','__to__']).agg(pl.col(color_by).n_unique().alias('__color_nuniq__'),
                                                                  pl.col(color_by).first()   .alias('__color_first_item__'))
            df_colors  = df_colors.with_columns(pl.col('__color_first_item__').map_elements(rt.co_mgr.getColor, return_dtype=pl.String).alias('__color_first__'))
            df_colors  = df_colors.with_columns(pl.when(pl.col('__color_nuniq__')==1).then(pl.col('__color_first__')).otherwise(pl.lit(default_color)).alias('__color__'))
            # keep only the join keys and the final color
            df_colors  = df_colors.select(['__fm__','__to__','__color__'])

            df_counter = df_counter.join(df_colors, on=['__fm__','__to__'])
        __dfs__.append(df_counter)

    return pl.concat(__dfs__)

# Direction-specific clusters: 'a_fm' applies to from-nodes, 'b_to' to to-nodes
fm_clusters = {'a_fm': {'a','c'}}
to_clusters = {'b_to': {'b','d'}}

# Register positions for the two new cluster nodes; params['pos'] is this same dict
pos.update({'a_fm': (np.float64(-0.357), np.float64( 0.102)),
            'b_to': (np.float64(-0.357), np.float64(-0.577))})

# Collapse with the library method, render, and show next to the original graph
df_collapsed     = rt.collapseDataFrameGraphByClustersDirectional(df, relates, fm_clusters, to_clusters,
                                                                  color_by='to')
_link_collapsed_ = rt.linkNode(df_collapsed, [('__fm__','__to__')], **params)
rt.tile([_link_, _link_collapsed_], 20)