In [86]:
import networkx as nx
import numpy as np
from itertools import chain, combinations

In [87]:
def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

In [88]:
len(list(powerset([1,2,3,4,5,6])))

64

In [89]:
def find_max_weight_edge(sim_graph):
    # Search for
    max_weight = -1
    tu = 0
    tv = 0
    vertex_count = sim_graph.number_of_nodes()
    for u in range(0,vertex_count):
        linked_nodes = sim_graph.adj[u]
        for v in list(linked_nodes):
            w = sim_graph.edges[u,v]["weight"]
            print("u="+str(u) + "," + str(v) + ",weight=" + str(w))
            if (w > max_weight):
                tu = u
                tv = v
                max_weight = w
    return tu,tv, max_weight

In [90]:
def delete_edges_from(graph, vertex):
    e = graph.adj[vertex]
    for linked in list(e):
        graph.remove_edge(vertex,linked)

In [91]:
def sort_by_metric_func(graph):
    return list(graph.nodes())

In [92]:
def get_metric_ranking(rankings, num):
    return rankings.index(num)

In [93]:
# Eqn 2 in diversification paper
def calc_compdist_from(graph, i, j, lam):
    dimension_dist = 0
    i_props = graph.nodes[i]
    j_props = graph.nodes[j]
    
    rankings = sort_by_metric_func(graph)
    ranking_i = get_metric_ranking(rankings, i)
    ranking_j = get_metric_ranking(rankings, j)
    print("ranking_i" + str(ranking_i))
    print("ranking_j" + str(ranking_j))
    
    # Get the properties
    for k,v in i_props.items():
        # Dimension values only
        if "d" in k:
            dist = (v - j_props[k])
            dimension_dist += (dist ** 2.0)
            
    comp_value = ranking_i + ranking_j + 2 * lam * np.sqrt(dimension_dist)
    print("dimension_dist = " + str(dimension_dist))
    print("comp_value = " + str(comp_value))
    return comp_value
    

In [94]:
# Need to add arbitrary vertex
def max_sum_divergence_approx(sim_graph, phy_count):
    Phy = nx.Graph() 
    lim = int(np.floor(phy_count/2))
    for i in range(0,lim):
        mu, mv, weight = find_max_weight_edge(sim_graph)
        if (mu != mv):
            print("Max weight: " + str(mu) + "->" + str(mv) + ",weight=" + str(weight))
            Phy.add_node(mu)
            Phy.add_node(mv)
            Phy.add_edge(mu,mv)
            Phy.edges[mu,mv]["weight"] = sim_graph.edges[mu,mv]["weight"]
            print("Number of edges in Phy=" + str(Phy.number_of_edges()))
            # remove other connected edges from mu and mv in the s\im_graph
            delete_edges_from(sim_graph, mu)
            delete_edges_from(sim_graph, mv)
            # check further
        
    # If phy_count is odd, add a random vertex
    # for now, only use an even phy_count
    return Phy

In [95]:
def gen_node_props(metrics_count, dim_count, metric_max, dim_max):
    nodeprops = {}
    metrics = []
    
    for di in range(0, dim_count):
        rand_dim = np.random.uniform(low=0, high=dim_max)
        nodeprops["d" + str(di)] = rand_dim
        
    # Generate the metrics
    for mi in range(0,metrics_count):
        rand_metric = np.random.uniform(low=0, high=metric_max)
        metrics.append(rand_metric)
        nodeprops["m" + str(mi)] = rand_metric
    return nodeprops

In [96]:
def create_random_sim_graph(node_count, metric_count, metric_max, dim_count, dim_max, lam):
    Sim = nx.Graph();
    for i in range(0,node_count):
        Sim.add_nodes_from([(i, gen_node_props(metric_count, dim_count, metric_max, dim_max))])
    # add connections from all nodes, depending on their distance 
    for i in range(0, node_count):
        for j in range(0, node_count):
            if (i != j):
                Sim.add_edge(i,j)
                weight = calc_compdist_from(Sim, i, j, lam)
                Sim.edges[i,j]["weight"] = weight
                print("Adding edge from " + str(i) + "->" + str(j) + ", weight = " + str(Sim.edges[i,j]["weight"]))
                #print(Sim.edges[i, j])
    print("Edges: = " + str(Sim.number_of_edges()))
    return Sim

In [97]:
sim = create_random_sim_graph(6, 2, 10.0, 3, 10.0, 0.5)
phy = max_sum_divergence_approx(sim, 10)
phy

ranking_i0
ranking_j1
dimension_dist = 120.93827774041033
comp_value = 11.997194084875028
Adding edge from 0->1, weight = 11.997194084875028
ranking_i0
ranking_j2
dimension_dist = 20.351827626544004
comp_value = 6.511299992966994
Adding edge from 0->2, weight = 6.511299992966994
ranking_i0
ranking_j3
dimension_dist = 105.26068214534524
comp_value = 13.259662867041257
Adding edge from 0->3, weight = 13.259662867041257
ranking_i0
ranking_j4
dimension_dist = 63.45863928194454
comp_value = 11.966093100255893
Adding edge from 0->4, weight = 11.966093100255893
ranking_i0
ranking_j5
dimension_dist = 11.659603523331041
comp_value = 8.414616160468267
Adding edge from 0->5, weight = 8.414616160468267
ranking_i1
ranking_j0
dimension_dist = 120.93827774041033
comp_value = 11.997194084875028
Adding edge from 1->0, weight = 11.997194084875028
ranking_i1
ranking_j2
dimension_dist = 114.55545931028465
comp_value = 13.703058409178409
Adding edge from 1->2, weight = 13.703058409178409
ranking_i1
ranking

<networkx.classes.graph.Graph at 0x7f0a6c249ff0>

In [98]:
def ranking_func(graph,node_id):
    return node_id

In [115]:
def dist_func(graph, ni, nj):
    i_props = graph.nodes[ni]
    j_props = graph.nodes[nj]
    dimension_dist = 0.0
    for k,v in i_props.items():
    # Dimension values only
        if "d" in k:
            dist = (v - j_props[k])
            dimension_dist += (dist ** 2.0)
    return np.sqrt(dimension_dist)

In [100]:
def calc_subset_value_function(graph, subset_inds, lam):
    k = len(subset_inds)
    weight_sum = 0
    for ni in subset_inds:
        # ranking_func is w in the equation
        weight_sum += ranking_func(graph, ni)
    
    dist_sum = 0
    for ni in subset_inds:
        for nj in subset_inds:
            dist_sum += dist_func(graph, ni,nj)
    
    total = (k-1)*weight_sum + 2*lam*dist_sum  
    return total

In [101]:
all_subsets = list(powerset(sim.nodes()))

In [102]:
phy.number_of_nodes()
phy.edges()

EdgeView([(3, 5), (2, 4), (0, 1)])

In [103]:
def subset_value(graph, subset, lam):
    value = calc_subset_value_function(graph, subset, lam)
    return (subset, value)

In [104]:
FIXED_SUBSET_COUNT = 5

In [119]:
def meets_cost_constraint(subset):
    return (len(subset) == FIXED_SUBSET_COUNT)

In [106]:
def subsets_viable_sorted_by_cost(simgraph, lam):
    all_subsets = list(powerset(simgraph.nodes()))
    viable_subsets = filter(meets_cost_constraint, all_subsets)
    subset_info = map(lambda subset: subset_value(simgraph, subset, lam), viable_subsets)
    return subset_info

In [123]:
res = subsets_viable_sorted_by_cost(sim, 0.0)
list(res)

[((0, 1, 2, 3, 4), 40.0),
 ((0, 1, 2, 3, 5), 44.0),
 ((0, 1, 2, 4, 5), 48.0),
 ((0, 1, 3, 4, 5), 52.0),
 ((0, 2, 3, 4, 5), 56.0),
 ((1, 2, 3, 4, 5), 60.0)]

In [None]:
len(list(res))