Skip to content

Commit

Permalink
Merge pull request #34 from microsoft/cvec-weights
Browse files Browse the repository at this point in the history
Use weighted degree for diagonal augmentation
  • Loading branch information
nicaurvi committed Mar 12, 2020
2 parents 7d4af06 + 1616c8d commit dba00c2
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 43 deletions.
3 changes: 2 additions & 1 deletion docs/release_notes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Release Notes

## 0.1.2
- Rename `self_loop_augmentation` to `diagonal_augmentation` and use weighted degree to perform calculation instead of degree only.
## 0.1.1
- [Issue 29](https://github.com/microsoft/topologic/issues/29) Fixed bug in `topologic.io.from_dataset` where an empty networkx graph object (Graph, DiGraph, etc) was being treated as if no networkx Graph object were provided at all.
- Added `is_digraph` parameter to `topologic.io.from_file`. This parameter defaults to False for original behavior. Setting it to True will create a networkx DiGraph object instead.
Expand Down
14 changes: 7 additions & 7 deletions tests/embedding/test_adjacency_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ def test_adjacency_embedding(self):
self.assertIsInstance(matrix, np.ndarray)
self.assertIsInstance(labels, list)
self.assertEqual(2, matrix.ndim)
expected_matrix = np.array([[0.43445175],
[1.14794954],
[0.8689035]])
expected_matrix = np.array([[0.385095],
[1.140718],
[0.926595]])
expected_label = ['a', 'b', 'c']
np.testing.assert_allclose(expected_matrix, matrix, rtol=1e-6)
self.assertListEqual(expected_label, labels)
Expand All @@ -40,11 +40,11 @@ def test_adjacency_embedding_elbowcut_none(self):
self.assertIsInstance(matrix, np.ndarray)
self.assertIsInstance(labels, list)
self.assertEqual(2, matrix.ndim)
expected_matrix = np.array([[0.43445175, 0.29790657],
[1.14794954, -0.56372701],
[0.8689035, 0.59581314]])
expected_matrix = np.array([[0.385095, -0.351718],
[1.140718, 0.552094],
[0.926595, -0.5335]])
expected_label = ['a', 'b', 'c']
np.testing.assert_allclose(expected_matrix, matrix, rtol=1e-6)
np.testing.assert_allclose(expected_matrix, matrix, rtol=1e-5)
self.assertListEqual(expected_label, labels)

def test_adjacency_embedding_gpickle(self):
Expand Down
27 changes: 17 additions & 10 deletions tests/embedding/test_laplacian_spectral_embedding.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pickle
import sys
import unittest

import networkx as nx
import numpy as np
import pickle
import pytest

from topologic.embedding import laplacian_embedding


Expand All @@ -21,15 +25,18 @@ def test_laplacian_embedding(self):
self.assertIsInstance(matrix, np.ndarray)
self.assertIsInstance(labels, list)
self.assertEqual(2, matrix.ndim)
expected_matrix = np.array([[0.44095855],
[0.70710678],
[0.5527708]])
expected_matrix = np.array([[0.408248],
[0.707107],
[0.577350]])
expected_label = ['a', 'b', 'c']
np.testing.assert_allclose(expected_matrix, matrix)
np.testing.assert_allclose(expected_matrix, matrix, rtol=1e-5)
self.assertListEqual(expected_label, labels)

def test_laplacian_embedding_elbowcut_none(self):
graph = nx.Graph([('a', 'b', {'weight': 1.0}), ('b', 'c', {'weight': 2.0})])
if sys.platform.startswith('darwin'):
pytest.skip('Test not supported on Mac OS')

graph = nx.Graph([('a', 'b', {'weight': 2.0}), ('b', 'c', {'weight': 2.0})])
result = laplacian_embedding(
graph,
elbow_cut=None,
Expand All @@ -40,11 +47,11 @@ def test_laplacian_embedding_elbowcut_none(self):
self.assertIsInstance(matrix, np.ndarray)
self.assertIsInstance(labels, list)
self.assertEqual(2, matrix.ndim)
expected_matrix = np.array([[0.44095855, 0.51959271],
[0.70710678, -0.06490658],
[0.5527708, -0.33146281]])
expected_matrix = np.array([[5.000000e-01, 4.714045e-01],
[7.071068e-01, -3.333333e-01],
[5.000000e-01, -1.425006e-16]])
expected_label = ['a', 'b', 'c']
np.testing.assert_allclose(expected_matrix, matrix)
np.testing.assert_allclose(expected_matrix, matrix, rtol=1e-5)
self.assertListEqual(expected_label, labels)

def test_laplacian_embedding_gpickle(self):
Expand Down
61 changes: 57 additions & 4 deletions tests/test_graph_augmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@

import unittest
import networkx as nx
from topologic import self_loop_augmentation
import numpy as np
from topologic import diagonal_augmentation


class TestDiagonalAugmentation(unittest.TestCase):
def test_diag_aug_for_a_non_graph_raises_exception(self):
with self.assertRaises(TypeError) as raised:
self_loop_augmentation('not a graph')
diagonal_augmentation('not a graph')

self.assertTrue('must be a networkx.Graph' in str(raised.exception))

def test_diag_aug_for_2_nodes_self_loops(self):
graph = nx.Graph([('a', 'b'), ('b', 'c'), ('a', 'a'), ('b', 'b'), ('c', 'c')])
expected_set = {('a', 'a', 0.5), ('a', 'b', None), ('b', 'b', 1.0), ('b', 'c', None), ('c', 'c', 0.5)}

augmented = self_loop_augmentation(graph)
augmented = diagonal_augmentation(graph)
ranked_edge_set = set(augmented.edges(data='weight'))

self.assertEqual(augmented, graph)
Expand All @@ -27,8 +28,60 @@ def test_diag_aug_for_2_nodes_no_self_loops(self):
graph = nx.Graph([('a', 'b'), ('b', 'c')])
expected_set = {('a', 'a', 0.5), ('a', 'b', None), ('b', 'b', 1.0), ('b', 'c', None), ('c', 'c', 0.5)}

augmented = self_loop_augmentation(graph)
augmented = diagonal_augmentation(graph)
ranked_edge_set = set(augmented.edges(data='weight'))

self.assertEqual(augmented, graph)
self.assertEqual(expected_set, ranked_edge_set)

def test_undirected_uses_weighted_degree(self):
    """Check that an undirected graph's diagonal is built from weighted degree."""
    # Symmetric weighted adjacency matrix with an empty diagonal.
    start_adjacency = np.array(
        [
            [0, 1, 1, 0, 0],
            [1, 0, 0, 2, 1],
            [1, 0, 0, 1, 1],
            [0, 2, 1, 0, 0],
            [0, 1, 1, 0, 0],
        ]
    )
    # Each diagonal entry is that row's weight sum divided by 4 (the number
    # of other vertices in this 5-vertex graph); every off-diagonal entry
    # must be left untouched.
    expected = [
        [.5, 1, 1, 0, 0],
        [1, 1, 0, 2, 1],
        [1, 0, .75, 1, 1],
        [0, 2, 1, .75, 0],
        [0, 1, 1, 0, .5],
    ]

    g = diagonal_augmentation(nx.Graph(start_adjacency))
    # to_numpy_array yields a plain ndarray in node-insertion order; the
    # nx.adj_matrix alias used previously was removed in networkx 3.0.
    augmented_adjacency = nx.to_numpy_array(g)

    np.testing.assert_array_equal(augmented_adjacency, expected)

def test_directed_averages_in_out_edge_weights(self):
    """Check that a directed graph's diagonal averages weighted in/out degree."""
    # Asymmetric weighted adjacency (row -> column), including one negative
    # edge weight, with an empty diagonal.
    start_adjacency = np.array(
        [
            [0, 1, -1, 0, 0],
            [0, 0, 0, 2, 1],
            [1, 0, 0, 1, 1],
            [0, 2, 0, 0, 0],
            [0, 0, 1, 0, 0],
        ]
    )
    # Each diagonal entry is (column sum + row sum) / 2, divided by 4 (the
    # number of other vertices); e.g. vertex 0: (1 + 0) / 2 / 4 == .125.
    # Off-diagonal entries must be left untouched.
    expected = [
        [.125, 1, -1, 0, 0],
        [0, .75, 0, 2, 1],
        [1, 0, .375, 1, 1],
        [0, 2, 0, .625, 0],
        [0, 0, 1, 0, .375],
    ]

    g = diagonal_augmentation(nx.DiGraph(start_adjacency))
    # to_numpy_array yields a plain ndarray in node-insertion order; the
    # nx.adj_matrix alias used previously was removed in networkx 3.0.
    augmented_adjacency = nx.to_numpy_array(g)

    np.testing.assert_array_equal(augmented_adjacency, expected)
4 changes: 2 additions & 2 deletions topologic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
largest_connected_component, \
connected_components_generator
from .partitioned_graph import PartitionedGraph
from .graph_augmentation import rank_edges, self_loop_augmentation
from .graph_augmentation import rank_edges, diagonal_augmentation

from . import similarity
from . import io
Expand All @@ -27,6 +27,6 @@
'largest_connected_component',
'number_connected_components',
'PartitionedGraph',
'self_loop_augmentation',
'diagonal_augmentation',
'UnweightedGraphError'
]
6 changes: 3 additions & 3 deletions topologic/embedding/omnibus_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ..connected_components import largest_connected_component
from ..embedding import EmbeddingMethod
from ..graph_augmentation import rank_edges, \
self_loop_augmentation
diagonal_augmentation


def omnibus_embedding(
Expand Down Expand Up @@ -94,7 +94,7 @@ def omnibus_embedding(

starting_graph = largest_connected_component(graphs[0])
starting_graph = rank_edges(starting_graph)
starting_graph = self_loop_augmentation(starting_graph)
starting_graph = diagonal_augmentation(starting_graph)

previous_graph = starting_graph
count = 1
Expand All @@ -104,7 +104,7 @@ def omnibus_embedding(
count = count + 1
current_graph = largest_connected_component(graph)
current_graph = rank_edges(current_graph)
current_graph = self_loop_augmentation(current_graph)
current_graph = diagonal_augmentation(current_graph)

pairwise_graphs = [previous_graph] + [current_graph]
pairwise_graphs_reduced = _reduce_to_common_nodes(pairwise_graphs)
Expand Down
4 changes: 2 additions & 2 deletions topologic/embedding/spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

from .elbow_finder import find_elbows
from ..graph_augmentation import rank_edges, \
self_loop_augmentation
diagonal_augmentation


def _create_augmented_adjacency_matrix(weight_column, working_graph):
logging.debug("rank edges")
ranked_graph = rank_edges(working_graph, weight_column)

logging.debug("add self loops and sensible weights")
augmented_graph = self_loop_augmentation(ranked_graph, weight_column)
augmented_graph = diagonal_augmentation(ranked_graph, weight_column)

sorted_vertices = sorted(augmented_graph.nodes())
graph_matrix = nx.to_scipy_sparse_matrix(augmented_graph, nodelist=sorted_vertices)
Expand Down
33 changes: 19 additions & 14 deletions topologic/graph_augmentation.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Union

import networkx as nx
import numpy as np
from scipy.stats import rankdata

from . import assertions


def self_loop_augmentation(
graph: nx.classes.graph.Graph,
def diagonal_augmentation(
graph: Union[nx.Graph, nx.DiGraph],
weight_column: str = 'weight'
) -> nx.Graph:
"""
Generates a self loop for each vertex in the graph with a generated weight for each vertex that is the ratio
between its degree in the graph and the total number of *other* vertices in the graph, excluding the original
self loop.
This should be used prior to Spectral Embedding techniques to ensure that there is a reasonable value for
each vertex as it will appear in an adjacency matrix.
Replaces the diagonal of the adjacency matrix of the graph with the
weighted degree divided by the number of *other* vertices in the graph.
For directed graphs, the weighted in and out degrees are averaged.
Modifies the provided graph in place as well as returning it.
:param networkx.Graph graph: The networkx graph to diagonally augment
:param str weight_column: The weight column to augment
:return: The networkx Graph object that was modified in place.
:rtype: networkx.Graph
:param graph: The networkx graph which will get a replaced diagonal
:type graph: Union[nx.Graph, nx.DiGraph]
:param str weight_column: The weight column of the edge
:return: The networkx Graph or DiGraph object that was modified in place.
:rtype: Union[nx.Graph, nx.DiGraph]
"""
assertions.assert_is_graph(graph)

Expand All @@ -38,11 +38,16 @@ def self_loop_augmentation(
if graph.has_edge(vertex, vertex):
graph.remove_edge(vertex, vertex)

degree = graph.degree(vertex)
if isinstance(graph, nx.DiGraph):
in_degree = graph.in_degree(vertex, weight=weight_column)
out_degree = graph.out_degree(vertex, weight=weight_column)
weighted_degree = (in_degree + out_degree) / 2
else:
weighted_degree = graph.degree(vertex, weight=weight_column)

# add the augmented weight back onto the diagonal
graph.add_edge(vertex, vertex)
graph[vertex][vertex][weight_column] = degree / (vertex_count - 1)
graph[vertex][vertex][weight_column] = weighted_degree / (vertex_count - 1)

return graph

Expand Down

0 comments on commit dba00c2

Please sign in to comment.