# Weighted spectral embedding

This notebook demonstrates the weighted spectral embedding of a graph, using either unit node weights or internal node weights (the node degrees, for an unweighted graph).

In [1]:
from spectral_embedding import *

In [111]:
# Two estimators to compare side by side: one built with the constructor
# defaults, and one asked to weight nodes by their degree.
spectral = SpectralEmbedding()
weighted_spectral = SpectralEmbedding(node_weights='degree')

## Toy example

In [32]:
import networkx as nx

In [99]:
# Toy graph: the karate club graph bundled with NetworkX.
graph = nx.karate_club_graph()

In [100]:
# Look up every node's 'club' attribute, then keep just the club names
# (in the mapping's iteration order) as the reference partition.
club_by_node = nx.get_node_attributes(graph, 'club')
ground_truth_labels = [club for club in club_by_node.values()]

In [101]:
# Export the graph as a SciPy sparse adjacency matrix.
# `nx.to_scipy_sparse_matrix` was removed in NetworkX 3.0; when it is missing,
# use its replacement `to_scipy_sparse_array` and wrap the result in a
# csr_matrix so downstream code still receives a sparse *matrix*.
if hasattr(nx, 'to_scipy_sparse_matrix'):
    adjacency = nx.to_scipy_sparse_matrix(graph)
else:
    from scipy import sparse

    adjacency = sparse.csr_matrix(nx.to_scipy_sparse_array(graph))

### Embeddings

In [112]:
# Fit the default-weight embedding on the karate club adjacency matrix.
# The call is the cell's last expression, so the estimator repr is displayed.
spectral.fit(adjacency)

<spectral_embedding.SpectralEmbedding at 0x1a20ebb048>

In [113]:
# Fit the degree-weighted embedding on the same karate club adjacency matrix.
# The call is the cell's last expression, so the estimator repr is displayed.
weighted_spectral.fit(adjacency)

<spectral_embedding.SpectralEmbedding at 0x1a20ebb550>

In [114]:
# Pull the fitted coordinates out of both estimators in one step.
embedding, weighted_embedding = (
    spectral.embedding_,
    weighted_spectral.embedding_,
)

In [115]:
# Import NumPy explicitly instead of relying on the star import at the top of
# the notebook to provide `np`.
import numpy as np

# Rescale every row of each embedding to unit Euclidean norm.
# keepdims=True keeps the row norms as a column vector so the division
# broadcasts across each row (no transposes needed).
normalized_embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
normalized_weighted_embedding = weighted_embedding / np.linalg.norm(
    weighted_embedding, axis=1, keepdims=True
)

### Clusterings

In [116]:
from sklearn.cluster import KMeans

In [117]:
n_clusters = 2
# k-means starts from random centres; fix the seed so the cluster sizes
# reported below are the same on every run.
kmeans = KMeans(n_clusters=n_clusters, random_state=0)

# Refit the same estimator on each embedding variant, in this order, copying
# the labels out before the next fit overwrites `kmeans.labels_`.
# (`fit` returns the estimator itself, so `.labels_` can be read off the call.)
labels, normalized_labels, weighted_labels, normalized_weighted_labels = (
    list(kmeans.fit(points).labels_)
    for points in (
        embedding,
        normalized_embedding,
        weighted_embedding,
        normalized_weighted_embedding,
    )
)

In [118]:
# `Counter` is first needed here, but the notebook only imports it in a later
# cell; import it now so the cell works on a fresh kernel run top to bottom.
from collections import Counter

# Ground truth: number of members in each club
Counter(ground_truth_labels)

Counter({'Mr. Hi': 17, 'Officer': 17})

In [119]:
# Spectral embedding: cluster sizes for the raw and the row-normalized coordinates
tuple(map(Counter, (labels, normalized_labels)))

(Counter({0: 19, 1: 15}), Counter({0: 18, 1: 16}))

In [120]:
# Weighted spectral embedding: cluster sizes for the raw and the row-normalized coordinates
tuple(map(Counter, (weighted_labels, normalized_weighted_labels)))

(Counter({0: 15, 1: 19}), Counter({0: 16, 1: 18}))

## Real data

In [70]:
import urllib.request

# Base location of the hosted graph collection, and the archive to fetch from it.
url = "http://perso.telecom-paristech.fr/~bonald/graphs/"
dataset = "openflights.graphml.gz"
# Save the remote file locally under the same name (relative path);
# urlretrieve returns a (local filename, headers) pair.
download = urllib.request.urlretrieve(f"{url}{dataset}", filename=dataset)

In [71]:
# Load the downloaded GraphML archive, converting node ids to int.
graph = nx.read_graphml(dataset, node_type=int)
# `nx.info` was removed in NetworkX 3.0, where `str(graph)` gives a short
# summary instead; use whichever is available.
describe = getattr(nx, 'info', str)
print(describe(graph))

Name: Openflights
Type: Graph
Number of nodes: 3097
Number of edges: 18193
Average degree:  11.7488


In [73]:
# Export the OpenFlights graph as a SciPy sparse adjacency matrix.
# `nx.to_scipy_sparse_matrix` was removed in NetworkX 3.0; when it is missing,
# use its replacement `to_scipy_sparse_array` and wrap the result in a
# csr_matrix so downstream code still receives a sparse *matrix*.
if hasattr(nx, 'to_scipy_sparse_matrix'):
    adjacency = nx.to_scipy_sparse_matrix(graph)
else:
    from scipy import sparse

    adjacency = sparse.csr_matrix(nx.to_scipy_sparse_array(graph))

### Embeddings

In [74]:
# Refit the same default-weight estimator, now on the OpenFlights adjacency
# matrix; this replaces the embedding fitted on the toy graph.
spectral.fit(adjacency)

<spectral_embedding.SpectralEmbedding at 0x1a20ee1588>

In [75]:
# Refit the same degree-weighted estimator, now on the OpenFlights adjacency
# matrix; this replaces the embedding fitted on the toy graph.
weighted_spectral.fit(adjacency)

<spectral_embedding.SpectralEmbedding at 0x1a20ee1550>

In [77]:
# Pull the freshly fitted coordinates out of both estimators in one step
# (these names previously held the toy-graph embeddings).
embedding, weighted_embedding = (
    spectral.embedding_,
    weighted_spectral.embedding_,
)

In [91]:
# Import NumPy explicitly instead of relying on the star import at the top of
# the notebook to provide `np`.
import numpy as np

# Rescale every row of each embedding to unit Euclidean norm.
# keepdims=True keeps the row norms as a column vector so the division
# broadcasts across each row (no transposes needed).
normalized_embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
normalized_weighted_embedding = weighted_embedding / np.linalg.norm(
    weighted_embedding, axis=1, keepdims=True
)

### Clusterings

In [None]:
from sklearn.cluster import KMeans

In [92]:
n_clusters = 10
# k-means starts from random centres; fix the seed so the cluster sizes
# reported below are the same on every run.
kmeans = KMeans(n_clusters=n_clusters, random_state=0)

# Refit the same estimator on each embedding variant, in this order, copying
# the labels out before the next fit overwrites `kmeans.labels_`.
# (`fit` returns the estimator itself, so `.labels_` can be read off the call.)
labels, normalized_labels, weighted_labels, normalized_weighted_labels = (
    list(kmeans.fit(points).labels_)
    for points in (
        embedding,
        normalized_embedding,
        weighted_embedding,
        normalized_weighted_embedding,
    )
)

In [93]:
from collections import Counter

In [94]:
# Cluster sizes from k-means on the raw (unnormalized) spectral embedding.
Counter(labels)

Counter({0: 2985, 1: 3, 2: 5, 3: 18, 4: 30, 5: 3, 6: 26, 7: 4, 8: 21, 9: 2})

In [95]:
# Cluster sizes from k-means on the row-normalized spectral embedding.
Counter(normalized_labels)

Counter({0: 67,
         1: 947,
         2: 92,
         3: 96,
         4: 585,
         5: 111,
         6: 135,
         7: 149,
         8: 454,
         9: 461})

In [96]:
# Cluster sizes from k-means on the raw (unnormalized) degree-weighted embedding.
Counter(weighted_labels)

Counter({0: 2965, 1: 2, 2: 26, 3: 21, 4: 3, 5: 21, 6: 30, 7: 11, 8: 3, 9: 15})

In [97]:
# Cluster sizes from k-means on the row-normalized degree-weighted embedding.
Counter(normalized_weighted_labels)

Counter({0: 639,
         1: 595,
         2: 154,
         3: 214,
         4: 525,
         5: 253,
         6: 307,
         7: 140,
         8: 106,
         9: 164})