Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stochastic zero row + pagerank #1001

Merged
merged 7 commits into from
Nov 6, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions networkx/algorithms/link_analysis/pagerank_alg.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

@not_implemented_for('multigraph')
def pagerank(G, alpha=0.85, personalization=None,
max_iter=100, tol=1.0e-8, nstart=None, weight='weight',
max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
dangling=None):
"""Return the PageRank of the nodes in the graph.

Expand All @@ -27,7 +27,8 @@ def pagerank(G, alpha=0.85, personalization=None,
Parameters
-----------
G : graph
A NetworkX graph.
A NetworkX graph. Undirected graphs will be converted to a directed
graph with two directed edges for each undirected edge.

alpha : float, optional
Damping parameter for PageRank, default=0.85.
Expand Down Expand Up @@ -77,7 +78,7 @@ def pagerank(G, alpha=0.85, personalization=None,

The PageRank algorithm was designed for directed graphs but this
algorithm does not check if the input graph is directed and will
execute on undirected graphs by converting each oriented edge in the
execute on undirected graphs by converting each edge in the
undirected graph to two directed edges.

See Also
Expand Down Expand Up @@ -136,7 +137,7 @@ def pagerank(G, alpha=0.85, personalization=None,
'Missing nodes %s' % missing)
s = float(sum(dangling.values()))
dangling_weights = dict((k, v/s) for k, v in dangling.items())
dangling_nodes = [n for n in W if W.out_degree(n) == 0.0]
dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

# power iteration: make up to max_iter iterations
for _ in range(max_iter):
Expand All @@ -151,21 +152,21 @@ def pagerank(G, alpha=0.85, personalization=None,
x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
# check convergence, l1 norm
err = sum([abs(x[n] - xlast[n]) for n in x])
if err < tol:
if err < N*tol:
return x
raise NetworkXError('pagerank: power iteration failed to converge '
'in %d iterations.' % max_iter)


@not_implemented_for('multigraph')
def google_matrix(G, alpha=0.85, personalization=None,
nodelist=None, weight='weight', dangling=None):
"""Return the Google matrix of the graph.

Parameters
-----------
G : graph
A NetworkX graph.
A NetworkX graph. Undirected graphs will be converted to a directed
graph with two directed edges for each undirected edge.

alpha : float
The damping factor.
Expand Down Expand Up @@ -205,6 +206,10 @@ def google_matrix(G, alpha=0.85, personalization=None,
there exists a path between every pair of nodes in the graph, or else there
is the potential of "rank sinks."

This implementation works with Multi(Di)Graphs. For multigraphs the
weight between two nodes is set to be the sum of all edge weights
between those nodes.

See Also
--------
pagerank, pagerank_numpy, pagerank_scipy
Expand Down Expand Up @@ -255,7 +260,6 @@ def google_matrix(G, alpha=0.85, personalization=None,
return alpha * M + (1 - alpha) * np.outer(np.ones(N), p)


@not_implemented_for('multigraph')
def pagerank_numpy(G, alpha=0.85, personalization=None, weight='weight',
dangling=None):
"""Return the PageRank of the nodes in the graph.
Expand All @@ -267,7 +271,8 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight='weight',
Parameters
-----------
G : graph
A NetworkX graph.
A NetworkX graph. Undirected graphs will be converted to a directed
graph with two directed edges for each undirected edge.

alpha : float, optional
Damping parameter for PageRank, default=0.85.
Expand Down Expand Up @@ -305,7 +310,9 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight='weight',
eigenvalue solvers. This will be the fastest and most accurate
for small graphs.

This implementation works with Multi(Di)Graphs.
This implementation works with Multi(Di)Graphs. For multigraphs the
weight between two nodes is set to be the sum of all edge weights
between those nodes.

See Also
--------
Expand Down Expand Up @@ -334,7 +341,6 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight='weight',
return dict(zip(G, map(float, largest / norm)))


@not_implemented_for('multigraph')
def pagerank_scipy(G, alpha=0.85, personalization=None,
max_iter=100, tol=1.0e-6, weight='weight',
dangling=None):
Expand All @@ -347,7 +353,8 @@ def pagerank_scipy(G, alpha=0.85, personalization=None,
Parameters
-----------
G : graph
A NetworkX graph.
A NetworkX graph. Undirected graphs will be converted to a directed
graph with two directed edges for each undirected edge.

alpha : float, optional
Damping parameter for PageRank, default=0.85.
Expand Down Expand Up @@ -390,6 +397,10 @@ def pagerank_scipy(G, alpha=0.85, personalization=None,
The eigenvector calculation uses power iteration with a SciPy
sparse matrix representation.

This implementation works with Multi(Di)Graphs. For multigraphs the
weight between two nodes is set to be the sum of all edge weights
between those nodes.

See Also
--------
pagerank, pagerank_numpy, google_matrix
Expand All @@ -413,7 +424,7 @@ def pagerank_scipy(G, alpha=0.85, personalization=None,
M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
dtype=float)
S = scipy.array(M.sum(axis=1)).flatten()
S[S > 0] = 1.0 / S[S > 0]
S[S != 0] = 1.0 / S[S != 0]
Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
M = Q * M

Expand Down
25 changes: 12 additions & 13 deletions networkx/generators/stochastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
# All rights reserved.
# BSD license.
import networkx as nx
from networkx.utils import not_implemented_for
__author__ = "Aric Hagberg <aric.hagberg@gmail.com>"
__all__ = ['stochastic_graph']

@not_implemented_for('multigraph')
@not_implemented_for('undirected')
def stochastic_graph(G, copy=True, weight='weight'):
"""Return a right-stochastic representation of G.

Expand All @@ -17,30 +20,26 @@ def stochastic_graph(G, copy=True, weight='weight'):

Parameters
-----------
G : graph
A NetworkX graph
G : directed graph
A NetworkX DiGraph

copy : boolean, optional
If True make a copy of the graph, otherwise modify the original graph

weight : edge attribute key (optional, default='weight')
Edge data key used for weight. If no attribute is found for an edge
the edge weight is set to 1.
the edge weight is set to 1. Weights must be positive numbers.
"""
if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
raise nx.NetworkXError('stochastic_graph not implemented '
'for multigraphs')

if not G.is_directed():
raise nx.NetworkXError('stochastic_graph not implemented '
'for undirected graphs')

import warnings
if copy:
W = nx.DiGraph(G)
else:
W = G # reference original graph, no copy

degree = W.out_degree(weight=weight)
for (u,v,d) in W.edges(data=True):
d[weight] = float(d.get(weight,1.0))/degree[u]
if degree[u] == 0:
warnings.warn('zero out-degree for node %s'%u)
d[weight] = 0.0
else:
d[weight] = float(d.get(weight,1.0))/degree[u]
return W
4 changes: 2 additions & 2 deletions networkx/generators/tests/test_stochastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def test_stochastic_ints():
[(0, 1, {'weight': 0.5}),
(0, 2, {'weight': 0.5})])

@raises(nx.NetworkXError)
@raises(nx.NetworkXNotImplemented)
def test_stochastic_graph_input():
S = nx.stochastic_graph(nx.Graph())

@raises(nx.NetworkXError)
@raises(nx.NetworkXNotImplemented)
def test_stochastic_multigraph_input():
S = nx.stochastic_graph(nx.MultiGraph())