Skip to content

Commit

Permalink
Page rank algorithm support (#623)
Browse files Browse the repository at this point in the history
* Added PageRank algorithm.

* Added page rank doc test.

* Added UnitMeasure trait to generalize Page rank.

* Some code refactoring.

* Added benchmark and deleted iteration prints.

* Moved page rank test in tests folder.

* Added doc and quickcheck to match contribution guide requirements.

* Added time and space complexity.

* Correction of doc typo.

* Added parallel computation of page rank along with test and bench.

* Improved performance on edge iterations. Dropped a useless trait constraint.

* Corrected NaN values in parallel page ranks. Improved performance by iterating once over out edges.

* Added tolerance to speed up further parallel page rank.

* Added empty vec return instead of panicking on empty graph. Doc for UnitMeasure.

* Apply suggestions from code review

Return empty vec from parallel_pg too. Add link to doc.

---------

Co-authored-by: Agustín Borgna <agustinborgna@gmail.com>
  • Loading branch information
geosarr and ABorgna committed Apr 1, 2024
1 parent c71f6e4 commit 4678de4
Show file tree
Hide file tree
Showing 5 changed files with 366 additions and 1 deletion.
36 changes: 36 additions & 0 deletions benches/page_rank.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#![feature(test)]
extern crate petgraph;
extern crate test;

use test::Bencher;

use petgraph::algo::page_rank;

#[allow(dead_code)]
mod common;

use common::directed_fan;

#[cfg(feature = "rayon")]
use petgraph::algo::page_rank::parallel_page_rank;
#[cfg(feature = "rayon")]
use rayon::prelude::*;

/// Benchmarks the sequential `page_rank` on a `directed_fan` graph built from 500 nodes,
/// with a damping factor of 0.6 and 10 iterations.
#[bench]
fn page_rank_bench(bench: &mut Bencher) {
    // A plain compile-time constant: `const` is enough, no `static` storage is needed.
    const NODE_COUNT: usize = 500;
    let g = directed_fan(NODE_COUNT);
    // Return the ranks instead of binding them to an unused local, so that the value
    // is handed to `Bencher::iter` rather than being dropped inside the closure.
    bench.iter(|| page_rank(&g, 0.6_f64, 10));
}

/// Benchmarks `parallel_page_rank` on a `directed_fan` graph built from 2000 nodes,
/// with a damping factor of 0.6, at most 100 iterations and the default tolerance.
#[bench]
#[cfg(feature = "rayon")]
fn par_page_rank_bench(bench: &mut Bencher) {
    // A plain compile-time constant: `const` is enough, no `static` storage is needed.
    const NODE_COUNT: usize = 2_000;
    let g = directed_fan(NODE_COUNT);
    // Return the ranks instead of binding them to an unused local, so that the value
    // is handed to `Bencher::iter` rather than being dropped inside the closure.
    bench.iter(|| parallel_page_rank(&g, 0.6_f64, 100, None));
}
43 changes: 43 additions & 0 deletions src/algo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub mod floyd_warshall;
pub mod isomorphism;
pub mod k_shortest_path;
pub mod matching;
pub mod page_rank;
pub mod simple_paths;
pub mod tred;

Expand Down Expand Up @@ -44,6 +45,7 @@ pub use isomorphism::{
};
pub use k_shortest_path::k_shortest_path;
pub use matching::{greedy_matching, maximum_matching, Matching};
pub use page_rank::page_rank;
pub use simple_paths::all_simple_paths;

/// \[Generic\] Return the number of connected components of the graph.
Expand Down Expand Up @@ -901,3 +903,44 @@ macro_rules! impl_bounded_measure_float(
);

impl_bounded_measure_float!(f32, f64);

/// A floating-point measure that can be computed from `usize`
/// and with a default measure of proximity.
///
/// On top of [`Measure`], implementors must support subtraction, multiplication
/// and division with themselves, and summation over an iterator.
pub trait UnitMeasure:
    Measure
    + std::ops::Sub<Output = Self>
    + std::ops::Mul<Output = Self>
    + std::ops::Div<Output = Self>
    + std::iter::Sum
{
    /// Returns the value zero of this measure (`0` cast to the type for `f32` and `f64`).
    fn zero() -> Self;
    /// Returns the value one of this measure (`1` cast to the type for `f32` and `f64`).
    fn one() -> Self;
    /// Converts `nb` into this measure.
    ///
    /// The `f32` and `f64` implementations use an `as` cast, so very large values
    /// may be rounded.
    fn from_usize(nb: usize) -> Self;
    /// Returns the default tolerance (`1e-6` for `f32` and `f64`).
    fn default_tol() -> Self;
}

// Implements `UnitMeasure` for every listed type by casting the literals
// `0`, `1` and `1e-6`, and the `usize` argument, with `as`.
macro_rules! impl_unit_measure(
    ( $( $t:ident ),* )=> {
        $(
            impl UnitMeasure for $t {
                fn zero() -> Self {
                    0 as $t
                }
                fn one() -> Self {
                    1 as $t
                }

                // Plain `as` conversion from `usize` to the target type.
                fn from_usize(nb: usize) -> Self {
                    nb as $t
                }

                // The same default tolerance is used for every implementing type.
                fn default_tol() -> Self {
                    1e-6 as $t
                }

            }

        )*
    }
);
// Only the two primitive floating-point types implement `UnitMeasure` here.
impl_unit_measure!(f32, f64);
185 changes: 185 additions & 0 deletions src/algo/page_rank.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
use crate::visit::{EdgeRef, IntoEdges, NodeCount, NodeIndexable};

#[cfg(feature = "rayon")]
use rayon::prelude::*;

use super::UnitMeasure;
/// \[Generic\] Page Rank algorithm.
///
/// Computes the ranks of every node in a graph using the [Page Rank algorithm][pr].
///
/// Returns a `Vec` container mapping each node index to its rank.
/// An empty graph yields an empty `Vec`.
///
/// # Panics
/// Panics if the graph is not empty and the damping factor is not between 0 and 1
/// (0 and 1 included). The damping factor should be a number of type `f32` or `f64`.
///
/// # Complexity
/// Time complexity is **O(N|V|²|E|)**.
/// Space complexity is **O(|V| + |E|)**
/// where **N** is the number of iterations, **|V|** the number of vertices (i.e nodes) and **|E|** the number of edges.
///
/// [pr]: https://en.wikipedia.org/wiki/PageRank
///
/// # Example
/// ```rust
/// use petgraph::Graph;
/// use petgraph::algo::page_rank;
/// let mut g: Graph<(), usize> = Graph::new();
/// assert_eq!(page_rank(&g, 0.5_f64, 1), vec![]); // empty graphs have no node ranks.
/// let a = g.add_node(());
/// let b = g.add_node(());
/// let c = g.add_node(());
/// let d = g.add_node(());
/// let e = g.add_node(());
/// g.extend_with_edges(&[(0, 1), (0, 3), (1, 2), (1, 3)]);
/// // With the following dot representation.
/// //digraph {
/// // 0 [ label = "()" ]
/// // 1 [ label = "()" ]
/// // 2 [ label = "()" ]
/// // 3 [ label = "()" ]
/// // 4 [ label = "()" ]
/// // 0 -> 1 [ label = "0.0" ]
/// // 0 -> 3 [ label = "0.0" ]
/// // 1 -> 2 [ label = "0.0" ]
/// // 1 -> 3 [ label = "0.0" ]
/// //}
/// let damping_factor = 0.7_f32;
/// let number_iterations = 10;
/// let output_ranks = page_rank(&g, damping_factor, number_iterations);
/// let expected_ranks = vec![0.14685437, 0.20267677, 0.22389607, 0.27971846, 0.14685437];
/// assert_eq!(expected_ranks, output_ranks);
/// ```
pub fn page_rank<G, D>(graph: G, damping_factor: D, nb_iter: usize) -> Vec<D>
where
    G: NodeCount + IntoEdges + NodeIndexable,
    D: UnitMeasure + Copy,
{
    let node_count = graph.node_count();
    if node_count == 0 {
        return vec![];
    }
    assert!(
        D::zero() <= damping_factor && damping_factor <= D::one(),
        "Damping factor should be between 0 et 1."
    );
    let nb = D::from_usize(node_count);
    // Every node starts with the same rank, 1 / |V|.
    let mut ranks = vec![D::one() / nb; node_count];
    let nodeix = |i| graph.from_index(i);
    // Out-degrees never change between iterations, so count them once.
    let out_degrees: Vec<D> = (0..node_count)
        .map(|i| graph.edges(nodeix(i)).map(|_| D::one()).sum::<D>())
        .collect();

    for _ in 0..nb_iter {
        let pi: Vec<D> = (0..node_count)
            .map(|v| {
                // The target node is the same for every `w`; resolve it once per `v`.
                let target = nodeix(v);
                ranks
                    .iter()
                    .enumerate()
                    .map(|(w, r)| {
                        if graph.edges(nodeix(w)).any(|e| e.target() == target) {
                            damping_factor * *r / out_degrees[w]
                        } else if out_degrees[w] == D::zero() {
                            damping_factor * *r / nb // stochastic matrix condition
                        } else {
                            (D::one() - damping_factor) * *r / nb // random jumps
                        }
                    })
                    .sum::<D>()
            })
            .collect();
        // Rescale so that the ranks of one iteration sum up to one.
        let sum = pi.iter().copied().sum::<D>();
        ranks = pi.iter().map(|r| *r / sum).collect();
    }
    ranks
}

/// Walks the out-edges of the node at index `index_w` once and returns its
/// out-degree (counted as a `D`) together with a flag telling whether at least
/// one of those edges targets the node at index `index_v`.
// The only caller in this file is `parallel_page_rank`, which is compiled only
// with the `rayon` feature; without it this helper is unused.
#[allow(dead_code)]
fn out_edges_info<G, D>(graph: G, index_w: usize, index_v: usize) -> (D, bool)
where
    G: NodeCount + IntoEdges + NodeIndexable + std::marker::Sync,
    D: UnitMeasure + Copy + std::marker::Send + std::marker::Sync,
{
    let node_w = graph.from_index(index_w);
    let node_v = graph.from_index(index_v);
    let mut out_degree = D::zero();
    let mut flag_points_to = false;
    for edge in graph.edges(node_w) {
        out_degree = out_degree + D::one();
        if edge.target() == node_v {
            flag_points_to = true;
        }
    }
    (out_degree, flag_points_to)
}
/// \[Generic\] Parallel Page Rank algorithm.
///
/// See [`page_rank`].
///
/// The ranks of one iteration are computed in parallel over the nodes.
/// Iteration stops early, before `nb_iter` iterations are done, as soon as the
/// squared Euclidean distance between two successive rank vectors is at most `tol`.
/// When `tol` is `None`, `D::default_tol()` is used.
///
/// Returns a `Vec` container mapping each node index to its rank.
/// An empty graph yields an empty `Vec`.
///
/// # Panics
/// Panics if the graph is not empty and the damping factor is not between 0 and 1
/// (0 and 1 included).
#[cfg(feature = "rayon")]
pub fn parallel_page_rank<G, D>(
    graph: G,
    damping_factor: D,
    nb_iter: usize,
    tol: Option<D>,
) -> Vec<D>
where
    G: NodeCount + IntoEdges + NodeIndexable + std::marker::Sync,
    D: UnitMeasure + Copy + std::marker::Send + std::marker::Sync,
{
    let node_count = graph.node_count();
    if node_count == 0 {
        return vec![];
    }
    assert!(
        D::zero() <= damping_factor && damping_factor <= D::one(),
        "Damping factor should be between 0 et 1."
    );
    let tolerance = tol.unwrap_or_else(D::default_tol);
    let nb = D::from_usize(node_count);
    // Every node starts with the same rank, 1 / |V|.
    let mut ranks: Vec<D> = vec![D::one() / nb; node_count];
    for _ in 0..nb_iter {
        let pi = (0..node_count)
            .into_par_iter()
            .map(|v| {
                ranks
                    .iter()
                    .enumerate()
                    .map(|(w, r)| {
                        let (out_deg, w_points_to_v) = out_edges_info(graph, w, v);
                        if w_points_to_v {
                            damping_factor * *r / out_deg
                        } else if out_deg == D::zero() {
                            damping_factor * *r / nb // stochastic matrix condition
                        } else {
                            (D::one() - damping_factor) * *r / nb // random jumps
                        }
                    })
                    .sum::<D>()
            })
            .collect::<Vec<D>>();
        // Rescale so that the ranks of one iteration sum up to one.
        let sum = pi.par_iter().map(|score| *score).sum::<D>();
        let new_ranks = pi.par_iter().map(|r| *r / sum).collect::<Vec<D>>();
        let squared_norm_2 = new_ranks
            .par_iter()
            .zip(&ranks)
            .map(|(new, old)| (*new - *old) * (*new - *old))
            .sum::<D>();
        if squared_norm_2 <= tolerance {
            // Converged: hand back the iterate that was just computed rather than
            // the previous one, so the last iteration's work is not thrown away.
            return new_ranks;
        }
        ranks = new_ranks;
    }
    ranks
}
83 changes: 83 additions & 0 deletions tests/page_rank.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use petgraph::{algo::page_rank, Graph};

#[cfg(feature = "rayon")]
use petgraph::algo::page_rank::parallel_page_rank;

/// Builds the directed example graph shared by the page rank tests:
/// 13 nodes labelled "A" to "M" and 19 edges with default `f32` weights.
fn graph_example() -> Graph<String, f32> {
    // Taken and adapted from https://github.com/neo4j-labs/graph?tab=readme-ov-file#how-to-run-algorithms
    let mut graph = Graph::<_, f32>::new();
    // Nodes are added in alphabetical order, so "A" is node 0, "B" is node 1, ..., "M" is node 12.
    for label in "ABCDEFGHIJKLM".chars() {
        graph.add_node(label.to_string());
    }
    // Each comment names the endpoints using the labels assigned above.
    graph.extend_with_edges(&[
        (1, 2),  // B->C
        (2, 1),  // C->B
        (4, 0),  // E->A
        (4, 1),  // E->B
        (5, 4),  // F->E
        (5, 1),  // F->B
        (5, 6),  // F->G
        (6, 1),  // G->B
        (6, 5),  // G->F
        (7, 1),  // H->B
        (7, 5),  // H->F
        (8, 1),  // I->B
        (8, 5),  // I->F
        (9, 1),  // J->B
        (9, 5),  // J->F
        (10, 1), // K->B
        (10, 5), // K->F
        (11, 5), // L->F
        (12, 5), // M->F
    ]);
    graph
}

/// Reference ranks for the graph built by `graph_example`, one entry per node index.
///
/// The tests compare these values with the ranks computed using a damping
/// factor of 0.85 and 100 iterations.
fn expected_ranks() -> Vec<f32> {
    // Nodes 3 and 7 to 12 have no incoming edge in `graph_example`; they share one value.
    const NO_INCOMING: f32 = 0.014170233;
    // Nodes 4 and 6 are each reached by a single edge, coming from node 5.
    const ONLY_FROM_NODE_5: f32 = 0.035662483;

    [
        0.029228685,
        0.38176042,
        0.3410649,
        NO_INCOMING,
        ONLY_FROM_NODE_5,
        0.077429585,
        ONLY_FROM_NODE_5,
        NO_INCOMING,
        NO_INCOMING,
        NO_INCOMING,
        NO_INCOMING,
        NO_INCOMING,
        NO_INCOMING,
    ]
    .to_vec()
}

/// The sequential page rank of the example graph (damping factor 0.85,
/// 100 iterations) must be exactly equal to the reference ranks.
#[test]
fn test_page_rank() {
    let computed = page_rank(&graph_example(), 0.85_f32, 100);
    assert_eq!(expected_ranks(), computed);
}

/// The parallel page rank of the example graph (damping factor 0.85, at most
/// 100 iterations, tolerance 1e-12) must match the reference ranks within 1e-6.
#[test]
#[cfg(feature = "rayon")]
fn test_par_page_rank() {
    let graph = graph_example();
    let expected = expected_ranks();
    let output_ranks = parallel_page_rank(&graph, 0.85_f32, 100, Some(1e-12));
    // `zip` stops at the shorter side, so a truncated output would otherwise go unnoticed.
    assert_eq!(expected.len(), output_ranks.len());
    // A NaN on either side makes the difference NaN, which fails the `<=` comparison,
    // so NaN ranks are rejected without a separate check.
    assert!(expected
        .iter()
        .zip(&output_ranks)
        .all(|(expected, computed)| (expected - computed).abs() <= 1e-6));
}
20 changes: 19 additions & 1 deletion tests/quickcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use petgraph::algo::{
bellman_ford, condensation, dijkstra, find_negative_cycle, floyd_warshall,
greedy_feedback_arc_set, greedy_matching, is_cyclic_directed, is_cyclic_undirected,
is_isomorphic, is_isomorphic_matching, k_shortest_path, kosaraju_scc, maximum_matching,
min_spanning_tree, tarjan_scc, toposort, Matching,
min_spanning_tree, page_rank, tarjan_scc, toposort, Matching,
};
use petgraph::data::FromElements;
use petgraph::dot::{Config, Dot};
Expand Down Expand Up @@ -1312,3 +1312,21 @@ quickcheck! {
true
}
}

quickcheck! {
    // The ranks form a probability distribution:
    // none of them may be negative and together they should sum up to 1.
    fn test_page_rank_proba(gr: Graph<(), f32>) -> bool {
        // An empty graph has no ranks, so there is nothing to check.
        if gr.node_count() == 0 {
            return true;
        }
        let tol = 1e-10;
        let ranks: Vec<f64> = page_rank(&gr, 0.85_f64, 5);
        let has_negative_rank = ranks.iter().any(|rank| *rank < 0.);
        let total: f64 = ranks.iter().sum();
        let sum_is_off = (total - 1.).abs() > tol;
        !(has_negative_rank || sum_is_off)
    }
}

0 comments on commit 4678de4

Please sign in to comment.