From 1517ffa66af3650ce7f9d991baf18f35b56850e4 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 18 Jul 2018 11:39:50 +0100 Subject: [PATCH 01/12] test showing just normal PageRank --- .../impl/PersonalizedPageRankTest.java | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java new file mode 100644 index 000000000..4e3ec87f4 --- /dev/null +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -0,0 +1,175 @@ +/** + * Copyright (c) 2017 "Neo4j, Inc." + * + * This file is part of Neo4j Graph Algorithms . + * + * Neo4j Graph Algorithms is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.graphalgo.impl; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.neo4j.graphalgo.TestDatabaseCreator; +import org.neo4j.graphalgo.api.Graph; +import org.neo4j.graphalgo.api.GraphFactory; +import org.neo4j.graphalgo.core.GraphLoader; +import org.neo4j.graphalgo.core.heavyweight.HeavyCypherGraphFactory; +import org.neo4j.graphalgo.core.heavyweight.HeavyGraphFactory; +import org.neo4j.graphalgo.core.huge.HugeGraphFactory; +import org.neo4j.graphalgo.core.neo4jview.GraphViewFactory; +import org.neo4j.graphdb.Direction; +import org.neo4j.graphdb.Label; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.Transaction; +import org.neo4j.kernel.internal.GraphDatabaseAPI; + +import java.util.*; +import java.util.stream.IntStream; + +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public final class PersonalizedPageRankTest { + + private Class graphImpl; + + @Parameterized.Parameters(name = "{1}") + public static Collection data() { + return Arrays.asList( + new Object[]{HeavyGraphFactory.class, "HeavyGraphFactory"}, + new Object[]{HeavyCypherGraphFactory.class, "HeavyCypherGraphFactory"} + ); + } + + private static final String DB_CYPHER = "" + + "CREATE (john:Person {name:\"John\"})\n" + + "CREATE (mary:Person {name:\"Mary\"})\n" + + "CREATE (jill:Person {name:\"Jill\"})\n" + + "CREATE (todd:Person {name:\"Todd\"})\n" + + + "CREATE (iphone:Product {name:\"iPhone5\"})\n" + + "CREATE (kindle:Product {name:\"Kindle Fire\"})\n" + + "CREATE (fitbit:Product {name:\"Fitbit Flex Wireless\"})\n" + + "CREATE (potter:Product {name:\"Harry Potter\"})\n" + + "CREATE (hobbit:Product {name:\"Hobbit\"})\n" + + + "CREATE\n" + + " (john)-[:PURCHASED]->(iphone),\n" + + " (john)<-[:PURCHASED_BY]-(iphone),\n" + + + " (john)-[:PURCHASED]->(kindle),\n" + + " (john)<-[:PURCHASED_BY]-(kindle),\n" + + + " (mary)-[:PURCHASED]->(iphone),\n" + + " (mary)<-[:PURCHASED_BY]-(iphone),\n" + + + " (mary)-[:PURCHASED]->(kindle),\n" + + " (mary)<-[:PURCHASED_BY]-(kindle),\n" + + + " (mary)-[:PURCHASED]->(fitbit),\n" + + " (mary)<-[:PURCHASED_BY]-(fitbit),\n" + + + " (jill)-[:PURCHASED]->(iphone),\n" + + " (jill)<-[:PURCHASED_BY]-(iphone),\n" + + + " (jill)-[:PURCHASED]->(kindle),\n" + + " (jill)<-[:PURCHASED_BY]-(kindle),\n" + + + " (jill)-[:PURCHASED]->(fitbit),\n" + + " (jill)<-[:PURCHASED_BY]-(fitbit),\n" + + + " (todd)-[:PURCHASED]->(fitbit),\n" + + " (todd)<-[:PURCHASED_BY]-(fitbit),\n" + + + " (todd)-[:PURCHASED]->(potter),\n" + + " (todd)<-[:PURCHASED_BY]-(potter),\n" + + + " (todd)-[:PURCHASED]->(hobbit),\n" + + " (todd)<-[:PURCHASED_BY]-(hobbit)"; + + private static GraphDatabaseAPI db; + + @BeforeClass + public static void setupGraph() { + db = TestDatabaseCreator.createTestDatabase(); + try (Transaction tx = db.beginTx()) { + db.execute(DB_CYPHER).close(); + tx.success(); + } + } + + @AfterClass + public static void shutdownGraph() throws Exception { + if (db!=null) db.shutdown(); + } + + public PersonalizedPageRankTest( + Class graphImpl, + String nameIgnoredOnlyForTestName) { + this.graphImpl = graphImpl; + } + + @Test + public void test() throws Exception { + final Graph graph; + if (graphImpl.isAssignableFrom(HeavyCypherGraphFactory.class)) { + graph = new GraphLoader(db) + .withLabel("MATCH (n:Label1) RETURN id(n) as id") + .withRelationshipType("MATCH (n:Label1)-[:TYPE1]->(m:Label1) RETURN id(n) as source,id(m) as target") + .load(graphImpl); + + } else { + graph = new GraphLoader(db) + .withDirection(Direction.BOTH) + .load(graphImpl); + } + + final PageRankResult rankResult = PageRankAlgorithm + .of(graph, 0.85) + .compute(40) + .result(); + + Map results = new TreeMap<>(); + + try(Transaction tx = db.beginTx()) { + for (Node node : db.getAllNodes()) { + double score = rankResult.score(node.getId()); + results.put(node.getProperty("name").toString(), score); + + } + } + + Map longDoubleMap = sortByValue(results); + for (String aLong : longDoubleMap.keySet()) { + System.out.println("aLong = " + aLong + " => " + longDoubleMap.get(aLong)); + } + + + } + + private static > Map sortByValue(Map map) { + List> list = new ArrayList<>(map.entrySet()); + list.sort(Map.Entry.comparingByValue(Comparator.reverseOrder())); + + Map result = new LinkedHashMap<>(); + for (Map.Entry entry : list) { + result.put(entry.getKey(), entry.getValue()); + } + + return result; + } +} From 71de16e1bf803b75458a8fd0c9918145d9b7d5fb Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 18 Jul 2018 13:18:37 +0100 Subject: [PATCH 02/12] PPR test WIP --- .../impl/PersonalizedPageRankTest.java | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index 4e3ec87f4..348f984cd 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -51,10 +51,11 @@ public final class PersonalizedPageRankTest { public static Collection data() { return Arrays.asList( new Object[]{HeavyGraphFactory.class, "HeavyGraphFactory"}, - new Object[]{HeavyCypherGraphFactory.class, "HeavyCypherGraphFactory"} + new Object[]{HeavyCypherGraphFactory.class, "HeavyCypherGraphFactory"}, + new Object[]{HugeGraphFactory.class, "HugeGraphFactory"}, + new Object[]{GraphViewFactory.class, "GraphViewFactory"} ); } - private static final String DB_CYPHER = "" + "CREATE (john:Person {name:\"John\"})\n" + "CREATE (mary:Person {name:\"Mary\"})\n" + @@ -128,8 +129,8 @@ public void test() throws Exception { final Graph graph; if (graphImpl.isAssignableFrom(HeavyCypherGraphFactory.class)) { graph = new GraphLoader(db) - .withLabel("MATCH (n:Label1) RETURN id(n) as id") - .withRelationshipType("MATCH (n:Label1)-[:TYPE1]->(m:Label1) RETURN id(n) as source,id(m) as target") + .withLabel("MATCH (n) RETURN id(n) as id") + .withRelationshipType("MATCH (n)-->(m) RETURN id(n) as source,id(m) as target") .load(graphImpl); } else { @@ -143,22 +144,33 @@ public void test() throws Exception { .compute(40) .result(); - Map results = new TreeMap<>(); + Map prs = new TreeMap<>(); try(Transaction tx = db.beginTx()) { for (Node node : db.getAllNodes()) { double score = rankResult.score(node.getId()); - results.put(node.getProperty("name").toString(), score); - + prs.put(node.getProperty("name").toString(), score); } } - Map longDoubleMap = sortByValue(results); - for (String aLong : longDoubleMap.keySet()) { - System.out.println("aLong = " + aLong + " => " + longDoubleMap.get(aLong)); + Map sortedPrs = sortByValue(prs); + for (String name : sortedPrs.keySet()) { + System.out.println(name + " => " + sortedPrs.get(name)); } - + /* + + Personalised PageRank + John 0.2495885915 + iPhone5 0.1757435084 + Kindle Fire 0.1757435084 + Mary 0.1229457566 + Jill 0.1229457566 + Fitbit Flex Wireless 0.0824359888 + Todd 0.0450622296 + Harry Potter 0.0127673300 + Hobbit 0.0127673300 + */ } private static > Map sortByValue(Map map) { From c11b7e7d794f7d1e412900caf9646a2b28f2cac9 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 18 Jul 2018 13:37:28 +0100 Subject: [PATCH 03/12] wip --- .../main/java/org/neo4j/graphalgo/PageRankProc.java | 6 ++++++ .../main/java/org/neo4j/graphalgo/impl/PageRank.java | 4 +++- .../org/neo4j/graphalgo/impl/PageRankAlgorithm.java | 12 +++++++++--- .../graphalgo/impl/PersonalizedPageRankTest.java | 5 ++--- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java index ecdfc340b..dea58ee56 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java +++ b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java @@ -33,6 +33,7 @@ import org.neo4j.graphalgo.impl.PageRankAlgorithm; import org.neo4j.graphalgo.results.PageRankScore; import org.neo4j.graphdb.Direction; +import org.neo4j.graphdb.Node; import org.neo4j.kernel.api.KernelTransaction; import org.neo4j.kernel.internal.GraphDatabaseAPI; import org.neo4j.logging.Log; @@ -42,6 +43,7 @@ import org.neo4j.procedure.Name; import org.neo4j.procedure.Procedure; +import java.util.List; import java.util.Map; import java.util.stream.IntStream; import java.util.stream.LongStream; @@ -189,6 +191,10 @@ private PageRankResult evaluate( .withLog(log) .withTerminationFlag(terminationFlag); + List sourceNodes = (List) configuration.get("sourceNodes"); + Stream sourceNodeIds = sourceNodes.stream().map(Node::getId); + + statsBuilder.timeEval(() -> prAlgo.compute(iterations)); statsBuilder diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java index 3120e0068..306f87207 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java @@ -106,7 +106,8 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { NodeIterator nodeIterator, RelationshipIterator relationshipIterator, Degrees degrees, - double dampingFactor) { + double dampingFactor, + Stream sourceNodeIds) { this( null, -1, @@ -509,6 +510,7 @@ private void synchronizeScores(int[] allScores) { int length = allScores.length; for (int i = 0; i < length; i++) { int sum = allScores[i]; + double delta = dampingFactor * (sum / 100_000.0); pageRank[i] += delta; deltas[i] = delta; diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java index 3aa223444..b4faeed1e 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java @@ -23,6 +23,7 @@ import org.neo4j.graphalgo.core.utils.paged.AllocationTracker; import java.util.concurrent.ExecutorService; +import java.util.stream.Stream; public interface PageRankAlgorithm { @@ -34,12 +35,14 @@ public interface PageRankAlgorithm { static PageRankAlgorithm of( Graph graph, + Stream sourceNodeIds, double dampingFactor) { - return of(AllocationTracker.EMPTY, graph, dampingFactor); + return of(AllocationTracker.EMPTY, sourceNodeIds, graph, dampingFactor); } static PageRankAlgorithm of( AllocationTracker tracker, + Stream sourceNodeIds, Graph graph, double dampingFactor) { if (graph instanceof HugeGraph) { @@ -52,16 +55,18 @@ static PageRankAlgorithm of( static PageRankAlgorithm of( Graph graph, double dampingFactor, + Stream sourceNodeIds, ExecutorService pool, int concurrency, int batchSize) { - return of(AllocationTracker.EMPTY, graph, dampingFactor, pool, concurrency, batchSize); + return of(AllocationTracker.EMPTY, graph, dampingFactor, sourceNodeIds, pool, concurrency, batchSize); } static PageRankAlgorithm of( AllocationTracker tracker, Graph graph, double dampingFactor, + Stream sourceNodeIds, ExecutorService pool, int concurrency, int batchSize) { @@ -86,6 +91,7 @@ static PageRankAlgorithm of( graph, graph, graph, - dampingFactor); + dampingFactor, + sourceNodeIds); } } diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index 348f984cd..bf8b22bd3 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -32,13 +32,12 @@ import org.neo4j.graphalgo.core.huge.HugeGraphFactory; import org.neo4j.graphalgo.core.neo4jview.GraphViewFactory; import org.neo4j.graphdb.Direction; -import org.neo4j.graphdb.Label; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Transaction; import org.neo4j.kernel.internal.GraphDatabaseAPI; import java.util.*; -import java.util.stream.IntStream; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -140,7 +139,7 @@ public void test() throws Exception { } final PageRankResult rankResult = PageRankAlgorithm - .of(graph, 0.85) + .of(graph,Stream.of(0L), 0.85) .compute(40) .result(); From 2c3527c85a6fa2d83018732540df94a4403463f6 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 18 Jul 2018 15:45:21 +0100 Subject: [PATCH 04/12] more wip --- .../org/neo4j/graphalgo/PageRankProc.java | 8 +- .../org/neo4j/graphalgo/impl/PageRank.java | 72 ++--- .../graphalgo/impl/PageRankAlgorithm.java | 17 +- .../algo/PageRankProcIntegrationTest.java | 9 +- ...rsonalizedPageRankProcIntegrationTest.java | 254 ++++++++++++++++++ .../neo4j/graphalgo/impl/PageRankTest.java | 3 +- .../graphalgo/impl/PageRankWikiTest.java | 3 +- .../impl/PersonalizedPageRankTest.java | 26 +- 8 files changed, 344 insertions(+), 48 deletions(-) create mode 100644 tests/src/test/java/org/neo4j/graphalgo/algo/PersonalizedPageRankProcIntegrationTest.java diff --git a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java index dea58ee56..912bd9c31 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java +++ b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java @@ -43,6 +43,7 @@ import org.neo4j.procedure.Name; import org.neo4j.procedure.Procedure; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.stream.IntStream; @@ -179,10 +180,14 @@ private PageRankResult evaluate( final int concurrency = configuration.getConcurrency(Pools.getNoThreadsInDefaultPool()); log.debug("Computing page rank with damping of " + dampingFactor + " and " + iterations + " iterations."); + + List sourceNodes = configuration.get("sourceNodes", new ArrayList<>()); + Stream sourceNodeIds = sourceNodes.stream().map(Node::getId); PageRankAlgorithm prAlgo = PageRankAlgorithm.of( tracker, graph, dampingFactor, + sourceNodeIds, Pools.DEFAULT, concurrency, batchSize); @@ -191,9 +196,6 @@ private PageRankResult evaluate( .withLog(log) .withTerminationFlag(terminationFlag); - List sourceNodes = (List) configuration.get("sourceNodes"); - Stream sourceNodeIds = sourceNodes.stream().map(Node::getId); - statsBuilder.timeEval(() -> prAlgo.compute(iterations)); diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java index 306f87207..9ab068fae 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java @@ -1,18 +1,18 @@ /** * Copyright (c) 2017 "Neo4j, Inc." - * + *

* This file is part of Neo4j Graph Algorithms . - * + *

* Neo4j Graph Algorithms is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + *

* This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + *

* You should have received a copy of the GNU General Public License * along with this program. If not, see . */ @@ -20,11 +20,7 @@ import com.carrotsearch.hppc.IntArrayList; import org.neo4j.collection.primitive.PrimitiveIntIterator; -import org.neo4j.graphalgo.api.Degrees; -import org.neo4j.graphalgo.api.IdMapping; -import org.neo4j.graphalgo.api.NodeIterator; -import org.neo4j.graphalgo.api.RelationshipConsumer; -import org.neo4j.graphalgo.api.RelationshipIterator; +import org.neo4j.graphalgo.api.*; import org.neo4j.graphalgo.core.utils.ParallelUtil; import org.neo4j.graphalgo.core.utils.Pools; import org.neo4j.graphalgo.core.write.Exporter; @@ -38,6 +34,8 @@ import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup; @@ -101,22 +99,16 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { * Forces sequential use. If you want parallelism, prefer * {@link #PageRank(ExecutorService, int, int, IdMapping, NodeIterator, RelationshipIterator, Degrees, double)} */ - PageRank( - IdMapping idMapping, - NodeIterator nodeIterator, - RelationshipIterator relationshipIterator, - Degrees degrees, + PageRank(Graph graph, double dampingFactor, Stream sourceNodeIds) { this( null, -1, ParallelUtil.DEFAULT_BATCH_SIZE, - idMapping, - nodeIterator, - relationshipIterator, - degrees, - dampingFactor); + graph, + dampingFactor, + sourceNodeIds); } /** @@ -128,28 +120,27 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { ExecutorService executor, int concurrency, int batchSize, - IdMapping idMapping, - NodeIterator nodeIterator, - RelationshipIterator relationshipIterator, - Degrees degrees, - double dampingFactor) { + Graph graph, + double dampingFactor, + Stream sourceNodeIds) { List partitions; if (ParallelUtil.canRunInParallel(executor)) { partitions = partitionGraph( adjustBatchSize(batchSize), - idMapping, - nodeIterator, - degrees); + graph, + graph, + graph); } else { executor = null; - partitions = createSinglePartition(idMapping, degrees); + partitions = createSinglePartition(graph, graph); } computeSteps = createComputeSteps( concurrency, dampingFactor, - relationshipIterator, - degrees, + sourceNodeIds.map(graph::toMappedNodeId).collect(Collectors.toList()), + graph, + graph, partitions, executor); } @@ -221,6 +212,7 @@ private List createSinglePartition( private ComputeSteps createComputeSteps( int concurrency, double dampingFactor, + List sourceNodeIds, RelationshipIterator relationshipIterator, Degrees degrees, List partitions, @@ -253,6 +245,7 @@ private ComputeSteps createComputeSteps( computeSteps.add(new ComputeStep( dampingFactor, + sourceNodeIds, relationshipIterator, degrees, partitionCount, @@ -390,6 +383,7 @@ private static final class ComputeStep implements Runnable, RelationshipConsumer private int[] starts; private int[] lengths; + private List sourceNodeIds; private final RelationshipIterator relationshipIterator; private final Degrees degrees; @@ -409,12 +403,14 @@ private static final class ComputeStep implements Runnable, RelationshipConsumer ComputeStep( double dampingFactor, + List sourceNodeIds, RelationshipIterator relationshipIterator, Degrees degrees, int partitionSize, int startNode) { this.dampingFactor = dampingFactor; this.alpha = 1.0 - dampingFactor; + this.sourceNodeIds = sourceNodeIds; this.relationshipIterator = relationshipIterator; this.degrees = degrees; this.partitionSize = partitionSize; @@ -447,7 +443,21 @@ private void initialize() { Arrays.setAll(nextScores, i -> new int[lengths[i]]); double[] partitionRank = new double[partitionSize]; - Arrays.fill(partitionRank, alpha); + + if(sourceNodeIds.size() == 0) { + Arrays.fill(partitionRank, alpha); + } else { + Arrays.fill(partitionRank,0); + + List partitionSourceNodeIds = sourceNodeIds.stream() + .filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId <= endNode) + .collect(Collectors.toList()); + + for (int sourceNodeId : partitionSourceNodeIds) { + partitionRank[sourceNodeId] = alpha; + } + } + this.pageRank = partitionRank; this.deltas = Arrays.copyOf(partitionRank, partitionSize); diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java index b4faeed1e..702435c61 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java @@ -34,22 +34,22 @@ public interface PageRankAlgorithm { Algorithm algorithm(); static PageRankAlgorithm of( - Graph graph, - Stream sourceNodeIds, - double dampingFactor) { - return of(AllocationTracker.EMPTY, sourceNodeIds, graph, dampingFactor); + Graph graph, + double dampingFactor, + Stream sourceNodeIds) { + return of(AllocationTracker.EMPTY, dampingFactor, sourceNodeIds, graph); } static PageRankAlgorithm of( AllocationTracker tracker, + double dampingFactor, Stream sourceNodeIds, - Graph graph, - double dampingFactor) { + Graph graph) { if (graph instanceof HugeGraph) { HugeGraph huge = (HugeGraph) graph; return new HugePageRank(tracker, huge, huge, huge, huge, dampingFactor); } - return new PageRank(graph, graph, graph, graph, dampingFactor); + return new PageRank(graph, dampingFactor, sourceNodeIds); } static PageRankAlgorithm of( @@ -88,9 +88,6 @@ static PageRankAlgorithm of( concurrency, batchSize, graph, - graph, - graph, - graph, dampingFactor, sourceNodeIds); } diff --git a/tests/src/test/java/org/neo4j/graphalgo/algo/PageRankProcIntegrationTest.java b/tests/src/test/java/org/neo4j/graphalgo/algo/PageRankProcIntegrationTest.java index cb4000cd1..18f0ce50a 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/algo/PageRankProcIntegrationTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/algo/PageRankProcIntegrationTest.java @@ -207,7 +207,14 @@ public void testPageRankParallelExecution() throws Exception { private static void runQuery( String query, Consumer check) { - try (Result result = db.execute(query)) { + runQuery(query, new HashMap<>(), check); + } + + private static void runQuery( + String query, + Map params, + Consumer check) { + try (Result result = db.execute(query, params)) { result.accept(row -> { check.accept(row); return true; diff --git a/tests/src/test/java/org/neo4j/graphalgo/algo/PersonalizedPageRankProcIntegrationTest.java b/tests/src/test/java/org/neo4j/graphalgo/algo/PersonalizedPageRankProcIntegrationTest.java new file mode 100644 index 000000000..82447144a --- /dev/null +++ b/tests/src/test/java/org/neo4j/graphalgo/algo/PersonalizedPageRankProcIntegrationTest.java @@ -0,0 +1,254 @@ +/** + * Copyright (c) 2017 "Neo4j, Inc." + * + * This file is part of Neo4j Graph Algorithms . + * + * Neo4j Graph Algorithms is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.graphalgo.algo; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.neo4j.graphalgo.PageRankProc; +import org.neo4j.graphalgo.TestDatabaseCreator; +import org.neo4j.graphdb.Label; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.Result; +import org.neo4j.graphdb.Transaction; +import org.neo4j.internal.kernel.api.exceptions.KernelException; +import org.neo4j.kernel.impl.proc.Procedures; +import org.neo4j.kernel.internal.GraphDatabaseAPI; + +import java.util.*; +import java.util.function.Consumer; + +import static org.junit.Assert.*; + +@RunWith(Parameterized.class) +public class PersonalizedPageRankProcIntegrationTest { + + private static GraphDatabaseAPI db; + private static Map expected = new HashMap<>(); + + private static final String DB_CYPHER = "" + + "CREATE (a:Label1 {name:\"a\"})\n" + + "CREATE (b:Label1 {name:\"b\"})\n" + + "CREATE (c:Label1 {name:\"c\"})\n" + + "CREATE (d:Label1 {name:\"d\"})\n" + + "CREATE (e:Label1 {name:\"e\"})\n" + + "CREATE (f:Label1 {name:\"f\"})\n" + + "CREATE (g:Label1 {name:\"g\"})\n" + + "CREATE (h:Label1 {name:\"h\"})\n" + + "CREATE (i:Label1 {name:\"i\"})\n" + + "CREATE (j:Label1 {name:\"j\"})\n" + + "CREATE (k:Label2 {name:\"k\"})\n" + + "CREATE (l:Label2 {name:\"l\"})\n" + + "CREATE (m:Label2 {name:\"m\"})\n" + + "CREATE (n:Label2 {name:\"n\"})\n" + + "CREATE (o:Label2 {name:\"o\"})\n" + + "CREATE (p:Label2 {name:\"p\"})\n" + + "CREATE (q:Label2 {name:\"q\"})\n" + + "CREATE (r:Label2 {name:\"r\"})\n" + + "CREATE (s:Label2 {name:\"s\"})\n" + + "CREATE (t:Label2 {name:\"t\"})\n" + + "CREATE\n" + + " (b)-[:TYPE1{foo:1.0}]->(c),\n" + + " (c)-[:TYPE1{foo:1.2}]->(b),\n" + + " (d)-[:TYPE1{foo:1.3}]->(a),\n" + + " (d)-[:TYPE1{foo:1.7}]->(b),\n" + + " (e)-[:TYPE1{foo:1.1}]->(b),\n" + + " (e)-[:TYPE1{foo:2.2}]->(d),\n" + + " (e)-[:TYPE1{foo:1.5}]->(f),\n" + + " (f)-[:TYPE1{foo:3.5}]->(b),\n" + + " (f)-[:TYPE1{foo:2.9}]->(e),\n" + + " (g)-[:TYPE2{foo:3.2}]->(b),\n" + + " (g)-[:TYPE2{foo:5.3}]->(e),\n" + + " (h)-[:TYPE2{foo:9.5}]->(b),\n" + + " (h)-[:TYPE2{foo:0.3}]->(e),\n" + + " (i)-[:TYPE2{foo:5.4}]->(b),\n" + + " (i)-[:TYPE2{foo:3.2}]->(e),\n" + + " (j)-[:TYPE2{foo:9.5}]->(e),\n" + + " (k)-[:TYPE2{foo:4.2}]->(e)\n"; + + @AfterClass + public static void tearDown() throws Exception { + if (db != null) db.shutdown(); + } + + @BeforeClass + public static void setup() throws KernelException { + db = TestDatabaseCreator.createTestDatabase(); + try (Transaction tx = db.beginTx()) { + db.execute(DB_CYPHER).close(); + tx.success(); + } + + db.getDependencyResolver() + .resolveDependency(Procedures.class) + .registerProcedure(PageRankProc.class); + + + try (Transaction tx = db.beginTx()) { + final Label label = Label.label("Label1"); + expected.put(db.findNode(label, "name", "a").getId(), 0.243); + expected.put(db.findNode(label, "name", "b").getId(), 1.844); + expected.put(db.findNode(label, "name", "c").getId(), 1.777); + expected.put(db.findNode(label, "name", "d").getId(), 0.218); + expected.put(db.findNode(label, "name", "e").getId(), 0.243); + expected.put(db.findNode(label, "name", "f").getId(), 0.218); + expected.put(db.findNode(label, "name", "g").getId(), 0.150); + expected.put(db.findNode(label, "name", "h").getId(), 0.150); + expected.put(db.findNode(label, "name", "i").getId(), 0.150); + expected.put(db.findNode(label, "name", "j").getId(), 0.150); + tx.success(); + } + } + + @Parameterized.Parameters(name = "{0}") + public static Collection data() { + return Arrays.asList( + new Object[]{"Heavy"}, + new Object[]{"Light"}, + new Object[]{"Kernel"}, + new Object[]{"Huge"} + ); + } + + @Parameterized.Parameter + public String graphImpl; + + @Test + public void testPageRankStream() throws Exception { + final Map actual = new HashMap<>(); + runQuery( + "CALL algo.pageRank.stream('Label1', 'TYPE1', {graph:'"+graphImpl+"'}) YIELD node, score", + row -> actual.put( + row.getNode("node").getId(), + (Double) row.get("score"))); + + assertMapEquals(expected, actual); + } + + @Test + public void testPageRankWriteBack() throws Exception { + runQuery( + "CALL algo.pageRank('Label1', 'TYPE1', {graph:'"+graphImpl+"'}) YIELD writeMillis, write, writeProperty", + row -> { + assertTrue(row.getBoolean("write")); + assertEquals("pagerank", row.getString("writeProperty")); + assertTrue( + "write time not set", + row.getNumber("writeMillis").intValue() >= 0); + }); + + assertResult("pagerank"); + } + + @Test + public void testPageRankWriteBackUnderDifferentProperty() throws Exception { + runQuery( + "CALL algo.pageRank('Label1', 'TYPE1', {writeProperty:'foobar', graph:'"+graphImpl+"'}) YIELD writeMillis, write, writeProperty", + row -> { + assertTrue(row.getBoolean("write")); + assertEquals("foobar", row.getString("writeProperty")); + assertTrue( + "write time not set", + row.getNumber("writeMillis").intValue() >= 0); + }); + + assertResult("foobar"); + } + + @Test + public void testPageRankParallelWriteBack() throws Exception { + runQuery( + "CALL algo.pageRank('Label1', 'TYPE1', {batchSize:3, write:true, graph:'"+graphImpl+"'}) YIELD writeMillis, write, writeProperty", + row -> assertTrue( + "write time not set", + row.getNumber("writeMillis").intValue() >= 0)); + + assertResult("pagerank"); + } + + @Test + public void testPageRankParallelExecution() throws Exception { + final Map actual = new HashMap<>(); + runQuery( + "CALL algo.pageRank.stream('Label1', 'TYPE1', {batchSize:2, graph:'"+graphImpl+"'}) YIELD nodeId, node, score", + row -> { + final long nodeId = row.getNumber("nodeId").longValue(); + final Node node = row.getNode("node"); + assertEquals(node.getId(), nodeId); + actual.put(nodeId, (Double) row.get("score")); + }); + assertMapEquals(expected, actual); + } + + private static void runQuery( + String query, + Consumer check) { + runQuery(query, new HashMap<>(), check); + } + + private static void runQuery( + String query, + Map params, + Consumer check) { + try (Result result = db.execute(query, params)) { + result.accept(row -> { + check.accept(row); + return true; + }); + } + } + + private void assertResult(final String scoreProperty) { + try (Transaction tx = db.beginTx()) { + for (Map.Entry entry : expected.entrySet()) { + double score = ((Number) db + .getNodeById(entry.getKey()) + .getProperty(scoreProperty)).doubleValue(); + assertEquals( + "score for " + entry.getKey(), + entry.getValue(), + score, + 0.1); + } + tx.success(); + } + } + + private static void assertMapEquals( + Map expected, + Map actual) { + assertEquals("number of elements", expected.size(), actual.size()); + HashSet expectedKeys = new HashSet<>(expected.keySet()); + for (Map.Entry entry : actual.entrySet()) { + assertTrue( + "unknown key " + entry.getKey(), + expectedKeys.remove(entry.getKey())); + assertEquals( + "value for " + entry.getKey(), + expected.get(entry.getKey()), + entry.getValue(), + 0.1); + } + for (Long expectedKey : expectedKeys) { + fail("missing key " + expectedKey); + } + } +} diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java index a9c0c96a4..76044bba7 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java @@ -41,6 +41,7 @@ import java.util.HashMap; import java.util.Map; import java.util.stream.IntStream; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -157,7 +158,7 @@ public void test() throws Exception { } final PageRankResult rankResult = PageRankAlgorithm - .of(graph, 0.85) + .of(graph, 0.85, Stream.empty()) .compute(40) .result(); diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java index 236c97acc..db114f96c 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java @@ -39,6 +39,7 @@ import java.util.HashMap; import java.util.Map; import java.util.stream.IntStream; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -146,7 +147,7 @@ public void test() throws Exception { .load(graphImpl); final PageRankResult rankResult = PageRankAlgorithm - .of(graph, 0.85) + .of(graph, 0.85, Stream.empty()) .compute(40) .result(); diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index bf8b22bd3..76ea3d1bc 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -31,7 +31,9 @@ import org.neo4j.graphalgo.core.heavyweight.HeavyGraphFactory; import org.neo4j.graphalgo.core.huge.HugeGraphFactory; import org.neo4j.graphalgo.core.neo4jview.GraphViewFactory; +import org.neo4j.graphalgo.core.utils.Pools; import org.neo4j.graphdb.Direction; +import org.neo4j.graphdb.Label; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Transaction; import org.neo4j.kernel.internal.GraphDatabaseAPI; @@ -138,8 +140,15 @@ public void test() throws Exception { .load(graphImpl); } + Stream sourceNodeIds; + try(Transaction tx = db.beginTx()) { + Node node = db.findNode(Label.label("Person"), "name", "Mary"); + sourceNodeIds = Stream.of(node.getId()); + } + + final PageRankResult rankResult = PageRankAlgorithm - .of(graph,Stream.of(0L), 0.85) + .of(graph,0.85, sourceNodeIds, Pools.DEFAULT, 2, 1) .compute(40) .result(); @@ -169,6 +178,21 @@ public void test() throws Exception { Todd 0.0450622296 Harry Potter 0.0127673300 Hobbit 0.0127673300 + + MATCH (u:User {id: "Doug"}) +WITH u, collect(u) AS sourceNodes +CALL algo.pageRank.stream('User', 'FOLLOWS', { + iterations:20, + dampingFactor:0.85, + sourceNodes: sourceNodes +}) +YIELD nodeId, score +MATCH (node) +WHERE id(node) = nodeId +AND node <> u +AND not((u)-[:FOLLOWS]->(node)) +RETURN node.id AS page, score +ORDER BY score DESC */ } From 7b210d585ee89e653c52f9c1177052c1ca02ff67 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 18 Jul 2018 16:40:20 +0100 Subject: [PATCH 05/12] add calculation to huge as well --- .../org/neo4j/graphalgo/PageRankProc.java | 12 +++- .../neo4j/graphalgo/impl/HugePageRank.java | 65 ++++++++++++------- .../graphalgo/impl/PageRankAlgorithm.java | 9 ++- .../impl/PersonalizedPageRankTest.java | 30 ++------- 4 files changed, 59 insertions(+), 57 deletions(-) diff --git a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java index 912bd9c31..12ffde844 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java +++ b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java @@ -107,7 +107,7 @@ public Stream pageRankStream( @Name(value = "relationship", defaultValue = "") String relationship, @Name(value = "config", defaultValue = "{}") Map config) { - ProcedureConfiguration configuration = ProcedureConfiguration.create(config); + ProcedureConfiguration configuration = ProcedureConfiguration.create(config); PageRankScore.Stats.Builder statsBuilder = new PageRankScore.Stats.Builder(); AllocationTracker tracker = AllocationTracker.create(); @@ -153,13 +153,19 @@ private Graph load( AllocationTracker tracker, Class graphFactory, PageRankScore.Stats.Builder statsBuilder, ProcedureConfiguration configuration) { - GraphLoader graphLoader = new GraphLoader(api, Pools.DEFAULT) .init(log, label, relationship, configuration) .withAllocationTracker(tracker) - .withDirection(Direction.OUTGOING) .withoutRelationshipWeights(); + Direction direction = configuration.getDirection(Direction.OUTGOING); + if (direction == Direction.BOTH) { + graphLoader.asUndirected(true); + } else { + graphLoader.withDirection(direction); + } + + try (ProgressTimer timer = statsBuilder.timeLoad()) { Graph graph = graphLoader.load(graphFactory); statsBuilder.withNodes(graph.nodeCount()); diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java index 1c25c6522..0520ed44f 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java @@ -21,11 +21,7 @@ import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.LongArrayList; import org.neo4j.collection.primitive.PrimitiveLongIterator; -import org.neo4j.graphalgo.api.HugeDegrees; -import org.neo4j.graphalgo.api.HugeIdMapping; -import org.neo4j.graphalgo.api.HugeNodeIterator; -import org.neo4j.graphalgo.api.HugeRelationshipConsumer; -import org.neo4j.graphalgo.api.HugeRelationshipIterator; +import org.neo4j.graphalgo.api.*; import org.neo4j.graphalgo.core.utils.ParallelUtil; import org.neo4j.graphalgo.core.utils.paged.AllocationTracker; import org.neo4j.graphalgo.core.write.Exporter; @@ -39,6 +35,8 @@ import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup; import static org.neo4j.graphalgo.core.utils.paged.AllocationTracker.humanReadable; @@ -111,6 +109,8 @@ public class HugePageRank extends Algorithm implements PageRankAlg private final HugeRelationshipIterator relationshipIterator; private final HugeDegrees degrees; private final double dampingFactor; + private final HugeGraph graph; + private Stream sourceNodeIds; private Log log; private ComputeSteps computeSteps; @@ -121,21 +121,17 @@ public class HugePageRank extends Algorithm implements PageRankAlg */ HugePageRank( AllocationTracker tracker, - HugeIdMapping idMapping, - HugeNodeIterator nodeIterator, - HugeRelationshipIterator relationshipIterator, - HugeDegrees degrees, - double dampingFactor) { + HugeGraph graph, + double dampingFactor, + Stream sourceNodeIds) { this( null, -1, ParallelUtil.DEFAULT_BATCH_SIZE, tracker, - idMapping, - nodeIterator, - relationshipIterator, - degrees, - dampingFactor); + graph, + dampingFactor, + sourceNodeIds); } /** @@ -148,20 +144,20 @@ public class HugePageRank extends Algorithm implements PageRankAlg int concurrency, int batchSize, AllocationTracker tracker, - HugeIdMapping idMapping, - HugeNodeIterator nodeIterator, - HugeRelationshipIterator relationshipIterator, - HugeDegrees degrees, - double dampingFactor) { + HugeGraph graph, + double dampingFactor, + Stream sourceNodeIds) { this.executor = executor; this.concurrency = concurrency; this.batchSize = batchSize; this.tracker = tracker; - this.idMapping = idMapping; - this.nodeIterator = nodeIterator; - this.relationshipIterator = relationshipIterator; - this.degrees = degrees; + this.idMapping = graph; + this.nodeIterator = graph; + this.relationshipIterator = graph; + this.degrees = graph; + this.graph = graph; this.dampingFactor = dampingFactor; + this.sourceNodeIds = sourceNodeIds; } /** @@ -209,6 +205,7 @@ private void initializeSteps() { concurrency, idMapping.nodeCount(), dampingFactor, + sourceNodeIds.map(graph::toMappedNodeId).collect(Collectors.toList()), relationshipIterator, degrees, partitions, @@ -246,6 +243,7 @@ private ComputeSteps createComputeSteps( int concurrency, long nodeCount, double dampingFactor, + List sourceNodeIds, HugeRelationshipIterator relationshipIterator, HugeDegrees degrees, List partitions, @@ -281,6 +279,7 @@ private ComputeSteps createComputeSteps( computeSteps.add(new ComputeStep( dampingFactor, + sourceNodeIds, relationshipIterator, degrees, tracker, @@ -542,6 +541,7 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons private long[] starts; private int[] lengths; + private List sourceNodeIds; private final HugeRelationshipIterator relationshipIterator; private final HugeDegrees degrees; private final AllocationTracker tracker; @@ -562,6 +562,7 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons ComputeStep( double dampingFactor, + List sourceNodeIds, HugeRelationshipIterator relationshipIterator, HugeDegrees degrees, AllocationTracker tracker, @@ -569,6 +570,7 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons long startNode) { this.dampingFactor = dampingFactor; this.alpha = 1.0 - dampingFactor; + this.sourceNodeIds = sourceNodeIds; this.relationshipIterator = relationshipIterator.concurrentCopy(); this.degrees = degrees; this.tracker = tracker; @@ -606,8 +608,21 @@ private void initialize() { }); tracker.add(sizeOfDoubleArray(partitionSize) << 1); + double[] partitionRank = new double[partitionSize]; - Arrays.fill(partitionRank, alpha); + if(sourceNodeIds.size() == 0) { + Arrays.fill(partitionRank, alpha); + } else { + Arrays.fill(partitionRank,0); + + List partitionSourceNodeIds = sourceNodeIds.stream() + .filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId <= endNode) + .collect(Collectors.toList()); + + for (int sourceNodeId : partitionSourceNodeIds) { + partitionRank[sourceNodeId] = alpha; + } + } this.pageRank = partitionRank; this.deltas = Arrays.copyOf(partitionRank, partitionSize); diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java index 702435c61..c55e3dca0 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java @@ -47,7 +47,7 @@ static PageRankAlgorithm of( Graph graph) { if (graph instanceof HugeGraph) { HugeGraph huge = (HugeGraph) graph; - return new HugePageRank(tracker, huge, huge, huge, huge, dampingFactor); + return new HugePageRank(tracker, huge, dampingFactor, sourceNodeIds); } return new PageRank(graph, dampingFactor, sourceNodeIds); } @@ -78,10 +78,9 @@ static PageRankAlgorithm of( batchSize, tracker, huge, - huge, - huge, - huge, - dampingFactor); + dampingFactor, + sourceNodeIds + ); } return new PageRank( pool, diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index 76ea3d1bc..52cfce326 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -71,42 +71,22 @@ public static Collection data() { "CREATE\n" + " (john)-[:PURCHASED]->(iphone),\n" + - " (john)<-[:PURCHASED_BY]-(iphone),\n" + - " (john)-[:PURCHASED]->(kindle),\n" + - " (john)<-[:PURCHASED_BY]-(kindle),\n" + - " (mary)-[:PURCHASED]->(iphone),\n" + - " (mary)<-[:PURCHASED_BY]-(iphone),\n" + - " (mary)-[:PURCHASED]->(kindle),\n" + - " (mary)<-[:PURCHASED_BY]-(kindle),\n" + - " (mary)-[:PURCHASED]->(fitbit),\n" + - " (mary)<-[:PURCHASED_BY]-(fitbit),\n" + - " (jill)-[:PURCHASED]->(iphone),\n" + - " (jill)<-[:PURCHASED_BY]-(iphone),\n" + - " (jill)-[:PURCHASED]->(kindle),\n" + - " (jill)<-[:PURCHASED_BY]-(kindle),\n" + - " (jill)-[:PURCHASED]->(fitbit),\n" + - " (jill)<-[:PURCHASED_BY]-(fitbit),\n" + - " (todd)-[:PURCHASED]->(fitbit),\n" + - " (todd)<-[:PURCHASED_BY]-(fitbit),\n" + - " (todd)-[:PURCHASED]->(potter),\n" + - " (todd)<-[:PURCHASED_BY]-(potter),\n" + - - " (todd)-[:PURCHASED]->(hobbit),\n" + - " (todd)<-[:PURCHASED_BY]-(hobbit)"; + " (todd)-[:PURCHASED]->(hobbit)"; private static GraphDatabaseAPI db; @BeforeClass public static void setupGraph() { + System.out.println(DB_CYPHER); db = TestDatabaseCreator.createTestDatabase(); try (Transaction tx = db.beginTx()) { db.execute(DB_CYPHER).close(); @@ -131,18 +111,20 @@ public void test() throws Exception { if (graphImpl.isAssignableFrom(HeavyCypherGraphFactory.class)) { graph = new GraphLoader(db) .withLabel("MATCH (n) RETURN id(n) as id") - .withRelationshipType("MATCH (n)-->(m) RETURN id(n) as source,id(m) as target") + .withRelationshipType("MATCH (n)-[:PURCHASED]-(m) RETURN id(n) as source,id(m) as target") .load(graphImpl); } else { graph = new GraphLoader(db) .withDirection(Direction.BOTH) + .withRelationshipType("PURCHASED") + .asUndirected(true) .load(graphImpl); } Stream sourceNodeIds; try(Transaction tx = db.beginTx()) { - Node node = db.findNode(Label.label("Person"), "name", "Mary"); + Node node = db.findNode(Label.label("Person"), "name", "John"); sourceNodeIds = Stream.of(node.getId()); } From bdf328673f188d2103992b24575e23f8b03e5eac Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 18 Jul 2018 16:43:30 +0100 Subject: [PATCH 06/12] this factory doesn't seem to listen to my undirected suggestion --- .../org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index 52cfce326..50abd2be0 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -53,8 +53,7 @@ public static Collection data() { return Arrays.asList( new Object[]{HeavyGraphFactory.class, "HeavyGraphFactory"}, new Object[]{HeavyCypherGraphFactory.class, "HeavyCypherGraphFactory"}, - new Object[]{HugeGraphFactory.class, "HugeGraphFactory"}, - new Object[]{GraphViewFactory.class, "GraphViewFactory"} + new Object[]{HugeGraphFactory.class, "HugeGraphFactory"} ); } private static final String DB_CYPHER = "" + @@ -86,7 +85,6 @@ public static Collection data() { @BeforeClass public static void setupGraph() { - System.out.println(DB_CYPHER); db = TestDatabaseCreator.createTestDatabase(); try (Transaction tx = db.beginTx()) { db.execute(DB_CYPHER).close(); From 054bf02c2f85850fa5eee38880cd35a17a0e9f59 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 19 Jul 2018 13:34:14 +0100 Subject: [PATCH 07/12] Fixing boxing --- .../org/neo4j/graphalgo/PageRankProc.java | 2 +- .../neo4j/graphalgo/impl/HugePageRank.java | 27 ++++++++++--------- .../org/neo4j/graphalgo/impl/PageRank.java | 20 +++++++------- .../graphalgo/impl/PageRankAlgorithm.java | 9 ++++--- .../neo4j/graphalgo/impl/PageRankTest.java | 3 ++- .../graphalgo/impl/PageRankWikiTest.java | 3 ++- .../impl/PersonalizedPageRankTest.java | 5 ++-- 7 files changed, 39 insertions(+), 30 deletions(-) diff --git a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java index 12ffde844..6ae8edd53 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java +++ b/algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java @@ -188,7 +188,7 @@ private PageRankResult evaluate( List sourceNodes = configuration.get("sourceNodes", new ArrayList<>()); - Stream sourceNodeIds = sourceNodes.stream().map(Node::getId); + LongStream sourceNodeIds = sourceNodes.stream().mapToLong(Node::getId); PageRankAlgorithm prAlgo = PageRankAlgorithm.of( tracker, graph, diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java index 0520ed44f..99f8e196a 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java @@ -35,7 +35,10 @@ import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutorService; +import java.util.function.IntFunction; import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.LongStream; import java.util.stream.Stream; import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup; @@ -110,7 +113,7 @@ public class HugePageRank extends Algorithm implements PageRankAlg private final HugeDegrees degrees; private final double dampingFactor; private final HugeGraph graph; - private Stream sourceNodeIds; + private LongStream sourceNodeIds; private Log log; private ComputeSteps computeSteps; @@ -123,7 +126,7 @@ public class HugePageRank extends Algorithm implements PageRankAlg AllocationTracker tracker, HugeGraph graph, double dampingFactor, - Stream sourceNodeIds) { + LongStream sourceNodeIds) { this( null, -1, @@ -146,7 +149,7 @@ public class HugePageRank extends Algorithm implements PageRankAlg AllocationTracker tracker, HugeGraph graph, double dampingFactor, - Stream sourceNodeIds) { + LongStream sourceNodeIds) { this.executor = executor; this.concurrency = concurrency; this.batchSize = batchSize; @@ -205,7 +208,7 @@ private void initializeSteps() { concurrency, idMapping.nodeCount(), dampingFactor, - sourceNodeIds.map(graph::toMappedNodeId).collect(Collectors.toList()), + sourceNodeIds.map(graph::toHugeMappedNodeId).toArray(), relationshipIterator, degrees, partitions, @@ -243,7 +246,7 @@ private ComputeSteps createComputeSteps( int concurrency, long nodeCount, double dampingFactor, - List sourceNodeIds, + long[] sourceNodeIds, HugeRelationshipIterator relationshipIterator, HugeDegrees degrees, List partitions, @@ -541,7 +544,7 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons private long[] starts; private int[] lengths; - private List sourceNodeIds; + private long[] sourceNodeIds; private final HugeRelationshipIterator relationshipIterator; private final HugeDegrees degrees; private final AllocationTracker tracker; @@ -562,7 +565,7 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons ComputeStep( double dampingFactor, - List sourceNodeIds, + long[] sourceNodeIds, HugeRelationshipIterator relationshipIterator, HugeDegrees degrees, AllocationTracker tracker, @@ -610,17 +613,17 @@ private void initialize() { tracker.add(sizeOfDoubleArray(partitionSize) << 1); double[] partitionRank = new double[partitionSize]; - if(sourceNodeIds.size() == 0) { + if(sourceNodeIds.length == 0) { Arrays.fill(partitionRank, alpha); } else { Arrays.fill(partitionRank,0); - List partitionSourceNodeIds = sourceNodeIds.stream() + long[] partitionSourceNodeIds = LongStream.of(sourceNodeIds) .filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId <= endNode) - .collect(Collectors.toList()); + .toArray(); - for (int sourceNodeId : partitionSourceNodeIds) { - partitionRank[sourceNodeId] = alpha; + for (long sourceNodeId : partitionSourceNodeIds) { + partitionRank[(int) sourceNodeId] = alpha; } } diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java index 9ab068fae..4fdc0715d 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java @@ -35,6 +35,8 @@ import java.util.List; import java.util.concurrent.ExecutorService; import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.LongStream; import java.util.stream.Stream; import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup; @@ -101,7 +103,7 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { */ PageRank(Graph graph, double dampingFactor, - Stream sourceNodeIds) { + LongStream sourceNodeIds) { this( null, -1, @@ -122,7 +124,7 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { int batchSize, Graph graph, double dampingFactor, - Stream sourceNodeIds) { + LongStream sourceNodeIds) { List partitions; if (ParallelUtil.canRunInParallel(executor)) { partitions = partitionGraph( @@ -138,7 +140,7 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { computeSteps = createComputeSteps( concurrency, dampingFactor, - sourceNodeIds.map(graph::toMappedNodeId).collect(Collectors.toList()), + sourceNodeIds.mapToInt(graph::toMappedNodeId).toArray(), graph, graph, partitions, @@ -212,7 +214,7 @@ private List createSinglePartition( private ComputeSteps createComputeSteps( int concurrency, double dampingFactor, - List sourceNodeIds, + int[] sourceNodeIds, RelationshipIterator relationshipIterator, Degrees degrees, List partitions, @@ -383,7 +385,7 @@ private static final class ComputeStep implements Runnable, RelationshipConsumer private int[] starts; private int[] lengths; - private List sourceNodeIds; + private int[] sourceNodeIds; private final RelationshipIterator relationshipIterator; private final Degrees degrees; @@ -403,7 +405,7 @@ private static final class ComputeStep implements Runnable, RelationshipConsumer ComputeStep( double dampingFactor, - List sourceNodeIds, + int[] sourceNodeIds, RelationshipIterator relationshipIterator, Degrees degrees, int partitionSize, @@ -444,14 +446,14 @@ private void initialize() { double[] partitionRank = new double[partitionSize]; - if(sourceNodeIds.size() == 0) { + if(sourceNodeIds.length == 0) { Arrays.fill(partitionRank, alpha); } else { Arrays.fill(partitionRank,0); - List partitionSourceNodeIds = sourceNodeIds.stream() + int[] partitionSourceNodeIds = IntStream.of(sourceNodeIds) .filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId <= endNode) - .collect(Collectors.toList()); + .toArray(); for (int sourceNodeId : partitionSourceNodeIds) { partitionRank[sourceNodeId] = alpha; diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java index c55e3dca0..f3ecabe80 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRankAlgorithm.java @@ -23,6 +23,7 @@ import org.neo4j.graphalgo.core.utils.paged.AllocationTracker; import java.util.concurrent.ExecutorService; +import java.util.stream.LongStream; import java.util.stream.Stream; public interface PageRankAlgorithm { @@ -36,14 +37,14 @@ public interface PageRankAlgorithm { static PageRankAlgorithm of( Graph graph, double dampingFactor, - Stream sourceNodeIds) { + LongStream sourceNodeIds) { return of(AllocationTracker.EMPTY, dampingFactor, sourceNodeIds, graph); } static PageRankAlgorithm of( AllocationTracker tracker, double dampingFactor, - Stream sourceNodeIds, + LongStream sourceNodeIds, Graph graph) { if (graph instanceof HugeGraph) { HugeGraph huge = (HugeGraph) graph; @@ -55,7 +56,7 @@ static PageRankAlgorithm of( static PageRankAlgorithm of( Graph graph, double dampingFactor, - Stream sourceNodeIds, + LongStream sourceNodeIds, ExecutorService pool, int concurrency, int batchSize) { @@ -66,7 +67,7 @@ static PageRankAlgorithm of( AllocationTracker tracker, Graph graph, double dampingFactor, - Stream sourceNodeIds, + LongStream sourceNodeIds, ExecutorService pool, int concurrency, int batchSize) { diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java index 76044bba7..9104b9d75 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankTest.java @@ -41,6 +41,7 @@ import java.util.HashMap; import java.util.Map; import java.util.stream.IntStream; +import java.util.stream.LongStream; import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -158,7 +159,7 @@ public void test() throws Exception { } final PageRankResult rankResult = PageRankAlgorithm - .of(graph, 0.85, Stream.empty()) + .of(graph, 0.85, LongStream.empty()) .compute(40) .result(); diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java index db114f96c..f4a0f6b4b 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PageRankWikiTest.java @@ -39,6 +39,7 @@ import java.util.HashMap; import java.util.Map; import java.util.stream.IntStream; +import java.util.stream.LongStream; import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -147,7 +148,7 @@ public void test() throws Exception { .load(graphImpl); final PageRankResult rankResult = PageRankAlgorithm - .of(graph, 0.85, Stream.empty()) + .of(graph, 0.85, LongStream.empty()) .compute(40) .result(); diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index 50abd2be0..cdf32ccef 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -39,6 +39,7 @@ import org.neo4j.kernel.internal.GraphDatabaseAPI; import java.util.*; +import java.util.stream.LongStream; import java.util.stream.Stream; import static org.junit.Assert.assertEquals; @@ -120,10 +121,10 @@ public void test() throws Exception { .load(graphImpl); } - Stream sourceNodeIds; + LongStream sourceNodeIds; try(Transaction tx = db.beginTx()) { Node node = db.findNode(Label.label("Person"), "name", "John"); - sourceNodeIds = Stream.of(node.getId()); + sourceNodeIds = LongStream.of(node.getId()); } From 3df3d3a6593494236ce842c77609a9890ef8da4d Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 19 Jul 2018 13:34:40 +0100 Subject: [PATCH 08/12] fix imports --- .../java/org/neo4j/graphalgo/impl/HugePageRank.java | 10 +--------- .../main/java/org/neo4j/graphalgo/impl/PageRank.java | 8 +------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java index 99f8e196a..eb2c6fd5e 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java @@ -35,19 +35,11 @@ import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutorService; -import java.util.function.IntFunction; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import java.util.stream.LongStream; -import java.util.stream.Stream; import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup; import static org.neo4j.graphalgo.core.utils.paged.AllocationTracker.humanReadable; -import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.shallowSizeOfInstance; -import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfDoubleArray; -import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfIntArray; -import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfLongArray; -import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfObjectArray; +import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.*; /** diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java index 4fdc0715d..a150c7fbf 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java @@ -28,16 +28,10 @@ import org.neo4j.graphalgo.core.write.Translators; import org.neo4j.graphdb.Direction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; +import java.util.*; import java.util.concurrent.ExecutorService; -import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.LongStream; -import java.util.stream.Stream; import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup; From 4c9eab58d542b00ed53fde4cecc77233680639ea Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 19 Jul 2018 13:52:03 +0100 Subject: [PATCH 09/12] fixing a bug in where we set alpha. Offset should be based on our startNode --- .../neo4j/graphalgo/impl/HugePageRank.java | 2 +- .../org/neo4j/graphalgo/impl/PageRank.java | 4 +- .../impl/PersonalizedPageRankTest.java | 93 +++++++------------ 3 files changed, 38 insertions(+), 61 deletions(-) diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java index eb2c6fd5e..821b915c9 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java @@ -615,7 +615,7 @@ private void initialize() { .toArray(); for (long sourceNodeId : partitionSourceNodeIds) { - partitionRank[(int) sourceNodeId] = alpha; + partitionRank[Math.toIntExact(sourceNodeId - this.startNode)] = alpha; } } diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java index a150c7fbf..2758a3ea2 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java @@ -446,11 +446,11 @@ private void initialize() { Arrays.fill(partitionRank,0); int[] partitionSourceNodeIds = IntStream.of(sourceNodeIds) - .filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId <= endNode) + .filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId < endNode) .toArray(); for (int sourceNodeId : partitionSourceNodeIds) { - partitionRank[sourceNodeId] = alpha; + partitionRank[sourceNodeId - this.startNode] = alpha; } } diff --git a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java index cdf32ccef..999a98205 100644 --- a/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java +++ b/tests/src/test/java/org/neo4j/graphalgo/impl/PersonalizedPageRankTest.java @@ -39,6 +39,7 @@ import org.neo4j.kernel.internal.GraphDatabaseAPI; import java.util.*; +import java.util.stream.IntStream; import java.util.stream.LongStream; import java.util.stream.Stream; @@ -58,17 +59,17 @@ public static Collection data() { ); } private static final String DB_CYPHER = "" + - "CREATE (john:Person {name:\"John\"})\n" + - "CREATE (mary:Person {name:\"Mary\"})\n" + - "CREATE (jill:Person {name:\"Jill\"})\n" + - "CREATE (todd:Person {name:\"Todd\"})\n" + - "CREATE (iphone:Product {name:\"iPhone5\"})\n" + "CREATE (kindle:Product {name:\"Kindle Fire\"})\n" + "CREATE (fitbit:Product {name:\"Fitbit Flex Wireless\"})\n" + "CREATE (potter:Product {name:\"Harry Potter\"})\n" + "CREATE (hobbit:Product {name:\"Hobbit\"})\n" + + "CREATE (todd:Person {name:\"Todd\"})\n" + + "CREATE (mary:Person {name:\"Mary\"})\n" + + "CREATE (jill:Person {name:\"Jill\"})\n" + + "CREATE (john:Person {name:\"John\"})\n" + + "CREATE\n" + " (john)-[:PURCHASED]->(iphone),\n" + " (john)-[:PURCHASED]->(kindle),\n" + @@ -106,6 +107,25 @@ public PersonalizedPageRankTest( @Test public void test() throws Exception { + Label personLabel = Label.label("Person"); + Label productLabel = Label.label("Product"); + final Map expected = new HashMap<>(); + + try (Transaction tx = db.beginTx()) { + + expected.put(db.findNode(personLabel, "name", "John").getId(), 0.24851499999999993); + expected.put(db.findNode(personLabel, "name", "Jill").getId(), 0.12135449999999998); + expected.put(db.findNode(personLabel, "name", "Mary").getId(), 0.12135449999999998); + expected.put(db.findNode(personLabel, "name", "Todd").getId(), 0.043511499999999995); + + expected.put(db.findNode(productLabel, "name", "Kindle Fire").getId(), 0.17415649999999996); + expected.put(db.findNode(productLabel, "name", "iPhone5").getId(), 0.17415649999999996); + expected.put(db.findNode(productLabel, "name", "Fitbit Flex Wireless").getId(), 0.08085200000000001); + expected.put(db.findNode(productLabel, "name", "Harry Potter").getId(), 0.01224); + expected.put(db.findNode(productLabel, "name", "Hobbit").getId(), 0.01224); + tx.close(); + } + final Graph graph; if (graphImpl.isAssignableFrom(HeavyCypherGraphFactory.class)) { graph = new GraphLoader(db) @@ -123,69 +143,26 @@ public void test() throws Exception { LongStream sourceNodeIds; try(Transaction tx = db.beginTx()) { - Node node = db.findNode(Label.label("Person"), "name", "John"); + Node node = db.findNode(personLabel, "name", "John"); sourceNodeIds = LongStream.of(node.getId()); } - final PageRankResult rankResult = PageRankAlgorithm .of(graph,0.85, sourceNodeIds, Pools.DEFAULT, 2, 1) .compute(40) .result(); - Map prs = new TreeMap<>(); - - try(Transaction tx = db.beginTx()) { - for (Node node : db.getAllNodes()) { - double score = rankResult.score(node.getId()); - prs.put(node.getProperty("name").toString(), score); - } - } - - Map sortedPrs = sortByValue(prs); - for (String name : sortedPrs.keySet()) { - System.out.println(name + " => " + sortedPrs.get(name)); - } + IntStream.range(0, expected.size()).forEach(i -> { + final long nodeId = graph.toOriginalNodeId(i); + assertEquals( + "Node#" + nodeId, + expected.get(nodeId), + rankResult.score(i), + 1e-2 + ); + }); - /* - - Personalised PageRank - John 0.2495885915 - iPhone5 0.1757435084 - Kindle Fire 0.1757435084 - Mary 0.1229457566 - Jill 0.1229457566 - Fitbit Flex Wireless 0.0824359888 - Todd 0.0450622296 - Harry Potter 0.0127673300 - Hobbit 0.0127673300 - - MATCH (u:User {id: "Doug"}) -WITH u, collect(u) AS sourceNodes -CALL algo.pageRank.stream('User', 'FOLLOWS', { - iterations:20, - dampingFactor:0.85, - sourceNodes: sourceNodes -}) -YIELD nodeId, score -MATCH (node) -WHERE id(node) = nodeId -AND node <> u -AND not((u)-[:FOLLOWS]->(node)) -RETURN node.id AS page, score -ORDER BY score DESC - */ } - private static > Map sortByValue(Map map) { - List> list = new ArrayList<>(map.entrySet()); - list.sort(Map.Entry.comparingByValue(Comparator.reverseOrder())); - Map result = new LinkedHashMap<>(); - for (Map.Entry entry : list) { - result.put(entry.getKey(), entry.getValue()); - } - - return result; - } } From c0a4606a80a2b2957de35d717363eb372c75a305 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 19 Jul 2018 13:55:33 +0100 Subject: [PATCH 10/12] filter unmapped nodes --- algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java | 2 +- algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java index 821b915c9..f6e7f931b 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java @@ -200,7 +200,7 @@ private void initializeSteps() { concurrency, idMapping.nodeCount(), dampingFactor, - sourceNodeIds.map(graph::toHugeMappedNodeId).toArray(), + sourceNodeIds.map(graph::toHugeMappedNodeId).filter(mappedId -> mappedId != -1L).toArray(), relationshipIterator, degrees, partitions, diff --git a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java index 2758a3ea2..38d5d7172 100644 --- a/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java +++ b/algo/src/main/java/org/neo4j/graphalgo/impl/PageRank.java @@ -134,7 +134,7 @@ public class PageRank extends Algorithm implements PageRankAlgorithm { computeSteps = createComputeSteps( concurrency, dampingFactor, - sourceNodeIds.mapToInt(graph::toMappedNodeId).toArray(), + sourceNodeIds.mapToInt(graph::toMappedNodeId).filter(mappedId -> mappedId != -1L).toArray(), graph, graph, partitions, From c68e8ca3ff50572e17ba1d39a585e960d07d5299 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 19 Jul 2018 14:21:07 +0100 Subject: [PATCH 11/12] updating docs to mention PPR --- doc/asciidoc/pagerank.adoc | 36 +++++++++++++++++++++++ doc/asciidoc/personalized-pagerank.adoc | 39 +++++++++++++++++++++++++ doc/asciidoc/scripts/pagerank.cypher | 22 ++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 doc/asciidoc/personalized-pagerank.adoc diff --git a/doc/asciidoc/pagerank.adoc b/doc/asciidoc/pagerank.adoc index 8c7685465..aa9c99d7f 100644 --- a/doc/asciidoc/pagerank.adoc +++ b/doc/asciidoc/pagerank.adoc @@ -124,6 +124,42 @@ As we might expect, the Home page has the highest PageRank because it has incomi We can also see that it's not only the number of incoming links that is important, but also the importance of the pages behind those links. // end::stream-sample-graph-explanation[] +=== Personalized PageRank + +Personalized PageRank is a variation of PageRank which is biased towards a set of `sourceNodes`. +This variant of PageRank is often used as part of https://www.r-bloggers.com/from-random-walks-to-personalized-pagerank/[recommender systems^]. + +The following examples show how to run PageRank centered around 'Site A'. + +.The following will run the algorithm and stream results: +[source,cypher] +---- +include::scripts/pagerank.cypher[tag=ppr-stream-sample-graph] +---- + +.The following will run the algorithm and write back results: +[source,cypher] +---- +include::scripts/pagerank.cypher[tag=ppr-write-sample-graph] +---- + +// tag::ppr-stream-graph-result[] +.Results +[opts="header",cols="1,1"] +|=== +| Name | PageRank +| Home | 0.399 +| Site A | 0.169 +| About | 0.112 +| Product | 0.112 +| Links | 0.112 +| Site B | 0.019 +| Site C | 0.019 +| Site D | 0.019 +|=== +// end::ppr-stream-graph-result[] + + [[algorithms-pagerank-example]] == Example usage diff --git a/doc/asciidoc/personalized-pagerank.adoc b/doc/asciidoc/personalized-pagerank.adoc new file mode 100644 index 000000000..bd9a69f52 --- /dev/null +++ b/doc/asciidoc/personalized-pagerank.adoc @@ -0,0 +1,39 @@ += Personalized PageRank + +// tag::introduction[] +As mentioned above, calculating the exact betweenness centrality on large graphs can be very time consuming. +Therefore, you might choose to use an approximation algorithm that will run much quicker, and still provide useful information. +// end::introduction[] + +.The following will run the algorithm and stream results: +[source,cypher] +---- +include::scripts/pagerank.cypher[tag=ppr-stream-sample-graph] +---- + +.The following will run the algorithm and write back results: +[source,cypher] +---- +include::scripts/pagerank.cypher[tag=ppr-write-ppr-graph] +---- + +// tag::ppr-stream-graph-result[] +.Results +[opts="header",cols="1,1"] +|=== +| Name | PageRank +| Home | 3.232 +| Product | 1.059 +| Links | 1.059 +| About | 1.059 +| Site A | 0.328 +| Site B | 0.328 +| Site C | 0.328 +| Site D | 0.328 +|=== +// end::ppr-stream-graph-result[] + +// tag::stream-ppr-graph-explanation[] +Alice is still the main broker in the network, and Charles is a minor broker, although their centrality score has reduced as the algorithm only considers relationships at a depth of 1. +The others don’t have any influence, because all the shortest paths between pairs of people go via Alice or Charles. +// end::stream-ppr-graph-explanation[] \ No newline at end of file diff --git a/doc/asciidoc/scripts/pagerank.cypher b/doc/asciidoc/scripts/pagerank.cypher index 813167f87..ee6b8326e 100644 --- a/doc/asciidoc/scripts/pagerank.cypher +++ b/doc/asciidoc/scripts/pagerank.cypher @@ -46,6 +46,28 @@ YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, // end::write-sample-graph[] +// tag::ppr-stream-sample-graph[] +MATCH (siteA:Page {name: "Site A"}) + +CALL algo.pageRank.stream('Page', 'LINKS', {iterations:20, dampingFactor:0.85, sourceNodes: [siteA]}) +YIELD nodeId, score + +MATCH (node) WHERE id(node) = nodeId + +RETURN node.name AS page,score +ORDER BY score DESC + +// end::ppr-stream-sample-graph[] + +// tag::ppr-write-sample-graph[] + +MATCH (siteA:Page {name: "Site A"}) +CALL algo.pageRank('Page', 'LINKS', +{iterations:20, dampingFactor:0.85, sourceNodes: [siteA], write: true, writeProperty:"ppr"}) +YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty +RETURN * +// end::ppr-write-sample-graph[] + // tag::cypher-loading[] CALL algo.pageRank( From ac05a14e10152b0d5ade7fb70319e352e07a2e28 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 19 Jul 2018 14:22:27 +0100 Subject: [PATCH 12/12] oops we don't need this --- doc/asciidoc/personalized-pagerank.adoc | 39 ------------------------- 1 file changed, 39 deletions(-) delete mode 100644 doc/asciidoc/personalized-pagerank.adoc diff --git a/doc/asciidoc/personalized-pagerank.adoc b/doc/asciidoc/personalized-pagerank.adoc deleted file mode 100644 index bd9a69f52..000000000 --- a/doc/asciidoc/personalized-pagerank.adoc +++ /dev/null @@ -1,39 +0,0 @@ -= Personalized PageRank - -// tag::introduction[] -As mentioned above, calculating the exact betweenness centrality on large graphs can be very time consuming. -Therefore, you might choose to use an approximation algorithm that will run much quicker, and still provide useful information. -// end::introduction[] - -.The following will run the algorithm and stream results: -[source,cypher] ----- -include::scripts/pagerank.cypher[tag=ppr-stream-sample-graph] ----- - -.The following will run the algorithm and write back results: -[source,cypher] ----- -include::scripts/pagerank.cypher[tag=ppr-write-ppr-graph] ----- - -// tag::ppr-stream-graph-result[] -.Results -[opts="header",cols="1,1"] -|=== -| Name | PageRank -| Home | 3.232 -| Product | 1.059 -| Links | 1.059 -| About | 1.059 -| Site A | 0.328 -| Site B | 0.328 -| Site C | 0.328 -| Site D | 0.328 -|=== -// end::ppr-stream-graph-result[] - -// tag::stream-ppr-graph-explanation[] -Alice is still the main broker in the network, and Charles is a minor broker, although their centrality score has reduced as the algorithm only considers relationships at a depth of 1. -The others don’t have any influence, because all the shortest paths between pairs of people go via Alice or Charles. -// end::stream-ppr-graph-explanation[] \ No newline at end of file