Permalink
Browse files

Adding PlantedPartitionGraph generator for cluster algorithm testing

  • Loading branch information...
1 parent af23f50 commit 33c666906b4d7e9fe6b08ccb79b9ce177cf57d6d @stuarthendren committed Apr 9, 2012
@@ -0,0 +1,110 @@
+package net.stuarthendren.jung.graph;
+
+import java.util.Random;
+
+import edu.uci.ics.jung.graph.UndirectedGraph;
+import edu.uci.ics.jung.graph.UndirectedSparseGraph;
+
+/**
+ * <p>
+ * A planted partition graph is made up of <code>l</code> groups, each of size <code>k</code>. The probability of two
+ * vertices in the same group having an edge between them is <code>p</code> and otherwise the probability is
+ * <code>q</code>, where {@code 0 <= q <= p <= 1}.
+ * </p>
+ * <p>
+ * It can be used to test clustering algorithms: the greater the difference between <code>p</code> and <code>q</code>,
+ * the easier it should be to detect the planted clusters.
+ * </p>
+ * <p>
+ * Vertices are the integers {@code 0 .. l*k - 1}; vertex {@code v} belongs to group {@code v % l}. Edges are
+ * identified by consecutive integers starting at 0, in the order in which they are created.
+ * </p>
+ *
+ * @author Stuart Hendren
+ *
+ */
+@SuppressWarnings("serial")
+public class PlantedPartitionGraph extends UndirectedSparseGraph<Integer, Integer> {
+
+    /**
+     * Create a planted partition graph, seeding the random number generator from the current time.
+     *
+     * <p>
+     * Note on naming: despite its name, <code>interClusterProbability</code> is the probability used for two
+     * vertices in the <em>same</em> group, and <code>intraClusterProbability</code> is used for vertices in
+     * <em>different</em> groups.
+     * </p>
+     *
+     * @param numberOfClusters
+     *            the number of groups (or clusters); must not be negative
+     * @param sizeOfClusters
+     *            the number of vertices in a group; must not be negative
+     * @param interClusterProbability
+     *            the probability of two vertices in the same group being connected
+     * @param intraClusterProbability
+     *            the probability of two vertices not in the same group being connected; must not be greater than
+     *            <code>interClusterProbability</code>
+     * @return a new graph populated with the generated vertices and edges
+     * @throws IllegalArgumentException
+     *             if any argument is rejected by
+     *             {@link #generateGraph(int, int, double, double, long)}
+     */
+    public static UndirectedGraph<Integer, Integer> generateGraph(int numberOfClusters, int sizeOfClusters,
+            double interClusterProbability, double intraClusterProbability) {
+        return generateGraph(numberOfClusters, sizeOfClusters, interClusterProbability, intraClusterProbability,
+                System.currentTimeMillis());
+    }
+
+    /**
+     * Create a planted partition graph using the given seed, so that the same arguments always produce the same
+     * graph.
+     *
+     * <p>
+     * Note on naming: despite its name, <code>interClusterProbability</code> is the probability used for two
+     * vertices in the <em>same</em> group, and <code>intraClusterProbability</code> is used for vertices in
+     * <em>different</em> groups.
+     * </p>
+     *
+     * @param numberOfClusters
+     *            the number of groups (or clusters); must not be negative
+     * @param sizeOfClusters
+     *            the number of vertices in a group; must not be negative
+     * @param interClusterProbability
+     *            the probability of two vertices in the same group being connected
+     * @param intraClusterProbability
+     *            the probability of two vertices not in the same group being connected; must not be greater than
+     *            <code>interClusterProbability</code>
+     * @param seed
+     *            seed for {@link Random} number generation
+     * @return a new graph populated with the generated vertices and edges
+     * @throws IllegalArgumentException
+     *             if a count is negative, if the total number of vertices does not fit in an <code>int</code>, if
+     *             either probability is NaN or outside <code>[0, 1]</code>, or if
+     *             <code>interClusterProbability</code> is less than <code>intraClusterProbability</code>
+     */
+    public static UndirectedGraph<Integer, Integer> generateGraph(int numberOfClusters, int sizeOfClusters,
+            double interClusterProbability, double intraClusterProbability, long seed) {
+        if (numberOfClusters < 0) {
+            throw new IllegalArgumentException("Number of groups must not be negative");
+        }
+        if (sizeOfClusters < 0) {
+            throw new IllegalArgumentException("Group size must not be negative");
+        }
+        // Multiply as long so that an oversized request is reported instead of silently wrapping around.
+        if ((long) numberOfClusters * (long) sizeOfClusters > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException("Total number of vertices must not exceed " + Integer.MAX_VALUE);
+        }
+        if (!isProbability(interClusterProbability) || !isProbability(intraClusterProbability)) {
+            throw new IllegalArgumentException("Probabilities must be between 0 and 1");
+        }
+        if (interClusterProbability < intraClusterProbability) {
+            throw new IllegalArgumentException(
+                    "inter group probability must not be less than the intra group probability");
+        }
+        UndirectedGraph<Integer, Integer> graph = new UndirectedSparseGraph<Integer, Integer>();
+        return populateGraph(graph, numberOfClusters, sizeOfClusters, interClusterProbability, intraClusterProbability,
+                seed);
+    }
+
+    /**
+     * Tells whether the value is a usable probability. Written as a positive range test so that NaN, for which
+     * every comparison is false, is rejected.
+     */
+    private static boolean isProbability(double value) {
+        return value >= 0.0 && value <= 1.0;
+    }
+
+    /**
+     * Adds the vertices <code>0 .. numberOfClusters * sizeOfClusters - 1</code> to the graph and then draws one
+     * random number per unordered pair of distinct vertices to decide whether that pair is joined by an edge.
+     *
+     * @return the graph that was passed in
+     */
+    private static UndirectedGraph<Integer, Integer> populateGraph(UndirectedGraph<Integer, Integer> graph,
+            int numberOfClusters, int sizeOfClusters, double interClusterProbability, double intraClusterProbability,
+            long seed) {
+        int graphSize = numberOfClusters * sizeOfClusters;
+        for (int i = 0; i < graphSize; i++) {
+            graph.addVertex(i);
+        }
+        Random rand = new Random(seed);
+        int edges = 0;
+        // Visit each unordered pair exactly once (j < i). The visiting order determines which random draw belongs
+        // to which pair, so it must stay stable for a given seed to keep producing the same graph.
+        for (int i = 1; i < graphSize; i++) {
+            for (int j = 0; j < i; j++) {
+                // Vertex v belongs to cluster v % numberOfClusters.
+                boolean sameCluster = i % numberOfClusters == j % numberOfClusters;
+                double probability = sameCluster ? interClusterProbability : intraClusterProbability;
+                // nextDouble() is uniform on [0, 1), so a strict comparison yields an edge with exactly the
+                // requested probability: never for 0.0 and always for 1.0.
+                if (rand.nextDouble() < probability) {
+                    graph.addEdge(edges++, i, j);
+                }
+            }
+        }
+        return graph;
+    }
+
+}
@@ -0,0 +1,81 @@
+package net.stuarthendren.jung.graph;
+
+import org.junit.Test;
+
+import edu.uci.ics.jung.graph.Graph;
+import edu.uci.ics.jung.graph.util.Pair;
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Tests for the argument validation and the statistical behaviour of
+ * {@link PlantedPartitionGraph#generateGraph(int, int, double, double, long)}.
+ *
+ * <p>
+ * Following the generator's parameter names, "inter" refers to the third argument (the probability used for two
+ * vertices in the same cluster) and "intra" to the fourth (the probability used for vertices in different clusters).
+ * </p>
+ */
+public class PlantedPartitionGraphTest {
+
+    /** A negative number of clusters is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testNegativeGroups() {
+        PlantedPartitionGraph.generateGraph(-2, 10, 0.7, 0.3);
+    }
+
+    /** A negative cluster size is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testNegativeVerticies() {
+        PlantedPartitionGraph.generateGraph(10, -10, 0.7, 0.3);
+    }
+
+    /** A negative value for the third argument (the "inter" probability) is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testNegativeTnterGroupProbability() {
+        // The negative value goes in the third position here; the fourth position is covered by
+        // testNegativeIntraGroupProbability.
+        PlantedPartitionGraph.generateGraph(5, 10, -0.7, 0.3);
+    }
+
+    /** A negative value for the fourth argument (the "intra" probability) is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testNegativeIntraGroupProbability() {
+        PlantedPartitionGraph.generateGraph(5, 10, 0.7, -0.6);
+    }
+
+    /** The fourth argument must not exceed the third. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testIntraNotGreaterThanInterProbability() {
+        PlantedPartitionGraph.generateGraph(5, 10, 0.7, 0.8);
+    }
+
+    /** A third argument greater than one is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInterProbabilityLessThanOne() {
+        PlantedPartitionGraph.generateGraph(5, 10, 1.7, 0.8);
+    }
+
+    /** A fourth argument greater than one is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testIntraProbabilityLessThanOne() {
+        PlantedPartitionGraph.generateGraph(5, 10, 0.7, 1.8);
+    }
+
+    /** The generated graph has exactly numberOfClusters * sizeOfClusters vertices. */
+    @Test()
+    public void testSizeOfGraph() {
+        int numberOfClusters = 5;
+        int sizeOfClusters = 10;
+        Graph<Integer, Integer> graph = PlantedPartitionGraph.generateGraph(numberOfClusters, sizeOfClusters, 0.9, 0.1);
+        assertEquals(numberOfClusters * sizeOfClusters, graph.getVertexCount());
+    }
+
+    /**
+     * With a fixed seed, the observed edge densities inside and between clusters are close to the requested
+     * probabilities.
+     */
+    @Test()
+    public void testProbabilitites() {
+        int numberOfClusters = 50;
+        int sizeOfClusters = 10;
+        int sizeOfGraph = numberOfClusters * sizeOfClusters;
+        double interClusterProbability = 0.9;
+        double intraClusterProbability = 0.1;
+        // Seed 0 makes the graph, and therefore the counts below, reproducible.
+        Graph<Integer, Integer> graph = PlantedPartitionGraph.generateGraph(numberOfClusters, sizeOfClusters,
+                interClusterProbability, intraClusterProbability, 0);
+        int numberOfInterEdges = 0;
+        int numberOfIntraEdges = 0;
+        for (Integer edge : graph.getEdges()) {
+            Pair<Integer> endpoints = graph.getEndpoints(edge);
+            // Two vertices share a cluster when they are equal modulo the number of clusters.
+            if (endpoints.getFirst() % numberOfClusters == endpoints.getSecond() % numberOfClusters) {
+                numberOfInterEdges++;
+            } else {
+                numberOfIntraEdges++;
+            }
+        }
+
+        // n * (n - 1) is always even, so the integer divisions by 2 below are exact.
+        double maxNumberOfInterEdges = numberOfClusters * ((sizeOfClusters * (sizeOfClusters - 1)) / 2);
+        double maxNumberOfIntraEdges = ((sizeOfGraph * (sizeOfGraph - 1)) / 2) - maxNumberOfInterEdges;
+
+        assertEquals(interClusterProbability, numberOfInterEdges / maxNumberOfInterEdges, 0.004);
+        assertEquals(intraClusterProbability, numberOfIntraEdges / maxNumberOfIntraEdges, 0.001);
+
+    }
+}

0 comments on commit 33c6669

Please sign in to comment.