This repository has been archived by the owner on May 10, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding PlantedPartitionGraph generator for cluster algorithm testing
- Loading branch information
1 parent
af23f50
commit 33c6669
Showing
2 changed files
with
191 additions
and
0 deletions.
There are no files selected for viewing
110 changes: 110 additions & 0 deletions
110
jung-extensions/src/main/java/net/stuarthendren/jung/graph/PlantedPartitionGraph.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
package net.stuarthendren.jung.graph; | ||
|
||
import java.util.Random; | ||
|
||
import edu.uci.ics.jung.graph.UndirectedGraph; | ||
import edu.uci.ics.jung.graph.UndirectedSparseGraph; | ||
|
||
/** | ||
* <p> | ||
* A planted partition graph is made up of <code>l</code> group each of size <code>k</code>. The probability of two | ||
* vertices in the same group having an edge between them is <code>p</code> and otherwise the probability is | ||
* <code>q</code> where <code>0 <= q < p <= 1</code> | ||
* </p> | ||
* <p> | ||
* It can be used to test clustering algorithms, the greater the difference between <code>p</code> and <code>q</code> | ||
* the easier it should be to detect the planted clusters. | ||
* </p> | ||
* | ||
* @author Stuart Hendren | ||
* | ||
*/ | ||
@SuppressWarnings("serial") | ||
public class PlantedPartitionGraph extends UndirectedSparseGraph<Integer, Integer> { | ||
|
||
/** | ||
* Create a planted partition graph. | ||
* | ||
* | ||
* @param numberOfClusters | ||
* the number of groups (or clusters) | ||
* @param sizeOfClusters | ||
* the number of vertices in a group | ||
* @param interClusterProbability | ||
* the probability of two vertices in the same group being connected | ||
* @param intraClusterProbability | ||
* the probability of two vertices not in the same group being connected must be less than interGroup | ||
* probability | ||
* | ||
*/ | ||
public static UndirectedGraph<Integer, Integer> generateGraph(int numberOfClusters, int sizeOfClusters, | ||
double interClusterProbability, double intraClusterProbability) { | ||
return generateGraph(numberOfClusters, sizeOfClusters, interClusterProbability, intraClusterProbability, | ||
System.currentTimeMillis()); | ||
} | ||
|
||
/** | ||
* Create a planted partition graph. | ||
* | ||
* | ||
* @param numberOfClusters | ||
* the number of groups (or clusters) | ||
* @param sizeOfClusters | ||
* the number of vertices in a group | ||
* @param interClusterProbability | ||
* the probability of two vertices in the same group being connected | ||
* @param intraClusterProbability | ||
* the probability of two vertices not in the same group being connected must be less than interGroup | ||
* probability | ||
* @param seed | ||
* seed for {@link Random} number generation | ||
* | ||
*/ | ||
public static UndirectedGraph<Integer, Integer> generateGraph(int numberOfClusters, int sizeOfClusters, | ||
double interClusterProbability, double intraClusterProbability, long seed) { | ||
if (numberOfClusters < 0) { | ||
throw new IllegalArgumentException("Number of groups must be positive"); | ||
} | ||
if (sizeOfClusters < 0) { | ||
throw new IllegalArgumentException("Group size must be positive"); | ||
} | ||
if (interClusterProbability < 0.0 || interClusterProbability > 1.0 || intraClusterProbability < 0.0 | ||
|| intraClusterProbability > 1.0) { | ||
throw new IllegalArgumentException("Probabilities must be between 0 and 1"); | ||
} | ||
if (interClusterProbability < intraClusterProbability) { | ||
throw new IllegalArgumentException("inter group probability must be less than the intra group probability"); | ||
} | ||
UndirectedGraph<Integer, Integer> graph = new UndirectedSparseGraph<Integer, Integer>(); | ||
return populateGraph(graph, numberOfClusters, sizeOfClusters, interClusterProbability, intraClusterProbability, | ||
seed); | ||
} | ||
|
||
private static UndirectedGraph<Integer, Integer> populateGraph(UndirectedGraph<Integer, Integer> graph, | ||
int numberOfClusters, int sizeOfClusters, double interClusterProbability, double intraClusterProbability, | ||
long seed) { | ||
int graphSize = numberOfClusters * sizeOfClusters; | ||
for (int i = 0; i < graphSize; i++) { | ||
graph.addVertex(i); | ||
} | ||
Random rand = new Random(seed); | ||
int edges = 0; | ||
for (int i = 0; i < graphSize; i++) { | ||
for (int j = 0; j < graphSize; j++) { | ||
if (i > j) { | ||
if (i % numberOfClusters == j % numberOfClusters) { | ||
if (rand.nextDouble() <= interClusterProbability) { | ||
graph.addEdge(edges++, i, j); | ||
} | ||
} else { | ||
if (rand.nextDouble() <= intraClusterProbability) { | ||
graph.addEdge(edges++, i, j); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
return graph; | ||
} | ||
|
||
} |
81 changes: 81 additions & 0 deletions
81
jung-extensions/src/test/java/net/stuarthendren/jung/graph/PlantedPartitionGraphTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package net.stuarthendren.jung.graph; | ||
|
||
import org.junit.Test; | ||
|
||
import edu.uci.ics.jung.graph.Graph; | ||
import edu.uci.ics.jung.graph.util.Pair; | ||
import static junit.framework.Assert.assertEquals; | ||
|
||
public class PlantedPartitionGraphTest { | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testNegativeGroups() { | ||
PlantedPartitionGraph.generateGraph(-2, 10, 0.7, 0.3); | ||
} | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testNegativeVerticies() { | ||
PlantedPartitionGraph.generateGraph(10, -10, 0.7, 0.3); | ||
} | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testNegativeTnterGroupProbability() { | ||
PlantedPartitionGraph.generateGraph(5, 10, 0.7, -0.3); | ||
} | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testNegativeIntraGroupProbability() { | ||
PlantedPartitionGraph.generateGraph(5, 10, 0.7, -0.6); | ||
} | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testIntraNotGreaterThanInterProbability() { | ||
PlantedPartitionGraph.generateGraph(5, 10, 0.7, 0.8); | ||
} | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testInterProbabilityLessThanOne() { | ||
PlantedPartitionGraph.generateGraph(5, 10, 1.7, 0.8); | ||
} | ||
|
||
@Test(expected = IllegalArgumentException.class) | ||
public void testIntraProbabilityLessThanOne() { | ||
PlantedPartitionGraph.generateGraph(5, 10, 0.7, 1.8); | ||
} | ||
|
||
@Test() | ||
public void testSizeOfGraph() { | ||
int numberOfClusters = 5; | ||
int sizeOfClusters = 10; | ||
Graph<Integer, Integer> graph = PlantedPartitionGraph.generateGraph(numberOfClusters, sizeOfClusters, 0.9, 0.1); | ||
assertEquals(numberOfClusters * sizeOfClusters, graph.getVertexCount()); | ||
} | ||
|
||
@Test() | ||
public void testProbabilitites() { | ||
int numberOfClusters = 50; | ||
int sizeOfClusters = 10; | ||
int sizeOfGraph = numberOfClusters * sizeOfClusters; | ||
double interClusterProbability = 0.9; | ||
double intraClusterProbability = 0.1; | ||
Graph<Integer, Integer> graph = PlantedPartitionGraph.generateGraph(numberOfClusters, sizeOfClusters, | ||
interClusterProbability, intraClusterProbability, 0); | ||
int numberOfInterEdges = 0; | ||
int numberOfIntraEdges = 0; | ||
for (Integer edge : graph.getEdges()) { | ||
Pair<Integer> endpoints = graph.getEndpoints(edge); | ||
if (endpoints.getFirst() % numberOfClusters == endpoints.getSecond() % numberOfClusters) { | ||
numberOfInterEdges++; | ||
} else { | ||
numberOfIntraEdges++; | ||
} | ||
} | ||
|
||
double maxNumberOfInterEdges = numberOfClusters * ((sizeOfClusters * (sizeOfClusters - 1)) / 2); | ||
double maxNumberOfIntraEdges = ((sizeOfGraph * (sizeOfGraph - 1)) / 2) - maxNumberOfInterEdges; | ||
|
||
assertEquals(interClusterProbability, numberOfInterEdges / maxNumberOfInterEdges, 0.004); | ||
assertEquals(intraClusterProbability, numberOfIntraEdges / maxNumberOfIntraEdges, 0.001); | ||
|
||
} | ||
} |