Skip to content

Commit

Permalink
Cleaned up the CdfPartitionFunction to remove the upper bounds array.
Browse files: browse the repository at this point in the history
  • Loading branch information
Michael Pigott committed Jun 23, 2014
1 parent 4bc270e commit f2831db
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 22 deletions.
32 changes: 12 additions & 20 deletions algorithms/src/mpigott/sort/CdfPartitionFunction.java
Expand Up @@ -95,7 +95,7 @@ public CdfPartitionFunction(List<T> input, int cellSize, double alpha, double cd

// First we'll determine the range of values.
min = input.get(0);
max = input.get(0);
T max = input.get(0);

for (int index = 1; index < input.size(); ++index) {
T value = input.get(index);
Expand All @@ -110,11 +110,6 @@ public CdfPartitionFunction(List<T> input, int cellSize, double alpha, double cd

perCellRange = (max.distance(min) + 1.0) / numCells;

perCellUpperBounds = new double[(int) numCells];
for (int i = 0; i < numCells; ++i) {
perCellUpperBounds[i] = (i + 1) * perCellRange;
}

int[] sampleCountsPerCell = new int[(int) numCells];

/* The next step is to randomly sample elements from the list to
Expand All @@ -128,7 +123,6 @@ public CdfPartitionFunction(List<T> input, int cellSize, double alpha, double cd
for (int index = 0; index < input.size(); index += randomSampleIndex) {
int cell = (int) (input.get(index).distance(min) / perCellRange);
if (cell >= sampleCountsPerCell.length) {
System.out.println("input.get(" + index + "){" + input.get(index) + "}.distance({" + min + "}) {" + input.get(index).distance(min) + "} / perCellRange {" + perCellRange + "} = " + cell + "; sampleCountsPerCell.length = " + sampleCountsPerCell.length);
cell = sampleCountsPerCell.length - 1;
}
++sampleCountsPerCell[cell];
Expand All @@ -145,21 +139,21 @@ public CdfPartitionFunction(List<T> input, int cellSize, double alpha, double cd
cdf[cellIndex] = (sampleCountsPerCell[cellIndex] + 1.0) / sc + prevSi;
prevSi = cdf[cellIndex];
}

}

/**
* Returns the class of the input <code>value</code>
* Returns the class of the input <code>element</code>
* based on the CDF function of the sample sizes.
*
* @see PartitionFunction#getClass(Element)
*/
public int getClass(T value) {
int cellNum = (int) (value.distance(min) / perCellRange);
public int getClass(T element) {
double value = element.distance(min);

int cellNum = (int) (value / perCellRange);
if (cellNum >= cdf.length) {
cellNum = cdf.length - 1;
}
final double prevCdf = (cellNum == 0.0) ? 0.0 : cdf[cellNum - 1];

/* "The second step finds px, the cumulative probability or CDF of x.
* It equals to the cumulative probability of its preceding cell
Expand All @@ -172,14 +166,14 @@ public int getClass(T value) {
* previous cell, m is the slope calculated above, and x is the
* current value.
*/
final double slope = (cdf[cellNum] - prevCdf) / perCellRange;

final double x = value.distance(min) -
((cellNum == 0) ? 0.0 : perCellUpperBounds[cellNum - 1]);
double prevRange = cellNum * perCellRange;
double prevCdf = (cellNum == 0) ? 0.0 : cdf[cellNum - 1];

final double currCdf = slope * x + prevCdf;
final double slope = (cdf[cellNum] - prevCdf) / perCellRange;
final double x = value - prevRange;
final double px = slope * x + prevCdf;

return (int) (currCdf * numCells);
return (int) (px * numCells);
}

/**
Expand All @@ -192,9 +186,7 @@ public int getNumClasses() {
}

private T min;
private T max;
private double[] cdf;
private final double numCells;
private final double perCellRange;
private double[] perCellUpperBounds;
}
4 changes: 2 additions & 2 deletions algorithms/test/mpigott/sort/CdfDataPartitionSortTest.java
Expand Up @@ -142,7 +142,7 @@ public void testRandomPartitioning3() {

@Test
public void testVariousSorters() {
ArrayList<NumericElement<Double>> cdfPartitionInput = createRandomInput(85000, -250000.0, 1250000.0);
ArrayList<NumericElement<Double>> cdfPartitionInput = createRandomInput(1000000, -250000.0, 1250000.0);
ArrayList<NumericElement<Double>> flashSortInput = (ArrayList<NumericElement<Double>>) cdfPartitionInput.clone();

CdfPartitionFunction<NumericElement<Double>, Double> cdfPartitionFunc =
Expand All @@ -158,7 +158,7 @@ public void testVariousSorters() {
}

FlashSortPartitionFunction<NumericElement<Double>, Double> fsPartitionFunc =
new FlashSortPartitionFunction<NumericElement<Double>, Double>(flashSortInput, 85);
new FlashSortPartitionFunction<NumericElement<Double>, Double>(flashSortInput, 1000);

int[] fsClassBounds = CyclePartitioner.partition(flashSortInput, fsPartitionFunc);

Expand Down

0 comments on commit f2831db

Please sign in to comment.