Skip to content

Commit

Permalink
Update IntList slab creation to keep bumping up size gradually
Browse files Browse the repository at this point in the history
  • Loading branch information
Piyush Narang committed Apr 18, 2016
1 parent ebf1c58 commit 9617015
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 8 deletions.
Expand Up @@ -31,7 +31,13 @@
*/
public class IntList {

private static final int SLAB_SIZE = 64 * 1024;
private static final int MAX_SLAB_SIZE = 64 * 1024;
private static final int INITIAL_SLAB_SIZE = 4 * 1024;

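//Double the slab size on every allocation until it reaches MAX_SLAB_SIZE; from then on each new
//slab is MAX_SLAB_SIZE. This avoids allocating very large slabs up front when there is little
//data. The field starts at half of INITIAL_SLAB_SIZE because the size is doubled before a slab
//is allocated, so the first slab holds INITIAL_SLAB_SIZE ints.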
private int currentSlabSize = INITIAL_SLAB_SIZE / 2;

/**
* to iterate on the content of the list
Expand All @@ -43,14 +49,17 @@ public class IntList {
public static class IntIterator {

private final int[][] slabs;
private int current;
private final int count;

private int current;
private int currentRow;
private int currentCol;

/**
* slabs will be iterated in order up to the provided count
* as the last slab may not be full
* @param slabs contain the ints
* @param count total count of ints
* @param count count of ints
*/
public IntIterator(int[][] slabs, int count) {
this.slabs = slabs;
Expand All @@ -68,11 +77,19 @@ public boolean hasNext() {
* @return the next int
*/
public int next() {
final int result = slabs[current / SLAB_SIZE][current % SLAB_SIZE];
++ current;
final int result = slabs[currentRow][currentCol];
incrementPosition();
return result;
}

//Step forward by one element: advance both the running element counter and the column inside
//the current slab, and roll over to column 0 of the next slab once this slab is used up.
private void incrementPosition() {
  ++current;
  ++currentCol;
  final boolean slabExhausted = currentCol >= slabs[currentRow].length;
  if (slabExhausted) {
    ++currentRow;
    currentCol = 0;
  }
}
}

private List<int[]> slabs = new ArrayList<int[]>();
Expand All @@ -83,10 +100,21 @@ public int next() {
private int currentSlabPos;

private void initSlab() {
currentSlab = new int[SLAB_SIZE];
updateCurrentSlabSize();
currentSlab = new int[currentSlabSize];
currentSlabPos = 0;
}

//Grow currentSlabSize by a factor of two, never exceeding MAX_SLAB_SIZE.
//Once the cap has been reached this is a no-op.
private void updateCurrentSlabSize() {
  if (currentSlabSize >= MAX_SLAB_SIZE) {
    return;
  }
  currentSlabSize = Math.min(currentSlabSize * 2, MAX_SLAB_SIZE);
}

/**
* @param i value to append to the end of the list
*/
Expand All @@ -113,14 +141,19 @@ public IntIterator iterator() {

int[][] itSlabs = slabs.toArray(new int[slabs.size() + 1][]);
itSlabs[slabs.size()] = currentSlab;
return new IntIterator(itSlabs, SLAB_SIZE * slabs.size() + currentSlabPos);
return new IntIterator(itSlabs, size());
}

/**
* @return the current size of the list
*/
public int size() {
return SLAB_SIZE * slabs.size() + currentSlabPos;
int size = currentSlabPos;
for(int [] slab : slabs) {
size += slab.length;
}

return size;
}

}
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.column.values.dictionary;

import org.junit.Test;

import junit.framework.Assert;

public class IntListTest {

  /**
   * Test IntList of fairly small size (< 4K), this tests a single slab being created
   */
  @Test
  public void testSmallList() {
    doTestIntList(100);
  }

  /**
   * Test IntList of exactly 4K values, which matches the initial slab size, so that
   * iteration ends right on a slab boundary
   */
  @Test
  public void testListOfExactly4K() {
    doTestIntList(4 * 1024);
  }

  /**
   * Test IntList > 4K so that we have multiple slabs being created
   */
  @Test
  public void testListGreaterThan4K() {
    doTestIntList(6000);
  }

  /**
   * Test IntList of a fairly large size (> 300K) so that we have multiple slabs
   * created of varying sizes
   */
  @Test
  public void testListGreaterThan300K() {
    doTestIntList(310000);
  }

  /**
   * Fills a fresh IntList with the values 0 .. testSize - 1, then checks both the
   * reported size and the values produced by its iterator.
   *
   * @param testSize number of ints to add to the list
   */
  private void doTestIntList(int testSize) {
    IntList testList = new IntList();
    populateList(testList, testSize);

    //size() is computed from the slab lengths, so verify it independently of the iterator
    Assert.assertEquals("size() should match the number of added values",
        testSize, testList.size());
    verifyIteratorResults(testSize, testList);
  }

  /**
   * Appends the values 0 .. size - 1 to the list, in order.
   *
   * @param testList list to append to
   * @param size number of values to append
   */
  private void populateList(IntList testList, int size) {
    for (int i = 0; i < size; i++) {
      testList.add(i);
    }
  }

  /**
   * Walks the list's iterator and checks that it yields 0, 1, 2, ... in order and
   * stops after exactly testSize values.
   *
   * @param testSize number of values the iterator is expected to produce
   * @param testList list whose iterator is checked
   */
  private void verifyIteratorResults(int testSize, IntList testList) {
    IntList.IntIterator iterator = testList.iterator();
    int expected = 0;

    while (iterator.hasNext()) {
      int val = iterator.next();
      Assert.assertEquals(expected, val);
      expected++;
    }

    //ensure the iterator produced exactly testSize values
    Assert.assertEquals("iterator should produce every added value",
        testSize, expected);
  }
}

0 comments on commit 9617015

Please sign in to comment.