Navigation Menu

Skip to content

Commit

Permalink
fix tabs and spaces.
Browse files Browse the repository at this point in the history
  • Loading branch information
wonlay committed Aug 14, 2012
1 parent 34ae4db commit 9afa4df
Showing 1 changed file with 128 additions and 128 deletions.
256 changes: 128 additions & 128 deletions zoie-core/src/main/java/proj/zoie/api/impl/DocIDMapperImpl.java
Expand Up @@ -28,163 +28,163 @@
public class DocIDMapperImpl implements DocIDMapper<DocIDArray>
{
private final int[] _docArray; // the doc id of uid in _uidArray with the same index
private final long[] _uidArray; // partitioned uid array
private final int[] _start; // partition boundaries in _uidArray
private final long[] _filter; // a helper filter to early detect false lookup
private final int _mask; // the mask also the partition count - 1
private static final int MIXER = 2147482951; // a prime number
public DocIDMapperImpl(long[] uidArray)
{
int len = uidArray.length;
int mask = len/4; // 2 uids on average per partition,
private final long[] _uidArray; // partitioned uid array
private final int[] _start; // partition boundaries in _uidArray
private final long[] _filter; // a helper filter to early detect false lookup
private final int _mask; // the mask also the partition count - 1
private static final int MIXER = 2147482951; // a prime number
public DocIDMapperImpl(long[] uidArray)
{
int len = uidArray.length;
int mask = len/4; // 2 uids on average per partition,
// but we divide additional 2 for now

// let's replace all 0's after the first 1 in the mask:
mask |= (mask >> 1);
mask |= (mask >> 2);
mask |= (mask >> 4);
mask |= (mask >> 8);
mask |= (mask >> 16);
_mask = mask; // all 0's replaced with 1's and we get back most of the additional divide of 2,
mask |= (mask >> 1);
mask |= (mask >> 2);
mask |= (mask >> 4);
mask |= (mask >> 8);
mask |= (mask >> 16);
_mask = mask; // all 0's replaced with 1's and we get back most of the additional divide of 2,
// the average per partition is a little bit more than 2 now.

_filter = new long[mask+1]; // one filter bits per partition.
_filter = new long[mask+1]; // one filter bits per partition.
// this filter is optional, just to speed up the false lookup.

// we will set 2 bits in this 64 bits filter per uid. since on average there are a little bit
// more than 2 uids in each partition, so, most of the false lookup will miss at least one
// bit. from one miss, we can tell the uid is definitely not inside the _uidArray.
for(long uid : uidArray)
{
if(uid != ZoieIndexReader.DELETED_UID)
{
for(long uid : uidArray)
{
if(uid != ZoieIndexReader.DELETED_UID)
{
// the hash function is (int)((uid >>> 32) ^ uid) * MIXER,
// and we reduce modulo the number of partitions with "& _mask" (because & is much faster than mod).
int h = (int)((uid >>> 32) ^ uid)* MIXER;
long bits = _filter[h & _mask];
bits |= ((1L << (h >>> 26)));
bits |= ((1L << ((h >> 20) & 0x3F)));
_filter[h & _mask] = bits;
}
}
_start = new int[_mask + 1 + 1]; // we allocate 1 additinal more space for the positions
int h = (int)((uid >>> 32) ^ uid)* MIXER;
long bits = _filter[h & _mask];
bits |= ((1L << (h >>> 26)));
bits |= ((1L << ((h >> 20) & 0x3F)));
_filter[h & _mask] = bits;
}
}
_start = new int[_mask + 1 + 1]; // we allocate 1 additinal more space for the positions

// we first fill the _start array with how many uids fall into each partition:
len = 0;
for(long uid : uidArray)
{
if(uid != ZoieIndexReader.DELETED_UID)
{
_start[((int)((uid >>> 32) ^ uid) * MIXER) & _mask]++;
len++;
}
}
len = 0;
for(long uid : uidArray)
{
if(uid != ZoieIndexReader.DELETED_UID)
{
_start[((int)((uid >>> 32) ^ uid) * MIXER) & _mask]++;
len++;
}
}

// then, we sum them up and get all the boundaries:
int val = 0;
for(int i = 0; i < _start.length; i++)
{
val += _start[i];
_start[i] = val;
}
_start[_mask] = len;
int val = 0;
for(int i = 0; i < _start.length; i++)
{
val += _start[i];
_start[i] = val;
}
_start[_mask] = len;

// now start build the partitioned uid array and docArray:
long[] partitionedUidArray = new long[len];
int[] docArray = new int[len];
long[] partitionedUidArray = new long[len];
int[] docArray = new int[len];

// per each uid, we will reduce the value in _start, and the new value as the index in the
// new partitioned uid array. after all uids processed, _start[0] will be 0, and _start[1]
// will be the previous _start[0], _start[2] will be the previous _start[1] and so on. so
// it's like the _start array is shift one right, that's why we need an additional space
// for the _start array:
for(long uid : uidArray)
{
if(uid != ZoieIndexReader.DELETED_UID)
{
int i = --(_start[((int)((uid >>> 32) ^ uid) * MIXER) & _mask]);
partitionedUidArray[i] = uid;
}
}
for(long uid : uidArray)
{
if(uid != ZoieIndexReader.DELETED_UID)
{
int i = --(_start[((int)((uid >>> 32) ^ uid) * MIXER) & _mask]);
partitionedUidArray[i] = uid;
}
}

// sort all partitions:
int s = _start[0];
for(int i = 1; i < _start.length; i++)
{
int e = _start[i];
if(s < e)
{
Arrays.sort(partitionedUidArray, s, e);
}
s = e;
}
int s = _start[0];
for(int i = 1; i < _start.length; i++)
{
int e = _start[i];
if(s < e)
{
Arrays.sort(partitionedUidArray, s, e);
}
s = e;
}

// assign the corresponding doc ids to the same index as the uid in the uid array
// (note that, at first the doc id of the first uid is 0, for the second uid is 1, and so on):
for(int docid = 0; docid < uidArray.length; docid++)
{
long uid = uidArray[docid];
if(uid != ZoieIndexReader.DELETED_UID)
{
final int p = ((int)((uid >>> 32) ^ uid) * MIXER) & _mask;
int idx = findIndex(partitionedUidArray, uid, _start[p], _start[p + 1]);
if(idx >= 0)
{
docArray[idx] = docid;
}
}
}
_uidArray = partitionedUidArray;
_docArray = docArray;
}
/**
 * Looks up the doc id recorded for the given uid.
 *
 * The uid is hashed with {@code (int)((uid >>> 32) ^ uid) * MIXER}; the low bits
 * ({@code & _mask}) select the partition, and two 6-bit slices of the same hash
 * select two bits in that partition's 64-bit filter word. If either filter bit is
 * clear, the uid is rejected without touching the uid array. Otherwise the
 * partition's slice of {@code _uidArray} is binary-searched.
 *
 * @param uid the uid to look up
 * @return the entry of {@code _docArray} at the position where {@code uid} was
 *         found in {@code _uidArray}, or -1 if the filter rejects the uid or the
 *         search does not find it
 */
public int getDocID(final long uid)
{
  final int hash = (int)((uid >>> 32) ^ uid) * MIXER;
  final int partition = hash & _mask;

  // Both filter bits must be set, otherwise the uid is definitely absent.
  final long required = (1L << (hash >>> 26)) | (1L << ((hash >> 20) & 0x3F));
  if((_filter[partition] & required) != required)
  {
    return -1;
  }

  // Binary search over the inclusive range [lo, hi] of this partition.
  // Per the original author's note, a passing filter is taken to mean the
  // partition holds at least one uid, so lo <= hi is assumed here.
  int lo = _start[partition];
  int hi = _start[partition + 1] - 1;
  for(;;)
  {
    final int mid = (lo + hi) >>> 1;
    final long candidate = _uidArray[mid];
    if(candidate == uid)
    {
      return _docArray[mid];
    }
    if(mid == hi)
    {
      // the range has collapsed to a single, non-matching slot
      return -1;
    }
    if(candidate < uid)
    {
      lo = mid + 1;
    }
    else
    {
      hi = mid;
    }
  }
}
/**
 * Binary-searches the half-open range {@code [begin, end)} of {@code arr} for {@code uid}.
 * The range is expected to be sorted in ascending order for the search to be meaningful.
 *
 * @param arr   the array to search
 * @param uid   the value to look for
 * @param begin first index of the range (inclusive)
 * @param end   end of the range (exclusive)
 * @return an index in the range whose element equals {@code uid}, or -1 when the
 *         range is empty or no match is found
 */
private static final int findIndex(final long[] arr, final long uid, int begin, int end)
{
  if(begin >= end)
  {
    // empty range, nothing to search
    return -1;
  }

  // switch to an inclusive upper bound for the loop below
  int lo = begin;
  int hi = end - 1;
  for(;;)
  {
    final int mid = (lo + hi) >>> 1;
    final long candidate = arr[mid];
    if(candidate == uid)
    {
      return mid;
    }
    if(mid == hi)
    {
      // the range has collapsed to a single, non-matching slot
      return -1;
    }
    if(candidate < uid)
    {
      lo = mid + 1;
    }
    else
    {
      hi = mid;
    }
  }
}
for(int docid = 0; docid < uidArray.length; docid++)
{
long uid = uidArray[docid];
if(uid != ZoieIndexReader.DELETED_UID)
{
final int p = ((int)((uid >>> 32) ^ uid) * MIXER) & _mask;
int idx = findIndex(partitionedUidArray, uid, _start[p], _start[p + 1]);
if(idx >= 0)
{
docArray[idx] = docid;
}
}
}
_uidArray = partitionedUidArray;
_docArray = docArray;
}
/**
 * Looks up the doc id recorded for the given uid.
 *
 * The uid is hashed with {@code (int)((uid >>> 32) ^ uid) * MIXER}; the low bits
 * ({@code & _mask}) select the partition, and two 6-bit slices of the same hash
 * select two bits in that partition's 64-bit filter word. If either filter bit is
 * clear, the uid is rejected without touching the uid array. Otherwise the
 * partition's slice of {@code _uidArray} is binary-searched.
 *
 * @param uid the uid to look up
 * @return the entry of {@code _docArray} at the position where {@code uid} was
 *         found in {@code _uidArray}, or -1 if the filter rejects the uid or the
 *         search does not find it
 */
public int getDocID(final long uid)
{
  final int hash = (int)((uid >>> 32) ^ uid) * MIXER;
  final int partition = hash & _mask;

  // Both filter bits must be set, otherwise the uid is definitely absent.
  final long required = (1L << (hash >>> 26)) | (1L << ((hash >> 20) & 0x3F));
  if((_filter[partition] & required) != required)
  {
    return -1;
  }

  // Binary search over the inclusive range [lo, hi] of this partition.
  // Per the original author's note, a passing filter is taken to mean the
  // partition holds at least one uid, so lo <= hi is assumed here.
  int lo = _start[partition];
  int hi = _start[partition + 1] - 1;
  for(;;)
  {
    final int mid = (lo + hi) >>> 1;
    final long candidate = _uidArray[mid];
    if(candidate == uid)
    {
      return _docArray[mid];
    }
    if(mid == hi)
    {
      // the range has collapsed to a single, non-matching slot
      return -1;
    }
    if(candidate < uid)
    {
      lo = mid + 1;
    }
    else
    {
      hi = mid;
    }
  }
}
/**
 * Binary-searches the half-open range {@code [begin, end)} of {@code arr} for {@code uid}.
 * The range is expected to be sorted in ascending order for the search to be meaningful.
 *
 * @param arr   the array to search
 * @param uid   the value to look for
 * @param begin first index of the range (inclusive)
 * @param end   end of the range (exclusive)
 * @return an index in the range whose element equals {@code uid}, or -1 when the
 *         range is empty or no match is found
 */
private static final int findIndex(final long[] arr, final long uid, int begin, int end)
{
  if(begin >= end)
  {
    // empty range, nothing to search
    return -1;
  }

  // switch to an inclusive upper bound for the loop below
  int lo = begin;
  int hi = end - 1;
  for(;;)
  {
    final int mid = (lo + hi) >>> 1;
    final long candidate = arr[mid];
    if(candidate == uid)
    {
      return mid;
    }
    if(mid == hi)
    {
      // the range has collapsed to a single, non-matching slot
      return -1;
    }
    if(candidate < uid)
    {
      lo = mid + 1;
    }
    else
    {
      hi = mid;
    }
  }
}

public DocIDArray getDocIDArray(long[] uids)
{
Expand Down

0 comments on commit 9afa4df

Please sign in to comment.