Skip to content

Commit

Permalink
TAIL配列圧縮用クラス追加
Browse files Browse the repository at this point in the history
  • Loading branch information
takeru ohta committed Aug 27, 2010
1 parent 1e9a106 commit 84dbf7e
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 20 deletions.
10 changes: 6 additions & 4 deletions src/net/reduls/jada/NodeAllocator.java
Expand Up @@ -18,12 +18,14 @@ public NodeAllocator(int base[], int chck[], final int codeLimit) {
freeNext[i]=-(i+1);
}

freeNext[1] = -codeLimit;
freePrev[codeLimit] = -1;
freeNext[headIndex()] = -codeLimit;
freePrev[codeLimit] = -headIndex();
}


public static int headIndex() { return 1; }

public int allocate(final List<Integer> children) {
int cur = -freeNext[1];
int cur = -freeNext[headIndex()];
final int first = children.get(0);

for(;; cur = -freeNext[-freeNext[-freeNext[cur]]]) {
Expand Down
74 changes: 74 additions & 0 deletions src/net/reduls/jada/ShrinkTail.java
@@ -0,0 +1,74 @@
package net.reduls.jada;

import java.util.List;
import java.util.ArrayList;
import java.util.Collections;

/**
 * Rebuilds a trie's TAIL string so that entries which are suffixes of other
 * entries share the same characters instead of being stored twice.
 *
 * <p>The constructor collects every TAIL entry referenced by a leaf node
 * (a node with {@code chck[i] >= 0} and a negative {@code base[i]}, excluding
 * the allocator's head index). {@link #shrink()} then produces the compacted
 * TAIL and rewrites the corresponding {@code base} slots in place.
 */
final class ShrinkTail {
    /** Character that terminates every entry stored in TAIL. */
    private static final char TERMINATOR = '\0';

    /** Node array shared with the caller; leaf slots are overwritten by {@link #shrink()}. */
    private final int[] base;

    /** TAIL entries gathered from the leaf nodes, one per referencing node. */
    private final List<TTT> ss = new ArrayList<TTT>();

    /**
     * Collects the TAIL entry of every leaf node.
     *
     * @param base     node array; a negative value is the negated offset of the node's entry in {@code tail}.
     *                 This array is kept by reference and modified by {@link #shrink()}
     * @param chck     check array; nodes with a negative value are skipped
     * @param tail     current TAIL string, each entry terminated by {@code '\0'}
     * @param keyCount currently unused; kept so existing callers keep compiling
     * @throws StringIndexOutOfBoundsException if a leaf refers to an offset that has no
     *                                         terminating {@code '\0'} in {@code tail}
     */
    public ShrinkTail(int base[], final int chck[], final String tail, final int keyCount) {
        this.base = base;

        for (int i = 0; i < base.length; i++) {
            if (chck[i] >= 0 && base[i] < 0 && i != NodeAllocator.headIndex()) {
                final int beg = -base[i];
                final int end = tail.indexOf(TERMINATOR, beg);
                if (end < 0) {
                    // Same exception type substring(beg, -1) would raise, but with enough
                    // context to locate the broken node.
                    throw new StringIndexOutOfBoundsException(
                        "TAIL entry of node " + i + " at offset " + beg + " is not terminated");
                }
                ss.add(new TTT(tail.substring(beg, end), i));
            }
        }
    }

    /**
     * Builds the compacted TAIL string and updates {@code base} for every collected leaf
     * to the negated offset of its entry in the new string.
     *
     * @return the new TAIL string; it starts with two {@code '\0'} characters, followed by
     *         the retained entries, each terminated by {@code '\0'}
     */
    public String shrink() {
        // After sorting, an entry that is a suffix of another one directly follows an
        // entry containing it, so checking only the predecessor is sufficient.
        Collections.sort(ss);

        final StringBuilder newTail = new StringBuilder();
        newTail.append("\0\0");

        TTT prev = null;
        for (final TTT t : ss) {
            int pos = newTail.length();
            if (prev != null && prev.including(t)) {
                // Reuse the end of the most recently appended entry.
                pos -= t.s.length() + 1; // +1 is necessary for last '\0' character
            } else {
                newTail.append(t.s).append(TERMINATOR);
            }
            base[t.i] = -pos;
            prev = t;
        }
        return newTail.toString();
    }

    /** A TAIL entry together with the index of the node that refers to it. */
    private static final class TTT implements Comparable<TTT> {
        /** Entry text without its terminator. */
        private final String s;
        /** Index into {@code base} of the node owning this entry. */
        private final int i;

        TTT(String s, int i) {
            this.s = s;
            this.i = i;
        }

        /**
         * Tells whether {@code t}'s text is a suffix of this entry's text.
         * Equal texts and an empty {@code t} both count as included.
         */
        boolean including(TTT t) {
            int i = s.length() - 1;
            int j = t.s.length() - 1;

            for (;; i--, j--) {
                if (j < 0) return true;   // all of t matched
                if (i < 0) return false;  // this entry is shorter than t
                if (s.charAt(i) != t.s.charAt(j)) return false;
            }
        }

        /**
         * Orders entries by comparing their characters from the last one backwards,
         * larger characters first; when one text is a suffix of the other, the longer
         * text sorts first.
         */
        @Override
        public int compareTo(TTT t) {
            int i = s.length() - 1;
            int j = t.s.length() - 1;

            for (;; i--, j--) {
                if (i < 0 && j < 0) return 0;
                if (i < 0) return 1;
                if (j < 0) return -1;
                if (s.charAt(i) > t.s.charAt(j)) return -1;
                if (s.charAt(i) < t.s.charAt(j)) return 1;
            }
        }
    }
}
32 changes: 21 additions & 11 deletions src/net/reduls/jada/TrieBuilder.java
Expand Up @@ -4,14 +4,15 @@
import java.util.ArrayList;

public final class TrieBuilder {
private boolean has_built = false;
private boolean hasBuilt = false;

private CodeStream[] keys;
private NodeAllocator alloca;
private int[] base;
private int[] chck;

private StringBuilder tail = new StringBuilder();
private StringBuilder tailSB = new StringBuilder();
private String tail;

private int charcode[] = new int[0x10001];
private CharFreq charFreqs[] = new CharFreq[0x10001];
Expand All @@ -28,11 +29,15 @@ public TrieBuilder(final List<String> keys) {
chck = new int[nodeLimit];
alloca = new NodeAllocator(base, chck, codeLimit);

tail.append("\0\0");
tailSB.append("\0\0");
}

public Trie build() {
if(has_built==false) {
return build(false);
}

public Trie build(boolean shrinkTail) {
if(hasBuilt==false) {
buildImpl(0, keys.length, 0);

int nodeSize=0;
Expand All @@ -47,26 +52,31 @@ public Trie build() {
System.arraycopy(chck, 0, tmpChck, 0, nodeSize);
base = tmpBase;
chck = tmpChck;

has_built = true;

tail = tailSB.toString();
tailSB.setLength(0);
if(shrinkTail)
tail = new ShrinkTail(base, chck, tail, keys.length).shrink();

hasBuilt = true;
}

BitVector bv = new BitVector(base.length);
for(int i=0; i < base.length; i++)
if(chck[i] >= 0 && base[i] < 0)
if(chck[i] >= 0 && base[i] < 0 && i != NodeAllocator.headIndex())
bv.set(i, true);
bv.buildRankIndex();

return new Trie(base, chck, tail.toString(), charcode, bv);
return new Trie(base, chck, tail, charcode, bv);
}

private void buildImpl(int beg, final int end, final int rootNode) {
if(end-beg == 1) {
if(keys[beg].rest().isEmpty()==false) {
base[rootNode] = -tail.length();
tail.append(keys[beg].rest()+'\0');
base[rootNode] = -tailSB.length();
tailSB.append(keys[beg].rest()+'\0');
} else {
base[rootNode] = -(tail.length()-1);
base[rootNode] = -(tailSB.length()-1);
}
return;
}
Expand Down
15 changes: 10 additions & 5 deletions src/net/reduls/jada/bin/Build.java
Expand Up @@ -10,10 +10,15 @@

public final class Build {
public static void main(String[] args) throws IOException {
if(args.length != 1) {
System.err.println("Usage: java net.reduls.jada.bin.Build index < unique-sorted-key-set");
if(!(args.length == 1 || (args.length==2 && args[0].equals("--shrink")))) {
System.err.println("Usage: java net.reduls.jada.bin.Build [--shrink] index < unique-sorted-key-set");
System.exit(1);
}

// parse arguments
final String indexFilePath = args.length==1 ? args[0] : args[1];
final boolean shrinkTail = args.length==1 ? false : true;

Time t;

System.err.println("= Read key set");
Expand All @@ -31,11 +36,11 @@ public static void main(String[] args) throws IOException {
System.err.println(" == initialize");
TrieBuilder bld = new TrieBuilder(keys);
System.err.println(" == build");
Trie trie = bld.build();
Trie trie = bld.build(shrinkTail);
System.err.println(" === node count: "+trie.nodeCount());
System.err.println(" === tail length: "+trie.tailLength());
System.err.println(" == save");
trie.save(args[0]);
System.err.println(" == save: "+indexFilePath);
trie.save(indexFilePath);
System.err.println("DONE ("+t.elapsed()+" ms)");
}

Expand Down

0 comments on commit 84dbf7e

Please sign in to comment.