/
SearchResult.java
115 lines (98 loc) · 3.6 KB
/
SearchResult.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package dk.netarkivet.harvester.webinterface.servlet;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
/**
 * Regex-filtered view of a Heritrix 3 job's crawl log.
 *
 * <p>Lines of the job's log file that fully match the search pattern are copied into a
 * per-search result log file, and the cumulative end offset of every copied line is appended
 * to a companion index file so the result can be read page by page.
 *
 * <p>The methods that touch the underlying {@link RandomAccessFile}s are {@code synchronized}
 * on this instance.
 */
public class SearchResult implements Pageable {

    /** Initial size (1 MiB) of the read buffer used by {@link #update()}. */
    private static final int INITIAL_BUFFER_SIZE = 1024 * 1024;

    /** Job whose {@code logFile} is scanned. */
    protected Heritrix3JobMonitor h3Job;

    /** Compiled, case-insensitive search pattern. */
    protected Pattern p;

    /** Reusable matcher for {@link #p}; it is {@code reset} with each log line. */
    protected Matcher m;

    /** File receiving the matching log lines. */
    protected File srLogFile;

    /** Open read/write handle on {@link #srLogFile}. */
    protected RandomAccessFile srLogRaf;

    /** File receiving one {@code long} offset per matching line (preceded by an initial 0). */
    protected File srIdxFile;

    /** Open read/write handle on {@link #srIdxFile}. */
    protected RandomAccessFile srIdxRaf;

    /** Byte offset in the job's log file up to which complete lines have been scanned. */
    protected long lastIndexed;

    /**
     * Creates (or truncates) the result log and index files for one search and writes the
     * initial index entry {@code 0}.
     *
     * @param environment provides {@code tempPath}, the directory in which the files are created
     * @param h3Job job whose crawl log is searched; its {@code jobId} is part of the file names
     * @param q regular expression a whole log line must match (compiled case-insensitively)
     * @param searchResultNr number used to make the file names unique per search
     * @throws IOException if a file cannot be opened, truncated or written
     */
    public SearchResult(NASEnvironment environment, Heritrix3JobMonitor h3Job, String q, int searchResultNr) throws IOException {
        this.h3Job = h3Job;
        p = Pattern.compile(q, Pattern.CASE_INSENSITIVE);
        // The input is a placeholder; the matcher is reset with the real line before each use.
        m = p.matcher("42");
        srLogFile = new File(environment.tempPath, "crawllog-" + h3Job.jobId + "-" + searchResultNr + ".log");
        srIdxFile = new File(environment.tempPath, "crawllog-" + h3Job.jobId + "-" + searchResultNr + ".idx");
        boolean initialized = false;
        try {
            srLogRaf = new RandomAccessFile(srLogFile, "rw");
            srLogRaf.setLength(0);
            srIdxRaf = new RandomAccessFile(srIdxFile, "rw");
            srIdxRaf.setLength(0);
            srIdxRaf.writeLong(0);
            initialized = true;
        } finally {
            if (!initialized) {
                // Do not leak a handle that was opened before a later step failed.
                IOUtils.closeQuietly(srLogRaf);
                IOUtils.closeQuietly(srIdxRaf);
            }
        }
        lastIndexed = 0;
    }

    /**
     * Scans the job's log file from {@link #lastIndexed} to its current end, appending every
     * complete line that fully matches the pattern to the result log and its end offset to the
     * result index.
     *
     * <p>Only lines terminated by {@code '\n'} are consumed; a trailing partial line is left
     * for a later call. A {@code '\r'} directly before the {@code '\n'} is excluded from the
     * text handed to the matcher, but is still copied to the result log.
     *
     * @throws IOException if reading the job log or writing the result files fails, or if a
     *         single line is too long to be buffered
     */
    public synchronized void update() throws IOException {
        // try-with-resources: the job log handle is released even when a read or write fails.
        try (RandomAccessFile logRaf = new RandomAccessFile(h3Job.logFile, "r")) {
            logRaf.seek(lastIndexed);
            srLogRaf.seek(srLogRaf.length());
            srIdxRaf.seek(srIdxRaf.length());
            FileChannel logChannel = logRaf.getChannel();
            byte[] bytes = new byte[INITIAL_BUFFER_SIZE];
            ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
            // Running end offset within the result log; written to the index after each match.
            long index = srLogRaf.length();
            while (logChannel.read(byteBuffer) != -1) {
                byteBuffer.flip();
                int limit = byteBuffer.limit();
                int pos = byteBuffer.position();
                // Start of the line currently being scanned.
                int mark = pos;
                while (pos < limit) {
                    if (bytes[pos++] == '\n') {
                        int to = pos - 1;
                        // Guard with to > mark so an empty line never inspects a byte that
                        // belongs to the previous line (or lies before the buffer start).
                        if (to > mark && bytes[to - 1] == '\r') {
                            --to;
                        }
                        String line = new String(bytes, mark, to - mark, "UTF-8");
                        m.reset(line);
                        if (m.matches()) {
                            srLogRaf.write(bytes, mark, pos - mark);
                            index += pos - mark;
                            srIdxRaf.writeLong(index);
                        }
                        lastIndexed += pos - mark;
                        mark = pos;
                    }
                }
                // Keep the unterminated tail at the front of the buffer for the next read.
                byteBuffer.position(mark);
                byteBuffer.compact();
                if (!byteBuffer.hasRemaining()) {
                    // A single line fills the whole buffer. Reading into a full buffer returns
                    // 0 rather than -1, which would loop forever, so enlarge the buffer.
                    int filled = byteBuffer.position();
                    if (bytes.length > Integer.MAX_VALUE / 2) {
                        throw new IOException("Log line too long to buffer (" + filled + " bytes without a newline)");
                    }
                    byte[] grown = new byte[bytes.length * 2];
                    System.arraycopy(bytes, 0, grown, 0, filled);
                    bytes = grown;
                    byteBuffer = ByteBuffer.wrap(bytes);
                    byteBuffer.position(filled);
                }
            }
        }
    }

    /**
     * Returns the current length in bytes of the result index file.
     */
    @Override
    public long getIndexSize() {
        return srIdxFile.length();
    }

    /**
     * Returns the current length in bytes of the result log file.
     */
    @Override
    public long getLastIndexed() {
        return srLogFile.length();
    }

    /**
     * Reads one page of the search result by delegating to
     * {@code StringIndexFile.readPage} with the result index and result log handles.
     *
     * @param page page number, passed through unchanged
     * @param itemsPerPage number of items per page, passed through unchanged
     * @param descending ordering flag, passed through unchanged
     * @return the bytes returned by {@code StringIndexFile.readPage}
     * @throws IOException if reading the index or log fails
     */
    @Override
    public synchronized byte[] readPage(long page, long itemsPerPage, boolean descending) throws IOException {
        return StringIndexFile.readPage(srIdxRaf, srLogRaf, page, itemsPerPage, descending);
    }

    /**
     * Closes the open handles on both the result index file and the result log file,
     * ignoring any errors raised while closing. The files themselves are not deleted.
     */
    public synchronized void cleanup() {
        IOUtils.closeQuietly(srIdxRaf);
        IOUtils.closeQuietly(srLogRaf);
    }
}