/
ConnectionsCollector.java
108 lines (91 loc) · 3.23 KB
/
ConnectionsCollector.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package cleo.search.bootstrap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import cleo.search.Element;
import cleo.search.store.ArrayStoreElement;
import cleo.search.store.ConnectionsStore;
/**
 * ConnectionsCollector
 *
 * <p>Accumulates, in memory, a mapping from term prefixes to the elements that
 * have a term starting with that prefix ({@link #collect}), and later writes
 * that mapping to a {@link ConnectionsStore} as arrays of element ids ordered
 * by descending element score ({@link #store}).
 *
 * <p>All public methods are synchronized on this instance.
 *
 * @author jwu
 * @since 02/06, 2011
 */
public class ConnectionsCollector {
  /**
   * Initial capacity of each per-prefix element list.
   * NOTE(review): every distinct prefix allocates a list of this capacity, which
   * looks generous for prefixes matched by only a few elements - confirm the
   * value against real data before changing it.
   */
  private static final int INITIAL_LIST_CAPACITY = 500;

  /** Prefix -> elements having at least one term that starts with the prefix. */
  private final HashMap<String, ArrayList<Element>> map;

  /**
   * Creates an empty collector.
   *
   * @param initialCapacity initial capacity of the underlying prefix map
   */
  public ConnectionsCollector(int initialCapacity) {
    map = new HashMap<String, ArrayList<Element>>(initialCapacity);
  }

  /** Discards everything collected so far. */
  public synchronized void clear() {
    map.clear();
  }

  /**
   * Scans every slot of the given element store and registers each non-null
   * element under every prefix (length 1 up to {@code maxKeyLength}) of each of
   * its terms.
   *
   * <p>An element is added once per (term, prefix) pair, so an element whose
   * terms share a prefix appears more than once in that prefix's list.
   *
   * @param elementStore store to scan; slots are read from
   *        {@code getIndexStart()} for {@code capacity()} consecutive indexes
   * @param maxKeyLength maximum prefix length to generate; values below 1
   *        produce no prefixes
   */
  public synchronized <E extends Element> void collect(ArrayStoreElement<E> elementStore, int maxKeyLength) {
    int indexStart = elementStore.getIndexStart();
    for (int i = 0, cnt = elementStore.capacity(); i < cnt; i++) {
      E element = elementStore.getElement(indexStart + i);
      if (element == null) {
        continue; // empty slot
      }
      for (String term : element.getTerms()) {
        int len = Math.min(term.length(), maxKeyLength);
        for (int k = 1; k <= len; k++) {
          addConnection(term.substring(0, k), element);
        }
      }
    }
  }

  /**
   * Writes every collected prefix to the given connections store and then syncs
   * the store.
   *
   * <p>For each prefix the collected elements are sorted in place with
   * {@link ElementScoreCmp} and their ids are stored in that order, stamped with
   * the current time. A one-line summary (number of keys, largest and smallest
   * connection count) is printed to standard output. The collected data is not
   * cleared; call {@link #clear()} for that.
   *
   * @param connectionsStore destination store
   * @throws Exception if the store fails while writing or syncing
   */
  public synchronized void store(ConnectionsStore<String> connectionsStore) throws Exception {
    int keyCnt = 0;
    int maxCnt = 0;
    int minCnt = Integer.MAX_VALUE;
    ElementScoreCmp cmpDesc = new ElementScoreCmp();

    // Iterate entries directly rather than looking each key up again.
    for (Map.Entry<String, ArrayList<Element>> entry : map.entrySet()) {
      ArrayList<Element> list = entry.getValue();
      Collections.sort(list, cmpDesc);

      int[] connections = new int[list.size()];
      for (int i = 0, cnt = connections.length; i < cnt; i++) {
        connections[i] = list.get(i).getElementId();
      }

      connectionsStore.putConnections(entry.getKey(), connections, System.currentTimeMillis());
      maxCnt = Math.max(maxCnt, connections.length);
      minCnt = Math.min(minCnt, connections.length);
      keyCnt++;
    }

    if (keyCnt == 0) {
      // Nothing was collected: report 0 rather than the Integer.MAX_VALUE sentinel.
      minCnt = 0;
    }

    connectionsStore.sync();
    System.out.printf("#keys=%d connectionsMaxCnt=%d connectionsMinCnt=%d%n", keyCnt, maxCnt, minCnt);
  }

  /**
   * Appends {@code connection} to the list kept for {@code source}, creating the
   * list on first use.
   */
  private void addConnection(String source, Element connection) {
    ArrayList<Element> list = map.get(source);
    if (list == null) {
      list = new ArrayList<Element>(INITIAL_LIST_CAPACITY);
      map.put(source, list);
    }
    list.add(connection);
  }

  /**
   * Orders elements by descending score; elements with equal scores are ordered
   * by ascending element id. Elements whose score is NaN are placed after all
   * elements with a numeric score (and ordered by ascending id among
   * themselves), so that the ordering stays consistent as {@link Comparator}
   * requires.
   */
  static class ElementScoreCmp implements Comparator<Element> {
    @Override
    public int compare(Element e0, Element e1) {
      float score0 = e0.getScore();
      float score1 = e1.getScore();

      // Descending order
      if (score0 < score1) {
        return 1;
      }
      if (score0 > score1) {
        return -1;
      }
      if (score0 != score1) {
        // Neither <, > nor ==: at least one score is NaN. NaN sorts last.
        boolean nan0 = Float.isNaN(score0);
        boolean nan1 = Float.isNaN(score1);
        if (nan0 != nan1) {
          return nan0 ? 1 : -1;
        }
      }

      // Tie on score: compare ids explicitly; subtracting them can overflow.
      int id0 = e0.getElementId();
      int id1 = e1.getElementId();
      return id0 < id1 ? -1 : (id0 == id1 ? 0 : 1);
    }
  }
}