/
MatchAlgorithm.java
118 lines (101 loc) · 3.9 KB
/
MatchAlgorithm.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.checkerframework.checker.nullness.qual.NonNull;
/**
 * Hashes the token sequence supplied by a {@link Tokens} instance and hands
 * groups of tokens that share a map key to a {@link MatchCollector}.
 *
 * <p>Each non-EOF token is given a hash that is updated incrementally while
 * walking the token list from back to front: the previous hash is multiplied
 * by {@link #MOD}, the current token's identifier is added, and the identifier
 * of the token {@code minTileSize} positions further on is subtracted after
 * being scaled by {@code MOD^minTileSize}.
 */
class MatchAlgorithm {

    /** Multiplier used each time a token identifier is folded into the hash. */
    private static final int MOD = 37;

    /** {@code MOD} raised to the power {@code minTileSize} (with int wrap-around). */
    private final int lastMod;

    private final Tokens tokens;

    /** The token list obtained from {@code tokens.getTokens()}. */
    private final List<TokenEntry> code;

    private final int minTileSize;

    /**
     * Creates the algorithm for the given tokens.
     *
     * @param tokens      source of the token list and of end-token lookups
     * @param minTileSize offset used when hashing; also returned by
     *                    {@link #getMinimumTileSize()}
     */
    MatchAlgorithm(Tokens tokens, int minTileSize) {
        this.tokens = tokens;
        this.code = tokens.getTokens();
        this.minTileSize = minTileSize;

        // MOD^minTileSize; stays 1 when minTileSize is zero or negative.
        int power = 1;
        for (int remaining = minTileSize; remaining > 0; remaining--) {
            power *= MOD;
        }
        this.lastMod = power;
    }

    /**
     * Returns the token located {@code offset} positions after {@code m}.
     *
     * @param offset distance from {@code m}, added to {@code m.getIndex()}
     * @param m      reference token
     * @return the token stored at the resulting position of the token list
     */
    public TokenEntry tokenAt(int offset, TokenEntry m) {
        int position = m.getIndex() + offset;
        return code.get(position);
    }

    /**
     * @return the minimum tile size this instance was constructed with
     */
    public int getMinimumTileSize() {
        return minTileSize;
    }

    /**
     * Runs the hash, match and grouping phases and returns the sorted matches.
     *
     * <p>The listener is notified with {@code HASH}, {@code MATCH},
     * {@code GROUPING} and {@code DONE}, in that order.
     *
     * @param cpdListener   receiver of the phase notifications
     * @param sourceManager not used by this method
     * @return the matches reported by the collector, in natural order, with the
     *         end token set on every mark
     */
    public List<Match> findMatches(@NonNull CPDListener cpdListener, SourceManager sourceManager) {
        MatchCollector collector = new MatchCollector(this);
        // Done in a helper so the hash map is unreachable once collection ends.
        collectCandidates(cpdListener, collector);

        cpdListener.phaseUpdate(CPDListener.GROUPING);
        List<Match> result = collector.getMatches();
        result.sort(Comparator.naturalOrder());
        for (Match match : result) {
            assignEndTokens(match);
        }

        cpdListener.phaseUpdate(CPDListener.DONE);
        return result;
    }

    /** Hashes all tokens and feeds every multi-token group to the collector. */
    private void collectCandidates(CPDListener cpdListener, MatchCollector collector) {
        cpdListener.phaseUpdate(CPDListener.HASH);
        Map<TokenEntry, Object> groups = hash();

        cpdListener.phaseUpdate(CPDListener.MATCH);
        for (Object bucket : groups.values()) {
            // A bare TokenEntry value means the key was seen only once.
            if (!(bucket instanceof List)) {
                continue;
            }
            @SuppressWarnings("unchecked")
            List<TokenEntry> candidates = (List<TokenEntry>) bucket;
            // Tokens were appended while walking backwards; flip the order.
            Collections.reverse(candidates);
            collector.collect(candidates);
        }
    }

    /** Looks up and stores the end token for each mark of the given match. */
    private void assignEndTokens(Match match) {
        for (Mark mark : match) {
            mark.setEndToken(tokens.getEndToken(mark.getToken(), match));
        }
    }

    /**
     * Walks the token list from last to first, assigns a hash code to every
     * non-EOF token and groups the tokens in a map keyed by the token itself.
     *
     * @return a map whose values are either a single {@link TokenEntry} (key
     *         seen once) or a {@code List<TokenEntry>} (key seen several times)
     */
    @SuppressWarnings("PMD.JumbledIncrementer")
    private Map<TokenEntry, Object> hash() {
        Map<TokenEntry, Object> groups = new HashMap<>(tokens.size());
        int rolling = 0;
        for (int pos = code.size() - 1; pos >= 0; pos--) {
            TokenEntry current = code.get(pos);
            if (current.isEof()) {
                // Restart the hash and fold in up to minTileSize - 1 tokens
                // that precede the EOF, without registering them in the map.
                // The outer index is advanced on purpose; stop early (leaving
                // the index untouched) when another EOF is reached.
                rolling = 0;
                for (int stop = Math.max(0, pos - minTileSize + 1); pos > stop; pos--) {
                    TokenEntry previous = code.get(pos - 1);
                    rolling = MOD * rolling + previous.getIdentifier();
                    if (previous.isEof()) {
                        break;
                    }
                }
                continue;
            }

            // Add the current token and drop the one minTileSize positions on.
            int leaving = tokenAt(minTileSize, current).getIdentifier();
            rolling = MOD * rolling + current.getIdentifier() - lastMod * leaving;
            current.setHashCode(rolling);
            addToGroup(groups, current);
        }
        return groups;
    }

    /**
     * Records {@code token} under its own key. The first occurrence is stored
     * directly; a list is only allocated once a second token maps to the same
     * key, which saves allocations when most keys have a single value.
     */
    private static void addToGroup(Map<TokenEntry, Object> groups, TokenEntry token) {
        Object existing = groups.get(token);
        if (existing instanceof TokenEntry) {
            List<TokenEntry> pair = new ArrayList<>();
            pair.add((TokenEntry) existing);
            pair.add(token);
            groups.put(token, pair);
        } else if (existing != null) {
            @SuppressWarnings("unchecked")
            List<TokenEntry> members = (List<TokenEntry>) existing;
            members.add(token);
        } else {
            groups.put(token, token);
        }
    }
}