Skip to content

Commit

Permalink
Minor (~2%) increase in cache hit rate
Browse files Browse the repository at this point in the history
By including length in the hash function.

I tried various other changes, including a cache size of 2048 and not replacing entries on conflict. The larger cache did not give a hit rate improvement commensurate with the extra size, and not replacing conflicts led to a significant drop in the hit rate.
  • Loading branch information
jhy committed Feb 17, 2020
1 parent 265653e commit 7a40591
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/main/java/org/jsoup/parser/CharacterReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ public final class CharacterReader {
private int bufPos;
private int readerPos;
private int bufMark = -1;
private String[] stringCache = new String[512]; // holds reused strings in this doc, to lessen garbage
private static final int stringCacheSize = 512;
private String[] stringCache = new String[stringCacheSize]; // holds reused strings in this doc, to lessen garbage

public CharacterReader(Reader input, int sz) {
Validate.notNull(input);
Expand Down Expand Up @@ -520,6 +521,8 @@ boolean containsIgnoreCase(String seq) {

/**
 * Renders the characters of {@code charBuf} from {@code bufPos} up to {@code bufLength} as a string.
 *
 * @return a new string of {@code bufLength - bufPos} characters starting at {@code bufPos},
 *         or the empty string when that length would be negative
 */
@Override
public String toString() {
    // Compute the length once; a negative value cannot be passed to the String constructor.
    final int remaining = bufLength - bufPos;
    return remaining < 0 ? "" : new String(charBuf, bufPos, remaining);
}

Expand All @@ -538,14 +541,14 @@ private static String cacheString(final char[] charBuf, final String[] stringCac
return "";

// calculate hash:
int hash = 0;
int hash = 31 * count;
int offset = start;
for (int i = 0; i < count; i++) {
hash = 31 * hash + charBuf[offset++];
}

// get from cache
final int index = hash & stringCache.length - 1;
final int index = hash & stringCacheSize - 1;
String cached = stringCache[index];

if (cached == null) { // miss, add
Expand Down

0 comments on commit 7a40591

Please sign in to comment.