Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider encoding when deduplicating strings. #5198

Merged
merged 1 commit into from
Sep 19, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 55 additions & 9 deletions core/src/main/java/org/jruby/Ruby.java
Original file line number Diff line number Diff line change
Expand Up @@ -4716,6 +4716,20 @@ public RubyString getThreadStatus(RubyThread.Status status) {
* could cause contention under heavy concurrent load, so a reexamination
* of this design might be warranted.
*
* Because RubyString.equals does not consider encoding, and MRI's logic for deduplication does need to consider
* encoding, we use a wrapper object as the key. These wrappers need to be used on all get operations, so if we
* don't need to insert anything we reuse that wrapper the next time.
*
* The logic here reads like this:
*
* 1. If the string is not a natural String object, just freeze and return it.
* 2. Use the wrapper from the thread-local cache or create and set a new one.
* 3. Use the wrapper to look up the deduplicated string.
* 4. If there's a dedup in the cache, clear the wrapper for next time and return the dedup.
* 5. Remove the wrapper from the threadlocal to avoid reusing it, since we'll insert it.
* 6. Atomically set the new entry or repair the GCed entry that already exists.
* 7. Return the newly-deduplicated string.
*
* @param string the string to freeze-dup if an equivalent does not already exist
* @return the freeze-duped version of the string
*/
Expand All @@ -4727,18 +4741,31 @@ public RubyString freezeAndDedupString(RubyString string) {
return duped;
}

WeakReference<RubyString> dedupedRef = dedupMap.get(string);
// Get cached wrapper or create new
FStringEqual wrapper = DEDUP_WRAPPER_CACHE.get();
if (wrapper == null) {
wrapper = new FStringEqual(string);
DEDUP_WRAPPER_CACHE.set(wrapper);
} else {
wrapper.string = string;
}

WeakReference<RubyString> dedupedRef = dedupMap.get(wrapper);
RubyString deduped;

if (dedupedRef == null || (deduped = dedupedRef.get()) == null) {
// We will insert wrapper one way or another so clear from threadlocal
DEDUP_WRAPPER_CACHE.remove();

// Never use incoming value as key
deduped = string.strDup(this);
deduped.setFrozen(true);

final WeakReference<RubyString> weakref = new WeakReference<>(deduped);

// try to insert new
dedupedRef = dedupMap.computeIfAbsent(deduped, key -> weakref);
wrapper.string = deduped;
dedupedRef = dedupMap.computeIfAbsent(wrapper, key -> weakref);
if (dedupedRef == null) return deduped;

// entry exists, return result if not vacated
Expand All @@ -4747,22 +4774,41 @@ public RubyString freezeAndDedupString(RubyString string) {

// ref is there but vacated, try to replace it until we have a result
while (true) {
dedupedRef = dedupMap.computeIfPresent(string, (key, old) -> old.get() == null ? weakref : old);
wrapper.string = string;
dedupedRef = dedupMap.computeIfPresent(wrapper, (key, old) -> old.get() == null ? weakref : old);

// return result if not vacated
unduped = dedupedRef.get();
if (unduped != null) return unduped;
}
} else if (deduped.getEncoding() != string.getEncoding()) {
// if encodings don't match, new string loses; can't dedup
// FIXME: This may never happen, if we are properly considering encoding in RubyString.hashCode
deduped = string.strDup(this);
deduped.setFrozen(true);
} else {
// Do not retain string if we can reuse the wrapper
wrapper.string = null;
}

return deduped;
}

static class FStringEqual {
RubyString string;
FStringEqual(RubyString string) {
this.string = string;
}
public boolean equals(Object other) {
if (other instanceof FStringEqual) {
RubyString otherString = ((FStringEqual) other).string;
return this.string.equals(otherString) && this.string.getEncoding() == otherString.getEncoding();
}
return false;
}

public int hashCode() {
return string.hashCode();
}
}

private final ThreadLocal<FStringEqual> DEDUP_WRAPPER_CACHE = new ThreadLocal<>();

public int getRuntimeNumber() {
return runtimeNumber;
}
Expand Down Expand Up @@ -5264,7 +5310,7 @@ public void addToObjectSpace(boolean useObjectSpace, IRubyObject object) {
*
* Access must be synchronized.
*/
private ConcurrentHashMap<RubyString, WeakReference<RubyString>> dedupMap = new ConcurrentHashMap<>();
private ConcurrentHashMap<FStringEqual, WeakReference<RubyString>> dedupMap = new ConcurrentHashMap<>();

private static final AtomicInteger RUNTIME_NUMBER = new AtomicInteger(0);
private final int runtimeNumber = RUNTIME_NUMBER.getAndIncrement();
Expand Down