Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Implement jar directory snapshot caching. #1546

Merged
merged 2 commits into from

2 participants

@ratnikov

Currently, when we try to load a jar resource, we go through all the jar entries to figure out if there are any that match the directory information. When doing globbing or Dir.entries it also currently scans the jar again.

This PR optimizes this by creating a static JarCache reference that caches information about loaded jars, including representation of the directory structure. The inform1.7.10: user 1m24.332s
HEAD: user 0m19.604sation is stored in a WeakHashMap which should allow it to be garbage collected. Benchmarking on a jar containing files from jruby.git/core (just over 9000 files) shows about 4x improvement:

time bin/jruby -e '5.times { Dir.glob("file:/home/ratnikov/jruby.git/stuff.jar!core/**/*") }'

Results for 1.7.10: 1m24.332s
Results for HEAD: 0m19.604s

I have considered doing a per Ruby runtime cache, but that would require resources to have a reference to a Runtime and caching cross-runtime jar contents is also probably okay.

@ratnikov

I've updated the PR to make sure that caching respects modification time and added a test to verify it.

This didn't really affect runtime for my stuff.jar test:

$ time bin/jruby -e '5.times { Dir.glob("file:/home/ratnikov/jruby.git/stuff.jar!/core/**/*") }'

real    0m11.145s
user    0m21.404s
sys     0m1.048s

Please review/merge. :)

@ratnikov

Seems like the break is due to jruby-1_7 being broken.

ratnikov added some commits
@ratnikov ratnikov Implement jar directory snapshot caching.
This improves jar globbing performance by about 4x on a simple benchmark.

For a jar containing 9156 files, 5.times { Dir.glob("file.jar!/**/*") } is:

1.7.10: user    1m24.332s
HEAD:   user    0m19.604s
1ac6e61
@ratnikov ratnikov Made the jar caching respect changing of jars on the filesystem. f780bce
@enebo enebo merged commit 585f05b into jruby:jruby-1_7
@enebo enebo added this to the JRuby 1.7.12 milestone
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Mar 12, 2014
  1. @ratnikov

    Implement jar directory snapshot caching.

    ratnikov authored
    This improves jar globbing performance by about 4x on a simple benchmark.
    
    For a jar containing 9156 files, 5.times { Dir.glob("file.jar!/**/*") } is:
    
    1.7.10: user    1m24.332s
    HEAD:   user    0m19.604s
  2. @ratnikov
This page is out of date. Refresh to see the latest.
View
120 core/src/main/java/org/jruby/util/JarCache.java
@@ -0,0 +1,120 @@
+package org.jruby.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.WeakHashMap;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+
+/**
+ * Instances of JarCache provides jar index information.
+ *
+ * <p>
+ * Implementation is threadsafe.
+ *
+ * Since loading index information is O(jar-entries) we cache the snapshot in a WeakHashMap.
+ * The implementation pays attention to lastModified timestamp of the jar and will invalidate
+ * the cache entry if jar has been updated since the snapshot calculation.
+ * </p>
+ */
+class JarCache {
+ static class JarIndex {
+ private static final String ROOT_KEY = "";
+
+ final Map<String, String[]> cachedDirEntries;
+ final JarFile jar;
+ final long snapshotCalculated;
+
+ JarIndex(String jarPath) throws IOException {
+ this.jar = new JarFile(jarPath);
+ this.snapshotCalculated = new File(jarPath).lastModified();
+
+ Map<String, Set<String>> mutableCache = new HashMap<String, Set<String>>();
+
+ // Always have a root directory
+ mutableCache.put(ROOT_KEY, new HashSet<String>());
+
+ Enumeration<JarEntry> entries = jar.entries();
+ while (entries.hasMoreElements()) {
+ JarEntry entry = entries.nextElement();
+ String path = entry.getName();
+
+ int lastPathSep;
+ while ((lastPathSep = path.lastIndexOf('/')) != -1) {
+ String dirPath = path.substring(0, lastPathSep);
+
+ if (!mutableCache.containsKey(dirPath)) {
+ mutableCache.put(dirPath, new HashSet<String>());
+ }
+
+ String entryPath = path.substring(lastPathSep + 1);
+
+ // "" is not really a child path, even if we see foo/ entry
+ if (entryPath.length() > 0) {
+ mutableCache.get(dirPath).add(entryPath);
+ }
+
+ path = dirPath;
+ }
+
+ mutableCache.get(ROOT_KEY).add(path);
+ }
+
+ Map<String, String[]> cachedDirEntries = new HashMap<String, String[]>();
+ for (Map.Entry<String, Set<String>> entry : mutableCache.entrySet()) {
+ cachedDirEntries.put(entry.getKey(), entry.getValue().toArray(new String[0]));
+ }
+
+ this.cachedDirEntries = Collections.unmodifiableMap(cachedDirEntries);
+ }
+
+ public JarEntry getJarEntry(String entryPath) {
+ return jar.getJarEntry(entryPath);
+ }
+
+ public void release() {
+ try {
+ jar.close();
+ } catch (IOException ioe) { }
+ }
+
+ public boolean isValid() {
+ return new File(jar.getName()).lastModified() <= snapshotCalculated;
+ }
+ }
+
+ private final Map<String, JarIndex> indexCache = new WeakHashMap<String, JarIndex>();
+
+ public JarIndex getIndex(String jarPath) {
+ String cacheKey = jarPath;
+
+ synchronized (indexCache) {
+ JarIndex index = indexCache.get(cacheKey);
+
+ // If the index is invalid (jar has changed since snapshot was loaded)
+ // we can just treat it as a "new" index and cache the updated results.
+ if (index != null && !index.isValid()) {
+ index.release();
+ index = null;
+ }
+
+ if (index == null) {
+ try {
+ index = new JarIndex(jarPath);
+ indexCache.put(cacheKey, index);
+ } catch (IOException ioe) {
+ return null;
+ }
+ }
+
+ return index;
+ }
+ }
+}
View
55 core/src/main/java/org/jruby/util/JarDirectoryResource.java
@@ -1,10 +1,5 @@
package org.jruby.util;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.jar.JarFile;
-import java.util.jar.JarEntry;
-
/**
* Represents a directory in a jar.
*
@@ -13,28 +8,13 @@
* just that.</p>
*/
class JarDirectoryResource extends JarResource {
- public static JarDirectoryResource create(JarFile jar, String path) {
- String dirPath = path.endsWith("/") ? path : path + "/";
-
- // We always should have something in the jar, so "/" always corresponds to a directory
- if ("/".equals(dirPath)) {
- return new JarDirectoryResource(jar, dirPath);
- }
-
- Enumeration<JarEntry> entries = jar.entries();
- while (entries.hasMoreElements()) {
- if (entries.nextElement().getName().startsWith(dirPath)) {
- return new JarDirectoryResource(jar, dirPath);
- }
- }
- return null;
- }
-
private final String path;
+ private final String[] contents;
- private JarDirectoryResource(JarFile jar, String path) {
- super(jar);
+ JarDirectoryResource(String jarPath, String path, String[] contents) {
+ super(jarPath);
this.path = path;
+ this.contents = contents;
}
@Override
@@ -67,32 +47,7 @@ public long length() {
@Override
public String[] list() {
- HashSet<String> dirs = new HashSet<String>();
-
- Enumeration<JarEntry> entries = jar.entries();
- while (entries.hasMoreElements()) {
- String entryPath = entries.nextElement().getName();
-
- String subPath;
- if (isRoot()) {
- subPath = entryPath;
- } else if (entryPath.startsWith(path) && (entryPath.length() > path.length())) {
- subPath = entryPath.substring(path.length());
- } else {
- // entry's path doesn't match the directory or it's <this> path
- continue;
- }
-
- // If entry is <path>/foo/bar.txt, we want to only return 'foo'
- int slashIndex = subPath.indexOf('/');
- if (slashIndex > 0) {
- subPath = subPath.substring(0, slashIndex);
- }
-
- dirs.add(subPath);
- }
-
- return dirs.toArray(new String[0]);
+ return contents;
}
public boolean isRoot() {
View
19 core/src/main/java/org/jruby/util/JarFileResource.java
@@ -1,13 +1,5 @@
package org.jruby.util;
-import org.jruby.RubyFile;
-import org.jruby.Ruby;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Enumeration;
-import java.util.*;
-import java.util.zip.ZipEntry;
import java.util.jar.JarFile;
import java.util.jar.JarEntry;
@@ -21,15 +13,10 @@
* </p>
*/
class JarFileResource extends JarResource {
- public static JarFileResource create(JarFile jar, String path) {
- JarEntry entry = jar.getJarEntry(path);
- return ((entry != null) && !entry.isDirectory()) ? new JarFileResource(jar, entry) : null;
- }
-
- private final ZipEntry entry;
+ private final JarEntry entry;
- private JarFileResource(JarFile jar, ZipEntry entry) {
- super(jar);
+ JarFileResource(String jarPath, JarEntry entry) {
+ super(jarPath);
this.entry = entry;
}
View
59 core/src/main/java/org/jruby/util/JarResource.java
@@ -2,17 +2,17 @@
import jnr.posix.FileStat;
import jnr.posix.POSIX;
-import org.jruby.Ruby;
import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.jar.JarEntry;
import java.util.jar.JarFile;
public abstract class JarResource implements FileResource {
private static Pattern PREFIX_MATCH = Pattern.compile("^(?:jar:)?(?:file:)?(.*)$");
+ private static final JarCache jarCache = new JarCache();
+
public static JarResource create(String pathname) {
Matcher matcher = PREFIX_MATCH.matcher(pathname);
String sanitized = matcher.matches() ? matcher.group(1) : pathname;
@@ -22,53 +22,56 @@ public static JarResource create(String pathname) {
return null;
}
- JarFile jar;
- try {
- jar = new JarFile(sanitized.substring(0, bang));
- } catch (IOException e) {
- return null;
- }
-
+ String jarPath = sanitized.substring(0, bang);
String slashPath = sanitized.substring(bang + 1);
if (!slashPath.startsWith("/")) {
slashPath = "/" + slashPath;
}
// TODO: Do we really need to support both test.jar!foo/bar.rb and test.jar!/foo/bar.rb cases?
- JarResource resource = createJarResource(jar, slashPath);
+ JarResource resource = createJarResource(jarPath, slashPath);
if (resource == null) {
- resource = createJarResource(jar, slashPath.substring(1));
+ resource = createJarResource(jarPath, slashPath.substring(1));
}
-
+
return resource;
}
- private static JarResource createJarResource(JarFile jar, String path) {
- // Try as directory first, file second, because if test.jar contains:
- //
- // test/
- // test/foo.rb
- //
- // file:test.jar!test should be a directory and not a file.
- JarResource resource = JarDirectoryResource.create(jar, path);
- if (resource == null) {
- resource = JarFileResource.create(jar, path);
+ private static JarResource createJarResource(String jarPath, String path) {
+ JarCache.JarIndex index = jarCache.getIndex(jarPath);
+
+ if (index == null) {
+ // Jar doesn't exist
+ return null;
}
- return resource;
+
+ // Try it as directory first, because jars tend to have foo/ entries
+ // and it's not really possible disambiguate between files and directories.
+ String[] entries = index.cachedDirEntries.get(path);
+ if (entries != null) {
+ return new JarDirectoryResource(jarPath, path, entries);
+ }
+
+ JarEntry jarEntry = index.getJarEntry(path);
+ if (jarEntry != null) {
+ return new JarFileResource(jarPath, jarEntry);
+ }
+
+ return null;
}
- protected final JarFile jar;
+ private final String jarPath;
private final JarFileStat fileStat;
- protected JarResource(JarFile jar) {
- this.jar = jar;
+ protected JarResource(String jarPath) {
+ this.jarPath = jarPath;
this.fileStat = new JarFileStat(this);
}
@Override
public String absolutePath() {
- return jar.getName() + "!" + entryName();
+ return jarPath + "!" + entryName();
}
@Override
View
15 spec/java_integration/utilities/jar_glob_spec.rb
@@ -17,7 +17,7 @@ def jar_entries(full_entries)
before :all do
FileUtils.rm "glob_test/glob-test.jar", :force => true
begin
- FileUtils.rmdir "glob_test"
+ FileUtils.rm_rf "glob_test"
rescue Errno::ENOENT; end
FileUtils.mkdir_p 'glob_target'
@@ -76,6 +76,19 @@ def jar_entries(full_entries)
])
end
end
+
+ it "respects jar content filesystem changes" do
+ jar_path = File.join(Dir.pwd, 'glob_test', 'modified-glob-test.jar')
+ FileUtils.cp 'glob-test.jar', jar_path
+
+ lambda do
+ # Need to sleep a little bit to make sure that modified time is updated
+ sleep 1
+
+ # This should delete the /glob_target and /glob_target/bar.txt entries
+ `zip -d #{jar_path} glob_target/bar.txt`
+ end.should change { Dir.glob("#{jar_path}!/**/*").size }.by -2
+ end
end
describe 'Dir globs (Dir.glob and Dir.[]) +' do
Something went wrong with that request. Please try again.