Skip to content

Loading…

De lazy-loading changes #12

Merged
merged 3 commits into from

2 participants

@NightWhistler

Hoi Paul,

Dit zijn de wijzigingen die ik voor PageTurner heb gemaakt: het is een heel erg basic lazy-loading oplossing waarbij voor sommige resources de data pas geladen wordt bij de eerste aanroep van getData().

Ik heb het zoveel mogelijk gedocumenteerd, kijk maar of je het geschikt vindt om op te nemen in de main-line :)

Alex

@psiegman
Owner

Hoi Alex,

Ziet er prima uit !
Ik ga er zo snel mogelijk naar kijken, maar met de kerst kan het een aantal dagen duren.

Bedankt !

Paul

@psiegman psiegman merged commit 113a3de into psiegman:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Dec 15, 2011
  1. @NightWhistler
Commits on Dec 16, 2011
  1. @NightWhistler
Commits on Dec 20, 2011
  1. @NightWhistler

    Extra documentation.

    NightWhistler committed
View
89 epublib-core/src/main/java/nl/siegmann/epublib/domain/Resource.java
@@ -1,10 +1,16 @@
package nl.siegmann.epublib.domain;
import java.io.ByteArrayInputStream;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import nl.siegmann.epublib.Constants;
import nl.siegmann.epublib.service.MediatypeService;
@@ -31,6 +37,11 @@
private MediaType mediaType;
private String inputEncoding = Constants.ENCODING;
private byte[] data;
+
+ private String fileName;
+ private long cachedSize;
+
+ private static final Logger LOG = LoggerFactory.getLogger(Resource.class);
/**
* Creates an empty Resource with the given href.
@@ -104,6 +115,21 @@ public Resource(InputStream in, String href) throws IOException {
}
/**
+ * Creates a lazy resource: the data for this entry is not loaded here.
+ *
+ * The data will be loaded on the first call to getData(). Until then
+ * getSize() reports the size passed in here. The media type is derived
+ * from the href via MediatypeService.determineMediaType(href); id and data
+ * are passed to the delegate constructor as null.
+ *
+ * @param fileName the file name of the epub this resource is created from;
+ *                 getData() re-opens this file to load the data.
+ * @param size the size of this resource, reported by getSize() while the data is not loaded.
+ * @param href The resource's href within the epub.
+ */
+ public Resource( String fileName, long size, String href) {
+ this( null, null, href, MediatypeService.determineMediaType(href));
+ this.fileName = fileName;
+ this.cachedSize = size;
+ }
+
+ /**
* Creates a resource with the given id, data, mediatype at the specified href.
* Assumes that if the data is of a text type (html/css/etc) then the encoding will be UTF-8
*
@@ -142,17 +168,52 @@ public Resource(String id, byte[] data, String href, MediaType mediaType, String
* @throws IOException
*/
public InputStream getInputStream() throws IOException {
- return new ByteArrayInputStream(data);
+ return new ByteArrayInputStream(getData());
}
/**
* The contents of the resource as a byte[]
*
+ * If this resource was lazy-loaded and the data was not yet loaded,
+ * it will be loaded into memory at this point.
+ * This includes opening the zip file, so expect a first load to be slow.
+ *
+ * A resource that has no backing epub file (it was not created lazily)
+ * is never read from disk: its data is returned as-is, which may be null.
+ * If no zip entry matches this resource's href the result is null as well.
+ *
* @return The contents of the resource
+ * @throws IOException if the epub file cannot be opened or read
*/
- public byte[] getData() {
+ public byte[] getData() throws IOException {
+
+     // Only lazy resources have a file to load from. Without the fileName check
+     // an empty, non-lazy resource would end up in new FileInputStream(null).
+     if ( data == null && fileName != null ) {
+
+         LOG.info("Initializing lazy resource {}#{}", fileName, this.href);
+
+         ZipInputStream in = new ZipInputStream(new FileInputStream(this.fileName));
+
+         try {
+             for(ZipEntry zipEntry = in.getNextEntry(); zipEntry != null; zipEntry = in.getNextEntry()) {
+                 if(zipEntry.isDirectory()) {
+                     continue;
+                 }
+
+                 // The first entry whose name ends with the href is used;
+                 // there is no need to scan the rest of the archive.
+                 if ( zipEntry.getName().endsWith(this.href)) {
+                     this.data = IOUtil.toByteArray(in);
+                     break;
+                 }
+             }
+         } finally {
+             // Release the file handle even when reading an entry fails.
+             in.close();
+         }
+     }
+
return data;
}
+
+ /**
+  * Drops the in-memory data of a lazy-loaded resource.
+  *
+  * Resources that have no backing file name (not lazy-loaded) keep their
+  * data; for those this call does nothing.
+  */
+ public void close() {
+     if ( this.fileName == null ) {
+         return;
+     }
+
+     this.data = null;
+ }
/**
* Sets the data of the Resource.
@@ -163,8 +224,30 @@ public InputStream getInputStream() throws IOException {
public void setData(byte[] data) {
this.data = data;
}
+
+ /**
+ * Tells whether the data for this resource is currently held in memory.
+ *
+ * This is a plain null check on the data field; it does not trigger
+ * loading of a lazy resource.
+ *
+ * @return true if the data is present, false if it is null.
+ */
+ public boolean isInitialized() {
+ return data != null;
+ }
/**
+ * Returns the size of this resource in bytes.
+ *
+ * While the data is held in memory its actual length is reported;
+ * otherwise the cached size of the resource is returned.
+ *
+ * @return the size.
+ */
+ public long getSize() {
+     return ( data != null ) ? data.length : cachedSize;
+ }
+
+ /**
* If the title is found by scanning the underlying html document then it is cached here.
*
* @return
@@ -243,7 +326,7 @@ public void setInputEncoding(String encoding) {
* @throws IOException
*/
public Reader getReader() throws IOException {
- return new XmlStreamReader(new ByteArrayInputStream(data), inputEncoding);
+ return new XmlStreamReader(new ByteArrayInputStream(getData()), inputEncoding);
}
/**
View
74 epublib-core/src/main/java/nl/siegmann/epublib/epub/EpubReader.java
@@ -1,13 +1,16 @@
package nl.siegmann.epublib.epub;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import nl.siegmann.epublib.Constants;
import nl.siegmann.epublib.domain.Book;
-import nl.siegmann.epublib.domain.Metadata;
+import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.Resources;
import nl.siegmann.epublib.service.MediatypeService;
@@ -32,7 +35,7 @@
public Book readEpub(InputStream in) throws IOException {
return readEpub(in, Constants.ENCODING);
- }
+ }
public Book readEpub(ZipInputStream in) throws IOException {
return readEpub(in, Constants.ENCODING);
@@ -48,6 +51,42 @@ public Book readEpub(ZipInputStream in) throws IOException {
*/
public Book readEpub(InputStream in, String encoding) throws IOException {
return readEpub(new ZipInputStream(in), encoding);
+ }
+
+ /**
+  * Reads an EPUB from a file, deferring the loading of selected resources.
+  *
+  * Resources whose media type is contained in lazyLoadedTypes are created as
+  * lazy resources; all other resources are read while the file is scanned.
+  *
+  * @param fileName the file to load
+  * @param encoding the encoding for XHTML files
+  * @param lazyLoadedTypes a list of the MediaType to load lazily
+  * @return the resulting Book, after post-processing
+  * @throws IOException if reading the epub fails
+  */
+ public Book readEpubLazy( String fileName, String encoding, List<MediaType> lazyLoadedTypes ) throws IOException {
+     Book book = new Book();
+     Resources allResources = readLazyResources(fileName, encoding, lazyLoadedTypes);
+     handleMimeType(book, allResources);
+
+     // Package (OPF) document first: the NCX lookup below depends on it.
+     String opfHref = getPackageResourceHref(allResources);
+     Resource opfResource = processPackageResource(opfHref, book, allResources);
+     book.setOpfResource(opfResource);
+
+     Resource ncxResource = processNcxResource(opfResource, book);
+     book.setNcxResource(ncxResource);
+
+     return postProcessBook(book);
+ }
+
+
+ /**
+  * Reads this EPUB, loading every media type known to MediatypeService lazily.
+  *
+  * This is a shorthand for the three-argument readEpubLazy with the
+  * contents of MediatypeService.mediatypes as the list of lazy types.
+  *
+  * @param fileName the file to load
+  * @param encoding the encoding for XHTML files
+  *
+  * @return the resulting Book
+  * @throws IOException if reading the epub fails
+  */
+ public Book readEpubLazy( String fileName, String encoding ) throws IOException {
+     List<MediaType> everyKnownType = Arrays.asList(MediatypeService.mediatypes);
+     return readEpubLazy(fileName, encoding, everyKnownType);
}
public Book readEpub(ZipInputStream in, String encoding) throws IOException {
@@ -108,6 +147,37 @@ private String getPackageResourceHref(Resources resources) {
private void handleMimeType(Book result, Resources resources) {
resources.remove("mimetype");
}
+
+ /**
+  * Scans the given epub file and builds its Resources, loading only some of them.
+  *
+  * Directory entries are skipped. An entry whose media type is contained in
+  * lazyLoadedTypes becomes a lazy Resource that only records the file name,
+  * the entry size as reported by the zip entry, and the href; every other
+  * entry is read from the zip stream right away. XHTML resources get the
+  * given default encoding.
+  *
+  * The zip stream opened here is always closed before returning, also when
+  * reading fails.
+  *
+  * @param fileName the epub file to scan
+  * @param defaultHtmlEncoding the input encoding to set on XHTML resources
+  * @param lazyLoadedTypes the media types that must not be loaded into memory yet
+  * @return the resources found in the file
+  * @throws IOException if the file cannot be opened or read
+  */
+ private Resources readLazyResources( String fileName, String defaultHtmlEncoding,
+         List<MediaType> lazyLoadedTypes) throws IOException {
+
+     ZipInputStream in = new ZipInputStream(new FileInputStream(fileName));
+
+     try {
+         Resources result = new Resources();
+         for(ZipEntry zipEntry = in.getNextEntry(); zipEntry != null; zipEntry = in.getNextEntry()) {
+             if(zipEntry.isDirectory()) {
+                 continue;
+             }
+
+             String href = zipEntry.getName();
+             MediaType mediaType = MediatypeService.determineMediaType(href);
+
+             Resource resource;
+
+             if ( lazyLoadedTypes.contains(mediaType) ) {
+                 resource = new Resource(fileName, zipEntry.getSize(), href);
+             } else {
+                 resource = new Resource( in, href );
+             }
+
+             if(resource.getMediaType() == MediatypeService.XHTML) {
+                 resource.setInputEncoding(defaultHtmlEncoding);
+             }
+             result.add(resource);
+         }
+
+         return result;
+     } finally {
+         // This method opened the stream, so it is responsible for releasing
+         // the underlying file handle.
+         in.close();
+     }
+ }
private Resources readResources(ZipInputStream in, String defaultHtmlEncoding) throws IOException {
Resources result = new Resources();
Something went wrong with that request. Please try again.