Skip to content

Commit

Permalink
New way of finding hidden and visible sheets to avoid gc overhead lim…
Browse files Browse the repository at this point in the history
…it exceeded exception.

Just proof of concept, still needs to be tidied up
  • Loading branch information
Matthew authored and Matthew committed Sep 1, 2018
1 parent c119289 commit f56f9eb
Show file tree
Hide file tree
Showing 6 changed files with 262 additions and 9 deletions.
89 changes: 89 additions & 0 deletions src/main/java/com/poiji/bind/mapping/WorkBookContentHandler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package com.poiji.bind.mapping;

import java.util.ArrayList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * SAX {@link ContentHandler} that records every {@code sheet} element it encounters.
 *
 * <p>For each {@code sheet} element a {@link WorkBookSheet} is created from the element's
 * {@code name}, {@code sheetId} and {@code state} attributes and appended to {@link #sheets}
 * in document order. All other SAX events are ignored.
 *
 * <p>The element is recognised by its local name, so it is found both when the parser is
 * namespace-aware and when the document prefixes its elements (for example
 * {@code <x:sheet>}) and the parser reports only qualified names.
 *
 * @author Matthew 2018/09/01
 */
public class WorkBookContentHandler implements ContentHandler {

    private static final String SHEET_ELEMENT = "sheet";

    /** Sheets collected from the document currently (or most recently) parsed, in document order. */
    public List<WorkBookSheet> sheets = new ArrayList<>();

    /** Sheet whose start tag has been seen but whose end tag has not; {@code null} otherwise. */
    private WorkBookSheet individualSheet;

    @Override
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Resets the collected state so that a handler instance reused for another parse does not
     * also report the sheets of the previous document. A fresh list is created instead of
     * clearing the old one, so a list obtained from an earlier parse is left intact.
     */
    @Override
    public void startDocument() throws SAXException {
        sheets = new ArrayList<>();
        individualSheet = null;
    }

    @Override
    public void endDocument() throws SAXException {
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Starts a new {@link WorkBookSheet} when a {@code sheet} element opens.
     *
     * @param uri       namespace URI reported by the parser (unused)
     * @param localName local name, empty when the parser is not namespace-aware
     * @param qName     qualified name, possibly carrying a prefix
     * @param atts      attributes of the element; absent attributes leave the field {@code null}
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        if (!isSheetElement(localName, qName)) {
            return;
        }

        // Example attributes: name="Sheet3" sheetId="3" state="hidden"
        WorkBookSheet sheet = new WorkBookSheet();
        sheet.name = atts.getValue("name");
        sheet.sheetId = atts.getValue("sheetId");
        sheet.state = atts.getValue("state");
        individualSheet = sheet;
    }

    /**
     * Stores the sheet under construction when its {@code sheet} element closes.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // The null check keeps a stray or nested end tag from adding a null entry.
        if (isSheetElement(localName, qName) && individualSheet != null) {
            sheets.add(individualSheet);
            individualSheet = null;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
    }

    /**
     * Tells whether the reported element name denotes a {@code sheet} element, using the local
     * name when the parser supplies one and otherwise the qualified name with any prefix removed.
     */
    private static boolean isSheetElement(String localName, String qName) {
        if (localName != null && !localName.isEmpty()) {
            return SHEET_ELEMENT.equals(localName);
        }
        if (qName == null) {
            return false;
        }
        int colon = qName.indexOf(':');
        String unprefixed = colon < 0 ? qName : qName.substring(colon + 1);
        return SHEET_ELEMENT.equals(unprefixed);
    }

}
18 changes: 18 additions & 0 deletions src/main/java/com/poiji/bind/mapping/WorkBookSheet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.poiji.bind.mapping;

/**
 * Plain holder for the {@code name}, {@code sheetId} and {@code state} values of one sheet.
 *
 * <p>All fields are public and start out as {@code null} until assigned.
 *
 * @author Matthew 2018/09/01
 */
public class WorkBookSheet {

    /** Sheet name, or {@code null} if never assigned. */
    public String name;

    /** Sheet id as text, or {@code null} if never assigned. */
    public String sheetId;

    /** Sheet state (for example {@code hidden}), or {@code null} if never assigned. */
    public String state;

    /**
     * Renders the three fields as {@code WorkBookSheet{name=..., sheetId=..., state=...}}.
     *
     * @return a human-readable description of this sheet
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("WorkBookSheet{");
        text.append("name=").append(name);
        text.append(", sheetId=").append(sheetId);
        text.append(", state=").append(state);
        return text.append('}').toString();
    }

}
99 changes: 90 additions & 9 deletions src/main/java/com/poiji/bind/mapping/XSSFUnmarshaller.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,27 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.function.Consumer;
import org.apache.poi.ss.usermodel.Workbook;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import static org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
////
//import java.io.InputStream;
//
//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
//import org.apache.poi.openxml4j.opc.OPCPackage;
//import org.apache.poi.xssf.eventusermodel.XSSFReader;
//import org.apache.poi.xssf.model.SharedStringsTable;
//import org.apache.xerces.parsers.SAXParser;
//import org.xml.sax.InputSource;
//import org.xml.sax.XMLReader;
//
//import poi.example.eventmodel.SheetHandler;
//import poi.example.eventmodel.WorkbookHandler;
//import poi.example.eventmodel.mapping.annotation.Sheet;

/**
* Created by hakan on 22/10/2017
Expand All @@ -42,12 +58,78 @@ abstract class XSSFUnmarshaller implements Unmarshaller {

<T> void unmarshal0(Class<T> type, Consumer<? super T> consumer, OPCPackage open) throws IOException, SAXException, OpenXML4JException {

//ISSUE #55
XSSFWorkbook wb = new XSSFWorkbook(open);
Workbook workbook = new SXSSFWorkbook(wb);
//work out which sheet must process
int processIndex = PoijiOptions.getSheetIndexToProcess(workbook, options);
WorkBookContentHandler wbch = null;
try {
XSSFReader workbookReader = new XSSFReader(open);
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser parser = spf.newSAXParser();
XMLReader reader = parser.getXMLReader();
reader.setContentHandler(new WorkBookContentHandler());
InputSource is = new InputSource(workbookReader.getWorkbookData());
reader.parse(is);
wbch = (WorkBookContentHandler) reader.getContentHandler();
} catch (ParserConfigurationException | SAXException | IOException e) {
throw new PoijiException("Problem occurred while reading workbook data", e);
}

// System.out.println("wbch" + wbch);
int processIndex = 0;
if (wbch != null) {

// System.out.println("wbch.sheets" + wbch.sheets);
int findIndex;
//if given sheet index to use, use that
if (options.sheetIndex() != null && options.sheetIndex() > -1) {
findIndex = options.sheetIndex();
} else {
//else default
findIndex = 0;
}

// System.out.println("findIndex " + findIndex);
int sheetIndex;
//if set to ignore hidden sheets, find the visible sheet that matches the index requested
if (options.ignoreHiddenSheets()) {
// System.out.println("ignoreHiddenSheets true ");
Integer visiableIndex = null;

int sheetCount = 0;
for (WorkBookSheet s : wbch.sheets) {

// System.out.println("WorkBookSheet s " + s);
if (s.state == null) {
// System.out.println("WorkBookSheet NULL");
//cannot use sheetId, because that is the sheet's id, not its index; they seem to be different things
visiableIndex = sheetCount;
}

sheetCount++;

}

// System.out.println("visiableIndex" + visiableIndex);
if (visiableIndex != null) {
sheetIndex = visiableIndex;
} else {
//if no sheet found, default back
sheetIndex = findIndex;
}

// System.out.println("sheetIndex" + sheetIndex);
} else {

//if dont want to ignore hidden sheets, use index given or default
sheetIndex = findIndex;
// System.out.println("ELSE sheetIndex" + sheetIndex);
}
processIndex = sheetIndex;
}

// System.out.println("processIndex" + processIndex);
// XSSFWorkbook wb = new XSSFWorkbook(open);
// Workbook workbook = new SXSSFWorkbook(wb);
// //work out which sheet must process
// int processIndex = PoijiOptions.getSheetIndexToProcess(workbook, options);
ReadOnlySharedStringsTable readOnlySharedStringsTable = new ReadOnlySharedStringsTable(open);
XSSFReader xssfReader = new XSSFReader(open);
StylesTable styles = xssfReader.getStylesTable();
Expand All @@ -57,7 +139,6 @@ <T> void unmarshal0(Class<T> type, Consumer<? super T> consumer, OPCPackage open

while (iter.hasNext()) {
try (InputStream stream = iter.next()) {
//if (index == options.sheetIndex()) {
if (index == processIndex) {
processSheet(styles, readOnlySharedStringsTable, type, stream, consumer);
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.poiji.deserialize;

import com.poiji.bind.Poiji;
import com.poiji.deserialize.model.byid.Person;
import com.poiji.option.PoijiOptions;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.File;
import java.util.Arrays;
import java.util.List;
import static com.poiji.util.Data.unmarshallingPersons;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;

@RunWith(Parameterized.class)
/**
 * Parameterized test that reads the large workbooks with hidden sheets, once as
 * {@code .xlsx} and once as {@code .xls}, with hidden sheets ignored and not ignored.
 */
public class DeserializersHiddenSheetLargeTest {

    /** Number of leading rows compared against the expected persons. */
    private static final int ROWS_TO_COMPARE = 5;

    private final String path;
    private final List<Person> expectedPersonList;
    private final Class<?> expectedException;

    /**
     * @param path               workbook file to read
     * @param expectedPersonList persons whose rows are compared with the rows read
     * @param expectedException  exception type tolerated while reading, or {@code null} if
     *                           any exception is a failure
     */
    public DeserializersHiddenSheetLargeTest(String path, List<Person> expectedPersonList, Class<?> expectedException) {
        this.path = path;
        this.expectedPersonList = expectedPersonList;
        this.expectedException = expectedException;
    }

    @Parameterized.Parameters(name = "{index}: ({0})={1}")
    public static Iterable<Object[]> queries() {
        return Arrays.asList(new Object[][]{
            {"src/test/resources/hidden_very_large.xlsx", unmarshallingPersons(), null},
            {"src/test/resources/hidden_very_large.xls", unmarshallingPersons(), null}
        });
    }

    /**
     * With hidden sheets ignored, the first rows read must match the expected persons.
     */
    @Test
    public void testIgnoreHiddenSheets() {
        try {
            PoijiOptions poijiOptions = PoijiOptions.PoijiOptionsBuilder.settings().ignoreHiddenSheets(true).build();

            List<Person> people = Poiji.fromExcel(new File(path), Person.class, poijiOptions);
            for (int i = 0; i < ROWS_TO_COMPARE; i++) {
                assertEquals(expectedPersonList.get(i).getRow(), people.get(i).getRow());
            }
        } catch (Exception e) {
            if (expectedException == null) {
                // Keep the original exception as the cause so its stack trace is reported.
                throw new AssertionError("Unexpected exception: " + e, e);
            }
            assertThat(e, instanceOf(expectedException));
        }
    }

    /**
     * With hidden sheets not ignored, reading the workbook is expected to yield no rows.
     */
    @Test
    public void testProcessHiddenSheets() {
        PoijiOptions poijiOptions = PoijiOptions.PoijiOptionsBuilder.settings().ignoreHiddenSheets(false).build();
        List<Person> people = Poiji.fromExcel(new File(path), Person.class, poijiOptions);
        // JUnit convention: expected value first, actual second.
        assertEquals(0, people.size());
    }
}
Binary file added src/test/resources/hidden_very_large.xls
Binary file not shown.
Binary file added src/test/resources/hidden_very_large.xlsx
Binary file not shown.

0 comments on commit f56f9eb

Please sign in to comment.