Permalink
Browse files

NCX parsing improvements

  • Loading branch information...
1 parent 06b4d74 commit 7094707dd0749ba3c305f59197e3a98bde63d4c6 @neror neror committed Nov 9, 2012
Showing with 80 additions and 79 deletions.
  1. +66 −60 EPUB3.c
  2. +4 −3 EPUB3_private.h
  3. +1 −8 TestEPUB3Processor/check_EPUB3.c
  4. +9 −8 TestEPUB3Processor/check_EPUB3_parsing.c
View
@@ -131,10 +131,9 @@ EXPORT char * EPUB3TocItemCopyPath(EPUB3TocItemRef tocItem)
{
assert(tocItem != NULL);
- if(tocItem->manifestItem == NULL || tocItem->manifestItem->href == NULL) return NULL;
+ if(tocItem->href == NULL) return NULL;
- char * path = strdup(tocItem->manifestItem->href);
- return path;
+ return strdup(tocItem->href);
}
#pragma mark - Base Object
@@ -176,6 +175,7 @@ EPUB3Ref EPUB3Create()
memory->metadata = NULL;
memory->manifest = NULL;
memory->spine = NULL;
+ memory->toc = NULL;
memory->archive = NULL;
memory->archivePath = NULL;
memory->archiveFileCount = 0;
@@ -435,8 +435,8 @@ EPUB3TocItemRef EPUB3TocItemCreate()
{
EPUB3TocItemRef memory = malloc(sizeof(struct EPUB3TocItem));
memory = EPUB3ObjectInitWithTypeID(memory, kEPUB3TocItemTypeID);
- memory->manifestItem = NULL;
memory->title = NULL;
+ memory->href = NULL;
memory->parent = NULL;
memory->childCount = 0;
memory->childrenHead = NULL;
@@ -455,9 +455,9 @@ void EPUB3TocItemRelease(EPUB3TocItemRef item)
if(item == NULL) return;
if(item->_type.refCount == 1) {
- item->manifestItem = NULL; // zero weak ref
item->parent = NULL; // zero weak ref
EPUB3_FREE_AND_NULL(item->title);
+ EPUB3_FREE_AND_NULL(item->href);
int totalChildrenToFree = item->childCount;
EPUB3TocItemChildListItemPtr itemPtr = item->childrenHead;
while(itemPtr != NULL) {
@@ -474,12 +474,6 @@ void EPUB3TocItemRelease(EPUB3TocItemRef item)
EPUB3ObjectRelease(item);
}
-void EPUB3TocItemSetManifestItem(EPUB3TocItemRef tocItem, EPUB3ManifestItemRef manifestItem)
-{
- assert(tocItem != NULL);
- tocItem->manifestItem = manifestItem;
-}
-
void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item)
{
assert(toc != NULL);
@@ -855,6 +849,10 @@ EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename)
epub->spine = EPUB3SpineCreate();
}
+ if(epub->toc == NULL) {
+ epub->toc = EPUB3TocCreate();
+ }
+
void *buffer = NULL;
uint32_t bufferSize = 0;
uint32_t bytesCopied;
@@ -888,14 +886,27 @@ EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename)
return error;
}
-void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode)
+void _EPUB3DumpXMLParseContextStack(EPUB3XMLParseContextPtr *ctxPtr)
+{
+ EPUB3XMLParseContextPtr top = *ctxPtr;
+ fprintf(stderr, "== Parse Context Stack ==\n");
+ for(;;) {
+ fprintf(stderr, "%s\n", (const char *)top->tagName);
+ if(top->state == kEPUB3NCXStateRoot || top->state == kEPUB3OPFStateRoot) break;
+ top--;
+ }
+ fprintf(stderr, "== END Context Stack ==\n");
+}
+
+void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode, void * userInfo)
{
(*ctxPtr)++;
(*ctxPtr)->state = state;
(*ctxPtr)->tagName = tagName;
(*ctxPtr)->attributeCount = attrCount;
(*ctxPtr)->attributes = attrs;
(*ctxPtr)->shouldParseTextNode = shouldParseTextNode;
+ (*ctxPtr)->userInfo = userInfo;
}
void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr)
@@ -924,7 +935,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReade
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
- (void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES, NULL);
// Only parse text node for the identifier marked as unique-identifier in the package tag
// see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier
@@ -984,7 +995,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReade
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
- (void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} else {
if(xmlStrcmp(name, BAD_CAST "item") == 0) {
EPUB3ManifestItemRef newItem = EPUB3ManifestItemCreate();
@@ -1043,7 +1054,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForSpineInOPF(EPUB3Ref epub, xmlTextReaderPt
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
- (void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} else {
if(xmlStrcmp(name, BAD_CAST "itemref") == 0) {
EPUB3SpineItemRef newItem = EPUB3SpineItemCreate();
@@ -1113,13 +1124,13 @@ EPUB3Error EPUB3ParseXMLReaderNodeForOPF(EPUB3Ref epub, xmlTextReaderPtr reader,
}
}
else if(xmlStrcmp(name, BAD_CAST "metadata") == 0) {
- (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES, NULL);
}
else if(xmlStrcmp(name, BAD_CAST "manifest") == 0) {
- (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
}
else if(xmlStrcmp(name, BAD_CAST "spine") == 0) {
- (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateSpine, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateSpine, name, 0, NULL, kEPUB3_YES, NULL);
}
}
break;
@@ -1211,10 +1222,11 @@ EPUB3Error EPUB3ParseNCXFromData(EPUB3Ref epub, void * buffer, uint32_t bufferSi
EPUB3XMLParseContextPtr currentContext = &contextStack[0];
int retVal = xmlTextReaderRead(reader);
- currentContext->state = kEPUB3OPFStateRoot;
+ currentContext->state = kEPUB3NCXStateRoot;
currentContext->tagName = xmlTextReaderConstName(reader);
while(retVal == 1)
{
+// _EPUB3DumpXMLParseContextStack(&currentContext);
error = EPUB3ParseXMLReaderNodeForNCX(epub, reader, &currentContext);
retVal = xmlTextReaderRead(reader);
}
@@ -1242,12 +1254,12 @@ EPUB3Error EPUB3ParseXMLReaderNodeForNCX(EPUB3Ref epub, xmlTextReaderPtr reader,
if(name != NULL && currentNodeType != XML_READER_TYPE_COMMENT) {
switch((*currentContext)->state)
{
- case kEPUB3OPFStateRoot:
+ case kEPUB3NCXStateRoot:
{
// fprintf(stdout, "NCX ROOT: %s\n", name);
if(currentNodeType == XML_READER_TYPE_ELEMENT) {
if(xmlStrcmp(name, BAD_CAST "navMap") == 0) {
- (void)EPUB3SaveParseContext(currentContext, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_YES);
+ (void)EPUB3SaveParseContext(currentContext, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_YES, NULL);
}
}
break;
@@ -1274,36 +1286,23 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForNavMapInNCX(EPUB3Ref epub, xmlTextReaderP
assert(reader != NULL);
EPUB3Error error = kEPUB3Success;
-// const xmlChar *name = xmlTextReaderConstLocalName(reader);
-// xmlReaderTypes nodeType = xmlTextReaderNodeType(reader);
-//
-// switch(nodeType)
-// {
-// case XML_READER_TYPE_ELEMENT:
-// {
-// if(!xmlTextReaderIsEmptyElement(reader)) {
-// (void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES);
-//
-// // Only parse text node for the identifier marked as unique-identifier in the package tag
-// // see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier
-// if(xmlStrcmp(name, BAD_CAST "identifier") == 0) {
-// if(xmlTextReaderHasAttributes(reader)) {
-// xmlChar * itemId = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
-// if(itemId == NULL) {
-// (*context)->shouldParseTextNode = kEPUB3_NO;
-// }
-// else if(itemId != NULL && xmlStrcmp(itemId, BAD_CAST epub->metadata->_uniqueIdentifierID) != 0) {
-// (*context)->shouldParseTextNode = kEPUB3_NO;
-// EPUB3_FREE_AND_NULL(itemId);
-// }
-// }
-// }
-//
-// }
-// break;
-// }
-// case XML_READER_TYPE_TEXT:
-// {
+ const xmlChar *name = xmlTextReaderConstLocalName(reader);
+ xmlReaderTypes nodeType = xmlTextReaderNodeType(reader);
+
+ switch(nodeType)
+ {
+ case XML_READER_TYPE_ELEMENT:
+ {
+ if(!xmlTextReaderIsEmptyElement(reader)) {
+ if(xmlStrcmp(name, BAD_CAST "navPoint") == 0) {
+ EPUB3TocItemRef newTocItem = EPUB3TocItemCreate();
+ (void)EPUB3SaveParseContext(context, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_NO, newTocItem);
+ }
+ }
+ break;
+ }
+ case XML_READER_TYPE_TEXT:
+ {
// const xmlChar *value = xmlTextReaderValue(reader);
// if(value != NULL && (*context)->shouldParseTextNode) {
// if(xmlStrcmp((*context)->tagName, BAD_CAST "title") == 0) {
@@ -1316,15 +1315,22 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForNavMapInNCX(EPUB3Ref epub, xmlTextReaderP
// (void)EPUB3MetadataSetLanguage(epub->metadata, (const char *)value);
// }
// }
-// break;
-// }
-// case XML_READER_TYPE_END_ELEMENT:
-// {
-// (void)EPUB3PopAndFreeParseContext(context);
-// break;
-// }
-// default: break;
-// }
+ break;
+ }
+ case XML_READER_TYPE_END_ELEMENT:
+ {
+ if(xmlStrcmp(name, BAD_CAST "navPoint") == 0) {
+ if((*context)->userInfo != NULL) {
+ EPUB3TocItemRef newTocItem = (*context)->userInfo;
+ EPUB3TocAddRootItem(epub->toc, newTocItem);
+ EPUB3TocItemRelease(newTocItem);
+ }
+ (void)EPUB3PopAndFreeParseContext(context);
+ }
+ break;
+ }
+ default: break;
+ }
return error;
}
View
@@ -56,6 +56,7 @@ typedef struct _EPUB3OPFParseContext {
int32_t attributeCount;
char ** attributes;
EPUB3Bool shouldParseTextNode;
+ void * userInfo;
} EPUB3XMLParseContext;
typedef EPUB3XMLParseContext * EPUB3XMLParseContextPtr;
@@ -157,11 +158,12 @@ struct EPUB3Toc {
struct EPUB3TocItem {
EPUB3Type _type;
char * title;
+ char * href;
EPUB3TocItemRef parent; //weak ref
int32_t childCount;
EPUB3TocItemChildListItemPtr childrenHead;
EPUB3TocItemChildListItemPtr childrenTail;
- EPUB3ManifestItemRef manifestItem; //weak ref
+// EPUB3ManifestItemRef manifestItem; //weak ref
};
#pragma mark - Base Object
@@ -226,12 +228,11 @@ void EPUB3TocItemRetain(EPUB3TocItemRef item);
void EPUB3TocItemRelease(EPUB3TocItemRef item);
void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item);
void EPUB3TocItemAppendChild(EPUB3TocItemRef parent, EPUB3TocItemRef child);
-void EPUB3TocItemSetManifestItem(EPUB3TocItemRef tocItem, EPUB3ManifestItemRef manifestItem);
#pragma mark - XML Parsing
EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename);
-void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode);
+void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode, void * userInfo);
void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr);
EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context);
EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context);
@@ -155,12 +155,10 @@ START_TEST(test_epub3_toc)
ck_assert_int_eq(item->_type.refCount, 1);
ck_assert_str_eq(item->_type.typeID, kEPUB3TocItemTypeID);
- EPUB3ManifestItemRef manifestItem = EPUB3ManifestItemCreate();
const char * href = "a/path/to/something";
- manifestItem->href = strdup(href);
- EPUB3TocItemSetManifestItem(item, manifestItem);
const char * myTitle = "My Title";
item->title = strdup(myTitle);
+ item->href = strdup(href);
char * path = EPUB3TocItemCopyPath(item);
ck_assert_str_eq(path, href);
@@ -169,11 +167,6 @@ START_TEST(test_epub3_toc)
char * title = EPUB3TocItemCopyTitle(item);
ck_assert_str_eq(title, myTitle);
free(title);
-
- ck_assert_int_eq(manifestItem->_type.refCount, 1);
- EPUB3TocItemRelease(item);
- ck_assert_int_eq(manifestItem->_type.refCount, 1);
- EPUB3ManifestItemRelease(manifestItem);
}
END_TEST
@@ -469,14 +469,8 @@ START_TEST(test_epub3_parse_ncx_from_medallion)
TEST_DATA_FILE_SIZE_SANITY_CHECK(path, 6709);
EPUB3Ref blankEPUB = EPUB3Create();
- EPUB3MetadataRef blankMetadata = EPUB3MetadataCreate();
- EPUB3SetMetadata(blankEPUB, blankMetadata);
-
- EPUB3ManifestRef blankManifest = EPUB3ManifestCreate();
- EPUB3SetManifest(blankEPUB, blankManifest);
-
- EPUB3SpineRef blankSpine = EPUB3SpineCreate();
- EPUB3SetSpine(blankEPUB, blankSpine);
+ EPUB3TocRef toc = EPUB3TocCreate();
+ blankEPUB->toc = toc;
struct stat st;
stat(path, &st);
@@ -487,6 +481,13 @@ START_TEST(test_epub3_parse_ncx_from_medallion)
fail_if(ferror(fp) != 0, "Problem reading test data file %s: %s", path, strerror(ferror(fp)));
fail_unless(bytesRead == bufferSize, "Only read %d bytes of the %d byte test data file.", bytesRead, bufferSize);
+
+ EPUB3Error error = EPUB3ParseNCXFromData(blankEPUB, newBuf, (int32_t)bytesRead);
+ fail_unless(error == kEPUB3Success);
+
+ ck_assert_int_eq(toc->rootItemCount, 35);
+
+ EPUB3Release(blankEPUB);
}
END_TEST

0 comments on commit 7094707

Please sign in to comment.