Skip to content

Commit

Permalink
NCX parsing improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
neror committed Nov 9, 2012
1 parent 06b4d74 commit 7094707
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 79 deletions.
126 changes: 66 additions & 60 deletions EPUB3.c
Expand Up @@ -131,10 +131,9 @@ EXPORT char * EPUB3TocItemCopyPath(EPUB3TocItemRef tocItem)
{ {
assert(tocItem != NULL); assert(tocItem != NULL);


if(tocItem->manifestItem == NULL || tocItem->manifestItem->href == NULL) return NULL; if(tocItem->href == NULL) return NULL;


char * path = strdup(tocItem->manifestItem->href); return strdup(tocItem->href);
return path;
} }


#pragma mark - Base Object #pragma mark - Base Object
Expand Down Expand Up @@ -176,6 +175,7 @@ EPUB3Ref EPUB3Create()
memory->metadata = NULL; memory->metadata = NULL;
memory->manifest = NULL; memory->manifest = NULL;
memory->spine = NULL; memory->spine = NULL;
memory->toc = NULL;
memory->archive = NULL; memory->archive = NULL;
memory->archivePath = NULL; memory->archivePath = NULL;
memory->archiveFileCount = 0; memory->archiveFileCount = 0;
Expand Down Expand Up @@ -435,8 +435,8 @@ EPUB3TocItemRef EPUB3TocItemCreate()
{ {
EPUB3TocItemRef memory = malloc(sizeof(struct EPUB3TocItem)); EPUB3TocItemRef memory = malloc(sizeof(struct EPUB3TocItem));
memory = EPUB3ObjectInitWithTypeID(memory, kEPUB3TocItemTypeID); memory = EPUB3ObjectInitWithTypeID(memory, kEPUB3TocItemTypeID);
memory->manifestItem = NULL;
memory->title = NULL; memory->title = NULL;
memory->href = NULL;
memory->parent = NULL; memory->parent = NULL;
memory->childCount = 0; memory->childCount = 0;
memory->childrenHead = NULL; memory->childrenHead = NULL;
Expand All @@ -455,9 +455,9 @@ void EPUB3TocItemRelease(EPUB3TocItemRef item)
if(item == NULL) return; if(item == NULL) return;


if(item->_type.refCount == 1) { if(item->_type.refCount == 1) {
item->manifestItem = NULL; // zero weak ref
item->parent = NULL; // zero weak ref item->parent = NULL; // zero weak ref
EPUB3_FREE_AND_NULL(item->title); EPUB3_FREE_AND_NULL(item->title);
EPUB3_FREE_AND_NULL(item->href);
int totalChildrenToFree = item->childCount; int totalChildrenToFree = item->childCount;
EPUB3TocItemChildListItemPtr itemPtr = item->childrenHead; EPUB3TocItemChildListItemPtr itemPtr = item->childrenHead;
while(itemPtr != NULL) { while(itemPtr != NULL) {
Expand All @@ -474,12 +474,6 @@ void EPUB3TocItemRelease(EPUB3TocItemRef item)
EPUB3ObjectRelease(item); EPUB3ObjectRelease(item);
} }


// Stores manifestItem in tocItem->manifestItem.
//
// tocItem      - the TOC item to update; must not be NULL (checked with assert).
// manifestItem - the pointer to store; it is not validated here, so NULL is
//                accepted and simply clears the field.
//
// The pointer is stored as-is: this function makes no retain call on
// manifestItem.
void EPUB3TocItemSetManifestItem(EPUB3TocItemRef tocItem, EPUB3ManifestItemRef manifestItem)
{
assert(tocItem != NULL);
// Plain pointer assignment; any previously stored pointer is overwritten without being released.
tocItem->manifestItem = manifestItem;
}

void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item) void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item)
{ {
assert(toc != NULL); assert(toc != NULL);
Expand Down Expand Up @@ -855,6 +849,10 @@ EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename)
epub->spine = EPUB3SpineCreate(); epub->spine = EPUB3SpineCreate();
} }


if(epub->toc == NULL) {
epub->toc = EPUB3TocCreate();
}

void *buffer = NULL; void *buffer = NULL;
uint32_t bufferSize = 0; uint32_t bufferSize = 0;
uint32_t bytesCopied; uint32_t bytesCopied;
Expand Down Expand Up @@ -888,14 +886,27 @@ EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename)
return error; return error;
} }


void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode) void _EPUB3DumpXMLParseContextStack(EPUB3XMLParseContextPtr *ctxPtr)
{
    // Debug helper: prints the tag name of every entry on the parse context
    // stack to stderr, starting at the current top (*ctxPtr) and stepping
    // downward one entry at a time until an entry in a root state
    // (kEPUB3NCXStateRoot or kEPUB3OPFStateRoot) has been printed.
    //
    // ctxPtr - address of the current stack-top pointer. The caller's pointer
    //          is not modified; a local copy is walked instead.
    //
    // NOTE(review): the walk has no lower bound of its own. It relies on a
    // root-state entry existing at or below the top; if none does, it will
    // step before the start of the stack. The stack base is not available
    // here, so this cannot be checked locally.

    // Nothing to dump without a stack pointer.
    if(ctxPtr == NULL || *ctxPtr == NULL) return;

    EPUB3XMLParseContextPtr top = *ctxPtr;
    fprintf(stderr, "== Parse Context Stack ==\n");
    for(;;) {
        // Passing a NULL pointer for %s is undefined behaviour, so print a
        // placeholder for entries that have no tag name.
        const char * tagName = (top->tagName != NULL) ? (const char *)top->tagName : "(null)";
        fprintf(stderr, "%s\n", tagName);
        if(top->state == kEPUB3NCXStateRoot || top->state == kEPUB3OPFStateRoot) break;
        top--;
    }
    fprintf(stderr, "== END Context Stack ==\n");
}

void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode, void * userInfo)
{ {
(*ctxPtr)++; (*ctxPtr)++;
(*ctxPtr)->state = state; (*ctxPtr)->state = state;
(*ctxPtr)->tagName = tagName; (*ctxPtr)->tagName = tagName;
(*ctxPtr)->attributeCount = attrCount; (*ctxPtr)->attributeCount = attrCount;
(*ctxPtr)->attributes = attrs; (*ctxPtr)->attributes = attrs;
(*ctxPtr)->shouldParseTextNode = shouldParseTextNode; (*ctxPtr)->shouldParseTextNode = shouldParseTextNode;
(*ctxPtr)->userInfo = userInfo;
} }


void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr) void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr)
Expand Down Expand Up @@ -924,7 +935,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReade
case XML_READER_TYPE_ELEMENT: case XML_READER_TYPE_ELEMENT:
{ {
if(!xmlTextReaderIsEmptyElement(reader)) { if(!xmlTextReaderIsEmptyElement(reader)) {
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES, NULL);


// Only parse text node for the identifier marked as unique-identifier in the package tag // Only parse text node for the identifier marked as unique-identifier in the package tag
// see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier // see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier
Expand Down Expand Up @@ -984,7 +995,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReade
case XML_READER_TYPE_ELEMENT: case XML_READER_TYPE_ELEMENT:
{ {
if(!xmlTextReaderIsEmptyElement(reader)) { if(!xmlTextReaderIsEmptyElement(reader)) {
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} else { } else {
if(xmlStrcmp(name, BAD_CAST "item") == 0) { if(xmlStrcmp(name, BAD_CAST "item") == 0) {
EPUB3ManifestItemRef newItem = EPUB3ManifestItemCreate(); EPUB3ManifestItemRef newItem = EPUB3ManifestItemCreate();
Expand Down Expand Up @@ -1043,7 +1054,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForSpineInOPF(EPUB3Ref epub, xmlTextReaderPt
case XML_READER_TYPE_ELEMENT: case XML_READER_TYPE_ELEMENT:
{ {
if(!xmlTextReaderIsEmptyElement(reader)) { if(!xmlTextReaderIsEmptyElement(reader)) {
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} else { } else {
if(xmlStrcmp(name, BAD_CAST "itemref") == 0) { if(xmlStrcmp(name, BAD_CAST "itemref") == 0) {
EPUB3SpineItemRef newItem = EPUB3SpineItemCreate(); EPUB3SpineItemRef newItem = EPUB3SpineItemCreate();
Expand Down Expand Up @@ -1113,13 +1124,13 @@ EPUB3Error EPUB3ParseXMLReaderNodeForOPF(EPUB3Ref epub, xmlTextReaderPtr reader,
} }
} }
else if(xmlStrcmp(name, BAD_CAST "metadata") == 0) { else if(xmlStrcmp(name, BAD_CAST "metadata") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES, NULL);
} }
else if(xmlStrcmp(name, BAD_CAST "manifest") == 0) { else if(xmlStrcmp(name, BAD_CAST "manifest") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} }
else if(xmlStrcmp(name, BAD_CAST "spine") == 0) { else if(xmlStrcmp(name, BAD_CAST "spine") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateSpine, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateSpine, name, 0, NULL, kEPUB3_YES, NULL);
} }
} }
break; break;
Expand Down Expand Up @@ -1211,10 +1222,11 @@ EPUB3Error EPUB3ParseNCXFromData(EPUB3Ref epub, void * buffer, uint32_t bufferSi
EPUB3XMLParseContextPtr currentContext = &contextStack[0]; EPUB3XMLParseContextPtr currentContext = &contextStack[0];


int retVal = xmlTextReaderRead(reader); int retVal = xmlTextReaderRead(reader);
currentContext->state = kEPUB3OPFStateRoot; currentContext->state = kEPUB3NCXStateRoot;
currentContext->tagName = xmlTextReaderConstName(reader); currentContext->tagName = xmlTextReaderConstName(reader);
while(retVal == 1) while(retVal == 1)
{ {
// _EPUB3DumpXMLParseContextStack(&currentContext);
error = EPUB3ParseXMLReaderNodeForNCX(epub, reader, &currentContext); error = EPUB3ParseXMLReaderNodeForNCX(epub, reader, &currentContext);
retVal = xmlTextReaderRead(reader); retVal = xmlTextReaderRead(reader);
} }
Expand Down Expand Up @@ -1242,12 +1254,12 @@ EPUB3Error EPUB3ParseXMLReaderNodeForNCX(EPUB3Ref epub, xmlTextReaderPtr reader,
if(name != NULL && currentNodeType != XML_READER_TYPE_COMMENT) { if(name != NULL && currentNodeType != XML_READER_TYPE_COMMENT) {
switch((*currentContext)->state) switch((*currentContext)->state)
{ {
case kEPUB3OPFStateRoot: case kEPUB3NCXStateRoot:
{ {
// fprintf(stdout, "NCX ROOT: %s\n", name); // fprintf(stdout, "NCX ROOT: %s\n", name);
if(currentNodeType == XML_READER_TYPE_ELEMENT) { if(currentNodeType == XML_READER_TYPE_ELEMENT) {
if(xmlStrcmp(name, BAD_CAST "navMap") == 0) { if(xmlStrcmp(name, BAD_CAST "navMap") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_YES); (void)EPUB3SaveParseContext(currentContext, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_YES, NULL);
} }
} }
break; break;
Expand All @@ -1274,36 +1286,23 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForNavMapInNCX(EPUB3Ref epub, xmlTextReaderP
assert(reader != NULL); assert(reader != NULL);


EPUB3Error error = kEPUB3Success; EPUB3Error error = kEPUB3Success;
// const xmlChar *name = xmlTextReaderConstLocalName(reader); const xmlChar *name = xmlTextReaderConstLocalName(reader);
// xmlReaderTypes nodeType = xmlTextReaderNodeType(reader); xmlReaderTypes nodeType = xmlTextReaderNodeType(reader);
//
// switch(nodeType) switch(nodeType)
// { {
// case XML_READER_TYPE_ELEMENT: case XML_READER_TYPE_ELEMENT:
// { {
// if(!xmlTextReaderIsEmptyElement(reader)) { if(!xmlTextReaderIsEmptyElement(reader)) {
// (void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES); if(xmlStrcmp(name, BAD_CAST "navPoint") == 0) {
// EPUB3TocItemRef newTocItem = EPUB3TocItemCreate();
// // Only parse text node for the identifier marked as unique-identifier in the package tag (void)EPUB3SaveParseContext(context, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_NO, newTocItem);
// // see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier }
// if(xmlStrcmp(name, BAD_CAST "identifier") == 0) { }
// if(xmlTextReaderHasAttributes(reader)) { break;
// xmlChar * itemId = xmlTextReaderGetAttribute(reader, BAD_CAST "id"); }
// if(itemId == NULL) { case XML_READER_TYPE_TEXT:
// (*context)->shouldParseTextNode = kEPUB3_NO; {
// }
// else if(itemId != NULL && xmlStrcmp(itemId, BAD_CAST epub->metadata->_uniqueIdentifierID) != 0) {
// (*context)->shouldParseTextNode = kEPUB3_NO;
// EPUB3_FREE_AND_NULL(itemId);
// }
// }
// }
//
// }
// break;
// }
// case XML_READER_TYPE_TEXT:
// {
// const xmlChar *value = xmlTextReaderValue(reader); // const xmlChar *value = xmlTextReaderValue(reader);
// if(value != NULL && (*context)->shouldParseTextNode) { // if(value != NULL && (*context)->shouldParseTextNode) {
// if(xmlStrcmp((*context)->tagName, BAD_CAST "title") == 0) { // if(xmlStrcmp((*context)->tagName, BAD_CAST "title") == 0) {
Expand All @@ -1316,15 +1315,22 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForNavMapInNCX(EPUB3Ref epub, xmlTextReaderP
// (void)EPUB3MetadataSetLanguage(epub->metadata, (const char *)value); // (void)EPUB3MetadataSetLanguage(epub->metadata, (const char *)value);
// } // }
// } // }
// break; break;
// } }
// case XML_READER_TYPE_END_ELEMENT: case XML_READER_TYPE_END_ELEMENT:
// { {
// (void)EPUB3PopAndFreeParseContext(context); if(xmlStrcmp(name, BAD_CAST "navPoint") == 0) {
// break; if((*context)->userInfo != NULL) {
// } EPUB3TocItemRef newTocItem = (*context)->userInfo;
// default: break; EPUB3TocAddRootItem(epub->toc, newTocItem);
// } EPUB3TocItemRelease(newTocItem);
}
(void)EPUB3PopAndFreeParseContext(context);
}
break;
}
default: break;
}
return error; return error;
} }


Expand Down
7 changes: 4 additions & 3 deletions EPUB3_private.h
Expand Up @@ -56,6 +56,7 @@ typedef struct _EPUB3OPFParseContext {
int32_t attributeCount; int32_t attributeCount;
char ** attributes; char ** attributes;
EPUB3Bool shouldParseTextNode; EPUB3Bool shouldParseTextNode;
void * userInfo;
} EPUB3XMLParseContext; } EPUB3XMLParseContext;


typedef EPUB3XMLParseContext * EPUB3XMLParseContextPtr; typedef EPUB3XMLParseContext * EPUB3XMLParseContextPtr;
Expand Down Expand Up @@ -157,11 +158,12 @@ struct EPUB3Toc {
struct EPUB3TocItem { struct EPUB3TocItem {
EPUB3Type _type; EPUB3Type _type;
char * title; char * title;
char * href;
EPUB3TocItemRef parent; //weak ref EPUB3TocItemRef parent; //weak ref
int32_t childCount; int32_t childCount;
EPUB3TocItemChildListItemPtr childrenHead; EPUB3TocItemChildListItemPtr childrenHead;
EPUB3TocItemChildListItemPtr childrenTail; EPUB3TocItemChildListItemPtr childrenTail;
EPUB3ManifestItemRef manifestItem; //weak ref // EPUB3ManifestItemRef manifestItem; //weak ref
}; };


#pragma mark - Base Object #pragma mark - Base Object
Expand Down Expand Up @@ -226,12 +228,11 @@ void EPUB3TocItemRetain(EPUB3TocItemRef item);
void EPUB3TocItemRelease(EPUB3TocItemRef item); void EPUB3TocItemRelease(EPUB3TocItemRef item);
void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item); void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item);
void EPUB3TocItemAppendChild(EPUB3TocItemRef parent, EPUB3TocItemRef child); void EPUB3TocItemAppendChild(EPUB3TocItemRef parent, EPUB3TocItemRef child);
void EPUB3TocItemSetManifestItem(EPUB3TocItemRef tocItem, EPUB3ManifestItemRef manifestItem);


#pragma mark - XML Parsing #pragma mark - XML Parsing


EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename); EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename);
void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode); void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode, void * userInfo);
void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr); void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr);
EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context); EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context);
EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context); EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context);
Expand Down
9 changes: 1 addition & 8 deletions TestEPUB3Processor/check_EPUB3.c
Expand Up @@ -155,12 +155,10 @@ START_TEST(test_epub3_toc)
ck_assert_int_eq(item->_type.refCount, 1); ck_assert_int_eq(item->_type.refCount, 1);
ck_assert_str_eq(item->_type.typeID, kEPUB3TocItemTypeID); ck_assert_str_eq(item->_type.typeID, kEPUB3TocItemTypeID);


EPUB3ManifestItemRef manifestItem = EPUB3ManifestItemCreate();
const char * href = "a/path/to/something"; const char * href = "a/path/to/something";
manifestItem->href = strdup(href);
EPUB3TocItemSetManifestItem(item, manifestItem);
const char * myTitle = "My Title"; const char * myTitle = "My Title";
item->title = strdup(myTitle); item->title = strdup(myTitle);
item->href = strdup(href);


char * path = EPUB3TocItemCopyPath(item); char * path = EPUB3TocItemCopyPath(item);
ck_assert_str_eq(path, href); ck_assert_str_eq(path, href);
Expand All @@ -169,11 +167,6 @@ START_TEST(test_epub3_toc)
char * title = EPUB3TocItemCopyTitle(item); char * title = EPUB3TocItemCopyTitle(item);
ck_assert_str_eq(title, myTitle); ck_assert_str_eq(title, myTitle);
free(title); free(title);

ck_assert_int_eq(manifestItem->_type.refCount, 1);
EPUB3TocItemRelease(item);
ck_assert_int_eq(manifestItem->_type.refCount, 1);
EPUB3ManifestItemRelease(manifestItem);
} }
END_TEST END_TEST


Expand Down
17 changes: 9 additions & 8 deletions TestEPUB3Processor/check_EPUB3_parsing.c
Expand Up @@ -469,14 +469,8 @@ START_TEST(test_epub3_parse_ncx_from_medallion)
TEST_DATA_FILE_SIZE_SANITY_CHECK(path, 6709); TEST_DATA_FILE_SIZE_SANITY_CHECK(path, 6709);
EPUB3Ref blankEPUB = EPUB3Create(); EPUB3Ref blankEPUB = EPUB3Create();


EPUB3MetadataRef blankMetadata = EPUB3MetadataCreate(); EPUB3TocRef toc = EPUB3TocCreate();
EPUB3SetMetadata(blankEPUB, blankMetadata); blankEPUB->toc = toc;

EPUB3ManifestRef blankManifest = EPUB3ManifestCreate();
EPUB3SetManifest(blankEPUB, blankManifest);

EPUB3SpineRef blankSpine = EPUB3SpineCreate();
EPUB3SetSpine(blankEPUB, blankSpine);


struct stat st; struct stat st;
stat(path, &st); stat(path, &st);
Expand All @@ -487,6 +481,13 @@ START_TEST(test_epub3_parse_ncx_from_medallion)


fail_if(ferror(fp) != 0, "Problem reading test data file %s: %s", path, strerror(ferror(fp))); fail_if(ferror(fp) != 0, "Problem reading test data file %s: %s", path, strerror(ferror(fp)));
fail_unless(bytesRead == bufferSize, "Only read %d bytes of the %d byte test data file.", bytesRead, bufferSize); fail_unless(bytesRead == bufferSize, "Only read %d bytes of the %d byte test data file.", bytesRead, bufferSize);

EPUB3Error error = EPUB3ParseNCXFromData(blankEPUB, newBuf, (int32_t)bytesRead);
fail_unless(error == kEPUB3Success);

ck_assert_int_eq(toc->rootItemCount, 35);

EPUB3Release(blankEPUB);
} }
END_TEST END_TEST


Expand Down

0 comments on commit 7094707

Please sign in to comment.