Skip to content

Commit

Permalink
NCX parsing improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
neror committed Nov 9, 2012
1 parent 06b4d74 commit 7094707
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 79 deletions.
126 changes: 66 additions & 60 deletions EPUB3.c
Expand Up @@ -131,10 +131,9 @@ EXPORT char * EPUB3TocItemCopyPath(EPUB3TocItemRef tocItem)
{
assert(tocItem != NULL);

if(tocItem->manifestItem == NULL || tocItem->manifestItem->href == NULL) return NULL;
if(tocItem->href == NULL) return NULL;

char * path = strdup(tocItem->manifestItem->href);
return path;
return strdup(tocItem->href);
}

#pragma mark - Base Object
Expand Down Expand Up @@ -176,6 +175,7 @@ EPUB3Ref EPUB3Create()
memory->metadata = NULL;
memory->manifest = NULL;
memory->spine = NULL;
memory->toc = NULL;
memory->archive = NULL;
memory->archivePath = NULL;
memory->archiveFileCount = 0;
Expand Down Expand Up @@ -435,8 +435,8 @@ EPUB3TocItemRef EPUB3TocItemCreate()
{
EPUB3TocItemRef memory = malloc(sizeof(struct EPUB3TocItem));
memory = EPUB3ObjectInitWithTypeID(memory, kEPUB3TocItemTypeID);
memory->manifestItem = NULL;
memory->title = NULL;
memory->href = NULL;
memory->parent = NULL;
memory->childCount = 0;
memory->childrenHead = NULL;
Expand All @@ -455,9 +455,9 @@ void EPUB3TocItemRelease(EPUB3TocItemRef item)
if(item == NULL) return;

if(item->_type.refCount == 1) {
item->manifestItem = NULL; // zero weak ref
item->parent = NULL; // zero weak ref
EPUB3_FREE_AND_NULL(item->title);
EPUB3_FREE_AND_NULL(item->href);
int totalChildrenToFree = item->childCount;
EPUB3TocItemChildListItemPtr itemPtr = item->childrenHead;
while(itemPtr != NULL) {
Expand All @@ -474,12 +474,6 @@ void EPUB3TocItemRelease(EPUB3TocItemRef item)
EPUB3ObjectRelease(item);
}

// Associates manifestItem with tocItem.
// The pointer is stored as-is: nothing in this function retains manifestItem,
// so the TOC item holds it as a weak (non-owning) reference.
// tocItem must not be NULL (asserted); manifestItem is not checked here.
void EPUB3TocItemSetManifestItem(EPUB3TocItemRef tocItem, EPUB3ManifestItemRef manifestItem)
{
assert(tocItem != NULL);
tocItem->manifestItem = manifestItem;
}

void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item)
{
assert(toc != NULL);
Expand Down Expand Up @@ -855,6 +849,10 @@ EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename)
epub->spine = EPUB3SpineCreate();
}

if(epub->toc == NULL) {
epub->toc = EPUB3TocCreate();
}

void *buffer = NULL;
uint32_t bufferSize = 0;
uint32_t bytesCopied;
Expand Down Expand Up @@ -888,14 +886,27 @@ EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename)
return error;
}

void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode)
// Debugging aid: prints the tag name of every entry on the XML parse context
// stack to stderr, one per line, starting at the current (top) entry and
// walking down towards the bottom of the stack.
//
// ctxPtr points at the caller's "current context" pointer. The walk stops at
// the first entry whose state is kEPUB3NCXStateRoot or kEPUB3OPFStateRoot, so
// the bottom entry of the stack must carry one of those states; otherwise the
// walk would run off the front of the stack.
//
// A NULL ctxPtr (or a NULL current context) prints only the header and footer.
void _EPUB3DumpXMLParseContextStack(EPUB3XMLParseContextPtr *ctxPtr)
{
    fprintf(stderr, "== Parse Context Stack ==\n");
    if(ctxPtr != NULL && *ctxPtr != NULL) {
        EPUB3XMLParseContextPtr top = *ctxPtr;
        for(;;) {
            // Passing NULL for a %s conversion is undefined behaviour, so substitute a placeholder.
            const char * tagName = (top->tagName != NULL) ? (const char *)top->tagName : "(null)";
            fprintf(stderr, "%s\n", tagName);
            // The root entry is the bottom of the stack: print it, then stop.
            if(top->state == kEPUB3NCXStateRoot || top->state == kEPUB3OPFStateRoot) break;
            top--;
        }
    }
    fprintf(stderr, "== END Context Stack ==\n");
}

// Pushes a new entry onto the XML parse context stack.
//
// The slot immediately after the current context is filled in with the given
// state, tag name, attribute list, text-node flag and user info, and *ctxPtr
// is advanced to point at it.
//
// No bounds check is performed here: the caller must guarantee that the
// underlying stack has room for one more entry. The pointers passed in
// (tagName, attrs, userInfo) are stored as given; nothing is copied.
void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode, void * userInfo)
{
    EPUB3XMLParseContextPtr pushed = *ctxPtr + 1;

    pushed->state = state;
    pushed->tagName = tagName;
    pushed->attributeCount = attrCount;
    pushed->attributes = attrs;
    pushed->shouldParseTextNode = shouldParseTextNode;
    pushed->userInfo = userInfo;

    // Publish the new top of stack to the caller.
    *ctxPtr = pushed;
}

void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr)
Expand Down Expand Up @@ -924,7 +935,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReade
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES, NULL);

// Only parse text node for the identifier marked as unique-identifier in the package tag
// see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier
Expand Down Expand Up @@ -984,7 +995,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReade
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} else {
if(xmlStrcmp(name, BAD_CAST "item") == 0) {
EPUB3ManifestItemRef newItem = EPUB3ManifestItemCreate();
Expand Down Expand Up @@ -1043,7 +1054,7 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForSpineInOPF(EPUB3Ref epub, xmlTextReaderPt
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(context, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
} else {
if(xmlStrcmp(name, BAD_CAST "itemref") == 0) {
EPUB3SpineItemRef newItem = EPUB3SpineItemCreate();
Expand Down Expand Up @@ -1113,13 +1124,13 @@ EPUB3Error EPUB3ParseXMLReaderNodeForOPF(EPUB3Ref epub, xmlTextReaderPtr reader,
}
}
else if(xmlStrcmp(name, BAD_CAST "metadata") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES, NULL);
}
else if(xmlStrcmp(name, BAD_CAST "manifest") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateManifest, name, 0, NULL, kEPUB3_YES, NULL);
}
else if(xmlStrcmp(name, BAD_CAST "spine") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateSpine, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(currentContext, kEPUB3OPFStateSpine, name, 0, NULL, kEPUB3_YES, NULL);
}
}
break;
Expand Down Expand Up @@ -1211,10 +1222,11 @@ EPUB3Error EPUB3ParseNCXFromData(EPUB3Ref epub, void * buffer, uint32_t bufferSi
EPUB3XMLParseContextPtr currentContext = &contextStack[0];

int retVal = xmlTextReaderRead(reader);
currentContext->state = kEPUB3OPFStateRoot;
currentContext->state = kEPUB3NCXStateRoot;
currentContext->tagName = xmlTextReaderConstName(reader);
while(retVal == 1)
{
// _EPUB3DumpXMLParseContextStack(&currentContext);
error = EPUB3ParseXMLReaderNodeForNCX(epub, reader, &currentContext);
retVal = xmlTextReaderRead(reader);
}
Expand Down Expand Up @@ -1242,12 +1254,12 @@ EPUB3Error EPUB3ParseXMLReaderNodeForNCX(EPUB3Ref epub, xmlTextReaderPtr reader,
if(name != NULL && currentNodeType != XML_READER_TYPE_COMMENT) {
switch((*currentContext)->state)
{
case kEPUB3OPFStateRoot:
case kEPUB3NCXStateRoot:
{
// fprintf(stdout, "NCX ROOT: %s\n", name);
if(currentNodeType == XML_READER_TYPE_ELEMENT) {
if(xmlStrcmp(name, BAD_CAST "navMap") == 0) {
(void)EPUB3SaveParseContext(currentContext, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_YES);
(void)EPUB3SaveParseContext(currentContext, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_YES, NULL);
}
}
break;
Expand All @@ -1274,36 +1286,23 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForNavMapInNCX(EPUB3Ref epub, xmlTextReaderP
assert(reader != NULL);

EPUB3Error error = kEPUB3Success;
// const xmlChar *name = xmlTextReaderConstLocalName(reader);
// xmlReaderTypes nodeType = xmlTextReaderNodeType(reader);
//
// switch(nodeType)
// {
// case XML_READER_TYPE_ELEMENT:
// {
// if(!xmlTextReaderIsEmptyElement(reader)) {
// (void)EPUB3SaveParseContext(context, kEPUB3OPFStateMetadata, name, 0, NULL, kEPUB3_YES);
//
// // Only parse text node for the identifier marked as unique-identifier in the package tag
// // see: http://idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcidentifier
// if(xmlStrcmp(name, BAD_CAST "identifier") == 0) {
// if(xmlTextReaderHasAttributes(reader)) {
// xmlChar * itemId = xmlTextReaderGetAttribute(reader, BAD_CAST "id");
// if(itemId == NULL) {
// (*context)->shouldParseTextNode = kEPUB3_NO;
// }
// else if(itemId != NULL && xmlStrcmp(itemId, BAD_CAST epub->metadata->_uniqueIdentifierID) != 0) {
// (*context)->shouldParseTextNode = kEPUB3_NO;
// EPUB3_FREE_AND_NULL(itemId);
// }
// }
// }
//
// }
// break;
// }
// case XML_READER_TYPE_TEXT:
// {
const xmlChar *name = xmlTextReaderConstLocalName(reader);
xmlReaderTypes nodeType = xmlTextReaderNodeType(reader);

switch(nodeType)
{
case XML_READER_TYPE_ELEMENT:
{
if(!xmlTextReaderIsEmptyElement(reader)) {
if(xmlStrcmp(name, BAD_CAST "navPoint") == 0) {
EPUB3TocItemRef newTocItem = EPUB3TocItemCreate();
(void)EPUB3SaveParseContext(context, kEPUB3NCXStateNavMap, name, 0, NULL, kEPUB3_NO, newTocItem);
}
}
break;
}
case XML_READER_TYPE_TEXT:
{
// const xmlChar *value = xmlTextReaderValue(reader);
// if(value != NULL && (*context)->shouldParseTextNode) {
// if(xmlStrcmp((*context)->tagName, BAD_CAST "title") == 0) {
Expand All @@ -1316,15 +1315,22 @@ EPUB3Error EPUB3ProcessXMLReaderNodeForNavMapInNCX(EPUB3Ref epub, xmlTextReaderP
// (void)EPUB3MetadataSetLanguage(epub->metadata, (const char *)value);
// }
// }
// break;
// }
// case XML_READER_TYPE_END_ELEMENT:
// {
// (void)EPUB3PopAndFreeParseContext(context);
// break;
// }
// default: break;
// }
break;
}
case XML_READER_TYPE_END_ELEMENT:
{
if(xmlStrcmp(name, BAD_CAST "navPoint") == 0) {
if((*context)->userInfo != NULL) {
EPUB3TocItemRef newTocItem = (*context)->userInfo;
EPUB3TocAddRootItem(epub->toc, newTocItem);
EPUB3TocItemRelease(newTocItem);
}
(void)EPUB3PopAndFreeParseContext(context);
}
break;
}
default: break;
}
return error;
}

Expand Down
7 changes: 4 additions & 3 deletions EPUB3_private.h
Expand Up @@ -56,6 +56,7 @@ typedef struct _EPUB3OPFParseContext {
int32_t attributeCount;
char ** attributes;
EPUB3Bool shouldParseTextNode;
void * userInfo;
} EPUB3XMLParseContext;

typedef EPUB3XMLParseContext * EPUB3XMLParseContextPtr;
Expand Down Expand Up @@ -157,11 +158,12 @@ struct EPUB3Toc {
// A single table-of-contents entry. Entries carry a title and an href and are
// linked into a tree through `parent` and the child list below.
struct EPUB3TocItem {
EPUB3Type _type; // object header (carries the refCount and typeID)
char * title; // freed by EPUB3TocItemRelease when the last reference goes away
char * href; // freed by EPUB3TocItemRelease; EPUB3TocItemCopyPath returns a strdup of it
EPUB3TocItemRef parent; //weak ref
int32_t childCount; // number of entries in the child list
EPUB3TocItemChildListItemPtr childrenHead; // first node of the child list (NULL when there are no children)
EPUB3TocItemChildListItemPtr childrenTail; // presumably the last node of the child list -- confirm against EPUB3TocItemAppendChild
EPUB3ManifestItemRef manifestItem; //weak ref
// EPUB3ManifestItemRef manifestItem; //weak ref
};

#pragma mark - Base Object
Expand Down Expand Up @@ -226,12 +228,11 @@ void EPUB3TocItemRetain(EPUB3TocItemRef item);
void EPUB3TocItemRelease(EPUB3TocItemRef item);
void EPUB3TocAddRootItem(EPUB3TocRef toc, EPUB3TocItemRef item);
void EPUB3TocItemAppendChild(EPUB3TocItemRef parent, EPUB3TocItemRef child);
void EPUB3TocItemSetManifestItem(EPUB3TocItemRef tocItem, EPUB3ManifestItemRef manifestItem);

#pragma mark - XML Parsing

EPUB3Error EPUB3InitFromOPF(EPUB3Ref epub, const char * opfFilename);
void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode);
void EPUB3SaveParseContext(EPUB3XMLParseContextPtr *ctxPtr, EPUB3XMLParseState state, const xmlChar * tagName, int32_t attrCount, char ** attrs, EPUB3Bool shouldParseTextNode, void * userInfo);
void EPUB3PopAndFreeParseContext(EPUB3XMLParseContextPtr *contextPtr);
EPUB3Error EPUB3ProcessXMLReaderNodeForMetadataInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context);
EPUB3Error EPUB3ProcessXMLReaderNodeForManifestInOPF(EPUB3Ref epub, xmlTextReaderPtr reader, EPUB3XMLParseContextPtr *context);
Expand Down
9 changes: 1 addition & 8 deletions TestEPUB3Processor/check_EPUB3.c
Expand Up @@ -155,12 +155,10 @@ START_TEST(test_epub3_toc)
ck_assert_int_eq(item->_type.refCount, 1);
ck_assert_str_eq(item->_type.typeID, kEPUB3TocItemTypeID);

EPUB3ManifestItemRef manifestItem = EPUB3ManifestItemCreate();
const char * href = "a/path/to/something";
manifestItem->href = strdup(href);
EPUB3TocItemSetManifestItem(item, manifestItem);
const char * myTitle = "My Title";
item->title = strdup(myTitle);
item->href = strdup(href);

char * path = EPUB3TocItemCopyPath(item);
ck_assert_str_eq(path, href);
Expand All @@ -169,11 +167,6 @@ START_TEST(test_epub3_toc)
char * title = EPUB3TocItemCopyTitle(item);
ck_assert_str_eq(title, myTitle);
free(title);

ck_assert_int_eq(manifestItem->_type.refCount, 1);
EPUB3TocItemRelease(item);
ck_assert_int_eq(manifestItem->_type.refCount, 1);
EPUB3ManifestItemRelease(manifestItem);
}
END_TEST

Expand Down
17 changes: 9 additions & 8 deletions TestEPUB3Processor/check_EPUB3_parsing.c
Expand Up @@ -469,14 +469,8 @@ START_TEST(test_epub3_parse_ncx_from_medallion)
TEST_DATA_FILE_SIZE_SANITY_CHECK(path, 6709);
EPUB3Ref blankEPUB = EPUB3Create();

EPUB3MetadataRef blankMetadata = EPUB3MetadataCreate();
EPUB3SetMetadata(blankEPUB, blankMetadata);

EPUB3ManifestRef blankManifest = EPUB3ManifestCreate();
EPUB3SetManifest(blankEPUB, blankManifest);

EPUB3SpineRef blankSpine = EPUB3SpineCreate();
EPUB3SetSpine(blankEPUB, blankSpine);
EPUB3TocRef toc = EPUB3TocCreate();
blankEPUB->toc = toc;

struct stat st;
stat(path, &st);
Expand All @@ -487,6 +481,13 @@ START_TEST(test_epub3_parse_ncx_from_medallion)

fail_if(ferror(fp) != 0, "Problem reading test data file %s: %s", path, strerror(ferror(fp)));
fail_unless(bytesRead == bufferSize, "Only read %d bytes of the %d byte test data file.", bytesRead, bufferSize);

EPUB3Error error = EPUB3ParseNCXFromData(blankEPUB, newBuf, (int32_t)bytesRead);
fail_unless(error == kEPUB3Success);

ck_assert_int_eq(toc->rootItemCount, 35);

EPUB3Release(blankEPUB);
}
END_TEST

Expand Down

0 comments on commit 7094707

Please sign in to comment.