Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for more than 65535 different attribute values #124

Merged
merged 1 commit into from Mar 10, 2018
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+42 −41
Diff settings

Always

Just for now

Allow for more than 65535 different attribute values

Fix TOC and links in books with many footnotes, links and backlinks.
Attribute values (just like element names and attribute names) are strings
stored in a table, and are later referenced only by an integer index
into that table.
These integer indexes were stored in a lUInt16 slot, limiting them to the
range 0-65535. Once the table grew past this limit, the index wrapped around
to 0 and collided with earlier references, which broke attribute string
retrieval. As a result, id= and href= values were wrong, which affected TOC entries and links.
We now store them in a lUInt32 slot, allowing for many more distinct
attribute values.
  • Loading branch information...
poire-z committed Mar 10, 2018
commit ffa007dd80c7abd2706d515d5774097c95b14deb
Copy path View file
@@ -62,7 +62,7 @@

#define LXML_NS_NONE 0 ///< no namespace specified
#define LXML_NS_ANY 0xFFFF ///< any namespace can be specified
#define LXML_ATTR_VALUE_NONE 0xFFFF ///< attribute not found
#define LXML_ATTR_VALUE_NONE 0xFFFFFFFF ///< attribute not found

#define DOC_STRING_HASH_SIZE 256
#define RESERVED_DOC_SPACE 4096
@@ -959,21 +959,21 @@ class lxmlDocBase : public tinyNodeCollection
lUInt16 getAttrNameIndex( const lChar8 * name );

/// helper: returns attribute value
inline const lString16 & getAttrValue( lUInt16 index ) const
inline const lString16 & getAttrValue( lUInt32 index ) const
{
return _attrValueTable[index];
}

/// helper: returns attribute value index
inline lUInt16 getAttrValueIndex( const lChar16 * value )
inline lUInt32 getAttrValueIndex( const lChar16 * value )
{
return (lUInt16)_attrValueTable.add( value );
return (lUInt32)_attrValueTable.add( value );
}

/// helper: returns attribute value index, 0xffff if not found
inline lUInt16 findAttrValueIndex( const lChar16 * value )
/// helper: returns attribute value index, 0xffffffff if not found
inline lUInt32 findAttrValueIndex( const lChar16 * value )
{
return (lUInt16)_attrValueTable.find( value );
return (lUInt32)_attrValueTable.find( value );
}

/// Get element name by id
@@ -1048,18 +1048,18 @@ class lxmlDocBase : public tinyNodeCollection
}
#endif

void onAttributeSet( lUInt16 attrId, lUInt16 valueId, ldomNode * node );
void onAttributeSet( lUInt16 attrId, lUInt32 valueId, ldomNode * node );

/// get element by id attribute value code
inline ldomNode * getNodeById( lUInt16 attrValueId )
inline ldomNode * getNodeById( lUInt32 attrValueId )
{
return getTinyNode( _idNodeMap.get( attrValueId ) );
}

/// get element by id attribute value
inline ldomNode * getElementById( const lChar16 * id )
{
lUInt16 attrValueId = getAttrValueIndex( id );
lUInt32 attrValueId = getAttrValueIndex( id );
ldomNode * node = getNodeById( attrValueId );
return node;
}
@@ -1114,7 +1114,7 @@ class lxmlDocBase : public tinyNodeCollection
lUInt16 _nextUnknownAttrId; // Next Id for unknown attribute
lUInt16 _nextUnknownNsId; // Next Id for unknown namespace
lString16HashedCollection _attrValueTable;
LVHashTable<lUInt16,lInt32> _idNodeMap; // id to data index map
LVHashTable<lUInt32,lInt32> _idNodeMap; // id to data index map
LVHashTable<lString16,LVImageSourceRef> _urlImageMap; // url to image source map
lUInt16 _idAttrId; // Id for "id" attribute name
lUInt16 _nameAttrId; // Id for "name" attribute name
@@ -1139,12 +1139,12 @@ struct lxmlAttribute
//
lUInt16 nsid;
lUInt16 id;
lUInt16 index;
lUInt32 index;
inline bool compare( lUInt16 nsId, lUInt16 attrId )
{
return (nsId == nsid || nsId == LXML_NS_ANY) && (id == attrId);
}
inline void setData( lUInt16 nsId, lUInt16 attrId, lUInt16 valueIndex )
inline void setData( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
{
nsid = nsId;
id = attrId;
Copy path View file
@@ -2811,7 +2811,7 @@ bool LVDocView::goLink(lString16 link, bool savePos) {
return false; // only internal links supported (started with #)
}
link = link.substr(1, link.length() - 1);
lUInt16 id = m_doc->getAttrValueIndex(link.c_str());
lUInt32 id = m_doc->getAttrValueIndex(link.c_str());
ldomNode * dest = m_doc->getNodeById(id);
if (!dest)
return false;
Copy path View file
@@ -13,7 +13,7 @@

/// change in case of incompatible changes in swap/cache file format to avoid using incompatible swap file
// increment to force complete reload/reparsing of old file
#define CACHE_FILE_FORMAT_VERSION "3.05.05k"
#define CACHE_FILE_FORMAT_VERSION "3.05.06k"
/// increment following value to force re-formatting of old book after load
#define FORMATTING_VERSION_ID 0x0003

@@ -1432,12 +1432,12 @@ struct ElementDataStorageItem : public DataStorageItemHeader {
lInt32 childCount;
lInt32 children[1];
lUInt16 * attrs() { return (lUInt16 *)(children + childCount); }
lxmlAttribute * attr( int index ) { return (lxmlAttribute *)&(((lUInt16 *)(children + childCount))[index*3]); }
lUInt16 getAttrValueId( lUInt16 ns, lUInt16 id )
lxmlAttribute * attr( int index ) { return (lxmlAttribute *)&(((lUInt16 *)(children + childCount))[index*4]); }
lUInt32 getAttrValueId( lUInt16 ns, lUInt16 id )
{
lUInt16 * a = attrs();
for ( int i=0; i<attrCount; i++ ) {
lxmlAttribute * attr = (lxmlAttribute *)(&a[i*3]);
lxmlAttribute * attr = (lxmlAttribute *)(&a[i*4]);
if ( !attr->compare( ns, id ) )
continue;
return attr->index;
@@ -1448,7 +1448,7 @@ struct ElementDataStorageItem : public DataStorageItemHeader {
{
lUInt16 * a = attrs();
for ( int i=0; i<attrCount; i++ ) {
lxmlAttribute * attr = (lxmlAttribute *)(&a[i*3]);
lxmlAttribute * attr = (lxmlAttribute *)(&a[i*4]);
if ( attr->compare( ns, id ) )
return attr;
}
@@ -2485,7 +2485,7 @@ int ldomTextStorageChunk::addText( lUInt32 dataIndex, lUInt32 parentIndex, const
/// adds new element item to buffer, returns offset inside chunk of stored data
int ldomTextStorageChunk::addElem(lUInt32 dataIndex, lUInt32 parentIndex, int childCount, int attrCount)
{
int itemsize = (sizeof(ElementDataStorageItem) + attrCount*sizeof(lUInt16)*3 + childCount*sizeof(lUInt32) - sizeof(lUInt32) + 15) & 0xFFFFFFF0;
int itemsize = (sizeof(ElementDataStorageItem) + attrCount*(sizeof(lUInt16)*2 + sizeof(lUInt32)) + childCount*sizeof(lUInt32) - sizeof(lUInt32) + 15) & 0xFFFFFFF0;
if ( !_buf ) {
// create new buffer, if necessary
_bufsize = _manager->_chunkSize > itemsize ? _manager->_chunkSize : itemsize;
@@ -2751,7 +2751,7 @@ class ldomAttributeCollection
{
return _len;
}
lUInt16 get( lUInt16 nsId, lUInt16 attrId ) const
lUInt32 get( lUInt16 nsId, lUInt16 attrId ) const
{
for (lUInt16 i=0; i<_len; i++)
{
@@ -2760,7 +2760,7 @@ class ldomAttributeCollection
}
return LXML_ATTR_VALUE_NONE;
}
void set( lUInt16 nsId, lUInt16 attrId, lUInt16 valueIndex )
void set( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
{
// find existing
for (lUInt16 i=0; i<_len; i++)
@@ -2779,7 +2779,7 @@ class ldomAttributeCollection
}
_list[ _len++ ].setData(nsId, attrId, valueIndex);
}
void add( lUInt16 nsId, lUInt16 attrId, lUInt16 valueIndex )
void add( lUInt16 nsId, lUInt16 attrId, lUInt32 valueIndex )
{
// find existing
if (_len>=_size)
@@ -2863,7 +2863,7 @@ lxmlDocBase::~lxmlDocBase()
{
}

void lxmlDocBase::onAttributeSet( lUInt16 attrId, lUInt16 valueId, ldomNode * node )
void lxmlDocBase::onAttributeSet( lUInt16 attrId, lUInt32 valueId, ldomNode * node )
{
if ( _idAttrId==0 )
_idAttrId = _attrNameTable.idByName("id");
@@ -3526,7 +3526,7 @@ static const char * ns_id_map_magic = "NMSP";
static const char * node_by_id_map_magic = "NIDM";

typedef struct {
lUInt16 key;
lUInt32 key;
lUInt32 value;
} id_node_map_item;

@@ -3563,8 +3563,8 @@ void lxmlDocBase::serializeMaps( SerialBuf & buf )
buf.putMagic( node_by_id_map_magic );
lUInt32 cnt = 0;
{
LVHashTable<lUInt16,lInt32>::iterator ii = _idNodeMap.forwardIterator();
for ( LVHashTable<lUInt16,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
LVHashTable<lUInt32,lInt32>::iterator ii = _idNodeMap.forwardIterator();
for ( LVHashTable<lUInt32,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
cnt++;
}
}
@@ -3577,9 +3577,9 @@ void lxmlDocBase::serializeMaps( SerialBuf & buf )
// sort items before serializing!
id_node_map_item * array = new id_node_map_item[cnt];
int i = 0;
LVHashTable<lUInt16,lInt32>::iterator ii = _idNodeMap.forwardIterator();
for ( LVHashTable<lUInt16,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
array[i].key = (lUInt16)p->key;
LVHashTable<lUInt32,lInt32>::iterator ii = _idNodeMap.forwardIterator();
for ( LVHashTable<lUInt32,lInt32>::pair * p = ii.next(); p!=NULL; p = ii.next() ) {
array[i].key = (lUInt32)p->key;
array[i].value = (lUInt32)p->value;
i++;
}
@@ -3645,7 +3645,7 @@ bool lxmlDocBase::deserializeMaps( SerialBuf & buf )
if ( idmsize < 20000 )
_idNodeMap.resize( idmsize*2 );
for ( unsigned i=0; i<idmsize; i++ ) {
lUInt16 key;
lUInt32 key;
lUInt32 value;
buf >> key;
buf >> value;
@@ -4897,7 +4897,7 @@ ldomXPointer ldomDocument::createXPointer( const lString16 & xPointerStr )
{
if ( xPointerStr[0]=='#' ) {
lString16 id = xPointerStr.substr(1);
lUInt16 idid = getAttrValueIndex(id.c_str());
lUInt32 idid = getAttrValueIndex(id.c_str());
lInt32 nodeIndex;
if ( _idNodeMap.get(idid, nodeIndex) ) {
ldomNode * node = getTinyNode(nodeIndex);
@@ -10082,15 +10082,15 @@ const lString16 & ldomNode::getAttributeValue( lUInt16 nsid, lUInt16 id ) const
#endif
// element
tinyElement * me = NPELEM;
lUInt16 valueId = me->_attrs.get( nsid, id );
lUInt32 valueId = me->_attrs.get( nsid, id );
if ( valueId==LXML_ATTR_VALUE_NONE )
return lString16::empty_str;
return getDocument()->getAttrValue(valueId);
#if BUILD_LITE!=1
} else {
// persistent element
ElementDataStorageItem * me = getDocument()->_elemStorage.getElem( _data._pelem_addr );
lUInt16 valueId = me->getAttrValueId( nsid, id );
lUInt32 valueId = me->getAttrValueId( nsid, id );
if ( valueId==LXML_ATTR_VALUE_NONE )
return lString16::empty_str;
return getDocument()->getAttrValue(valueId);
@@ -10148,7 +10148,7 @@ bool ldomNode::hasAttribute( lUInt16 nsid, lUInt16 id ) const
#endif
// element
tinyElement * me = NPELEM;
lUInt16 valueId = me->_attrs.get( nsid, id );
lUInt32 valueId = me->_attrs.get( nsid, id );
return ( valueId!=LXML_ATTR_VALUE_NONE );
#if BUILD_LITE!=1
} else {
@@ -10175,7 +10175,7 @@ void ldomNode::setAttributeValue( lUInt16 nsid, lUInt16 id, const lChar16 * valu
ASSERT_NODE_NOT_NULL;
if ( !isElement() )
return;
lUInt16 valueIndex = getDocument()->getAttrValueIndex(value);
lUInt32 valueIndex = getDocument()->getAttrValueIndex(value);
#if BUILD_LITE!=1
if ( isPersistent() ) {
// persistent element
@@ -11538,8 +11538,8 @@ LVStreamRef ldomDocument::getObjectImageStream( lString16 refName )
}
return ref;
}
lUInt16 refValueId = findAttrValueIndex( refName.c_str() + 1 );
if ( refValueId == (lUInt16)-1 ) {
lUInt32 refValueId = findAttrValueIndex( refName.c_str() + 1 );
if ( refValueId == (lUInt32)-1 ) {
return ref;
}
ldomNode * objnode = getNodeById( refValueId );
@@ -11660,9 +11660,10 @@ ldomNode * ldomNode::persist()
int i;
for ( i=0; i<attrCount; i++ ) {
const lxmlAttribute * attr = elem->_attrs[i];
attrs[i * 3] = attr->nsid; // namespace
attrs[i * 3 + 1] = attr->id; // id
attrs[i * 3 + 2] = attr->index;// value
attrs[i * 4] = attr->nsid; // namespace
attrs[i * 4 + 1] = attr->id; // id
attrs[i * 4 + 2] = (lUInt16)(attr->index & 0xFFFF);// value lower 2-bytes
attrs[i * 4 + 3] = (lUInt16)(attr->index >> 16);// value higher 2-bytes
}
for ( i=0; i<childCount; i++ ) {
data->children[i] = elem->_children[i];
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.