Skip to content

Commit

Permalink
Add support for reading gzipped XML files (xxx.xml.gz)
Browse files: browse the repository at this point in the history
  • Loading branch information
TangoCash authored and vanhofen committed Aug 30, 2018
1 parent 2179bc9 commit 4bcce91
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 28 deletions.
157 changes: 130 additions & 27 deletions lib/xmltree/xmlinterface.cpp
Expand Up @@ -41,10 +41,12 @@
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#else /* USE_LIBXML */
#include "gzstream.h"
#include "xmltok.h"
#endif /* USE_LIBXML */
#include <fcntl.h>
#include <stdio.h>
#include <zlib.h>

unsigned long xmlGetNumericAttribute(const xmlNodePtr node, const char *name, const int base)
{
Expand Down Expand Up @@ -230,27 +232,103 @@ xmlDocPtr parseXml(const char * data,const char* /*encoding*/)
xmlDocPtr parseXmlFile(const char * filename, bool,const char* encoding)
{
pugi::xml_encoding enc = pugi::encoding_auto;
if(encoding==NULL){
std::ifstream in;
in.open(filename);
if (in.is_open()) {
std::string line;
getline(in, line);
for (std::string::iterator it = line.begin(); it != line.end(); ++ it)
*it = toupper(*it);
if (line.find("ISO-8859-1",0)!= std::string::npos){
enc = pugi::encoding_latin1;
std::string fn = filename;
igzstream inz;
std::ifstream in;
bool zipped = (fn.substr(fn.find_last_of(".") + 1) == "gz");

if(encoding==NULL)
{
if (zipped)
{
inz.open(filename);
if (inz.is_open())
{
std::string line;
getline(inz, line);
for (std::string::iterator it = line.begin(); it != line.end(); ++ it)
*it = toupper(*it);
if (line.find("ISO-8859-1",0)!= std::string::npos)
{
enc = pugi::encoding_latin1;
}
inz.close();
}
}
else
{
in.open(filename);
if (in.is_open())
{
std::string line;
getline(in, line);
for (std::string::iterator it = line.begin(); it != line.end(); ++ it)
*it = toupper(*it);
if (line.find("ISO-8859-1",0)!= std::string::npos)
{
enc = pugi::encoding_latin1;
}
in.close();
}
in.close();
}
}

pugi::xml_document* tree_parser = new pugi::xml_document();

if (!tree_parser->load_file(filename, pugi::parse_default, enc))

if (zipped)
{
delete tree_parser;
return NULL;
int fd = open(filename, O_RDONLY);

uint32_t gzsize = 0;
lseek(fd, -4, SEEK_END);
read(fd, &gzsize, 4);
lseek(fd, 0, SEEK_SET);

gzFile xmlgz_file = gzdopen(fd,"rb");

if (xmlgz_file == NULL)
{
delete tree_parser;
return NULL;
}

gzbuffer(xmlgz_file, 64*1024);

void* buffer = pugi::get_memory_allocation_function()(gzsize);

if (!buffer)
{
gzclose(xmlgz_file);
delete tree_parser;
return NULL;
}

size_t read_size = gzread(xmlgz_file,buffer,gzsize);

if (read_size != gzsize)
{
gzclose(xmlgz_file);
delete tree_parser;
return NULL;
}

gzclose(xmlgz_file);

const pugi::xml_parse_result result = tree_parser->load_buffer_inplace_own(buffer,gzsize, pugi::parse_default, enc);
if (result.status != pugi::xml_parse_status::status_ok)
{
printf("Error: Loading %s (%d)\n", filename, result.status);
delete tree_parser;
return NULL;
}
}
else
{
if (!tree_parser->load_file(filename, pugi::parse_default, enc))
{
delete tree_parser;
return NULL;
}
}

if (!tree_parser->root())
Expand Down Expand Up @@ -295,21 +373,39 @@ xmlDocPtr parseXmlFile(const char * filename, bool warning_by_nonexistence /* =
size_t done;
size_t length;
FILE* xml_file;
gzFile xmlgz_file;
std::string fn = filename;
bool zipped = (fn.substr(fn.find_last_of(".") + 1) == "gz");

xml_file = fopen(filename, "r");

if (xml_file == NULL)
if (zipped)
{
if (warning_by_nonexistence)
perror(filename);
return NULL;
xmlgz_file = gzopen(filename,"r");
if (xmlgz_file == NULL)
{
if (warning_by_nonexistence)
perror(filename);
return NULL;
}
gzbuffer(xmlgz_file, 64*1024);
}
else
{
xml_file = fopen(filename, "r");
if (xml_file == NULL)
{
if (warning_by_nonexistence)
perror(filename);
return NULL;
}
}

tree_parser = new XMLTreeParser(encoding);

do
{
length = fread(buffer, 1, sizeof(buffer), xml_file);
if (zipped)
length = gzread(xmlgz_file, buffer, sizeof(buffer));
else
length = fread(buffer, 1, sizeof(buffer), xml_file);
done = length < sizeof(buffer);

if (!tree_parser->Parse(buffer, length, done))
Expand All @@ -320,17 +416,24 @@ xmlDocPtr parseXmlFile(const char * filename, bool warning_by_nonexistence /* =
tree_parser->ErrorString(tree_parser->GetErrorCode()),
tree_parser->GetCurrentLineNumber());

fclose(xml_file);
if (zipped)
gzclose(xmlgz_file);
else
fclose(xml_file);
delete tree_parser;
return NULL;
}
}
while (!done);

if (posix_fadvise(fileno(xml_file), 0, 0, POSIX_FADV_DONTNEED) != 0)
perror("posix_fadvise FAILED!");
if (!zipped)
if (posix_fadvise(fileno(xml_file), 0, 0, POSIX_FADV_DONTNEED) != 0)
perror("posix_fadvise FAILED!");

fclose(xml_file);
if (zipped)
gzclose(xmlgz_file);
else
fclose(xml_file);

if (!tree_parser->RootNode())
{
Expand Down
3 changes: 2 additions & 1 deletion src/Makefile.am
Expand Up @@ -127,7 +127,8 @@ neutrino_LDADD = \
-ljpeg \
-lutil \
-lOpenThreads \
-lrt -lpthread
-lrt -lpthread \
-lz

if ENABLE_GIFLIB
neutrino_LDADD += -lgif
Expand Down

0 comments on commit 4bcce91

Please sign in to comment.