Skip to content

Commit

Permalink
update patch a387ad8..c215c42 for poppler-0.63.0
Browse files Browse the repository at this point in the history
  • Loading branch information
mpsuzuki committed Mar 19, 2018
1 parent 2ab5707 commit 8ce2556
Show file tree
Hide file tree
Showing 12 changed files with 208 additions and 59 deletions.
83 changes: 83 additions & 0 deletions cpp/poppler-font-private.h
@@ -0,0 +1,83 @@
/*
* Copyright (C) 2009, Pino Toscano <pino@kde.org>
* Copyright (C) 2015, Tamas Szekeres <szekerest@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include "poppler-font.h"

#include "poppler-document-private.h"

#include "FontInfo.h"

#include <algorithm>

using namespace poppler;

/*
 * Private data backing poppler::font_info: a plain snapshot of the values
 * read from a core FontInfo object.
 */
class poppler::font_info_private
{
public:
    /*
     * Creates an empty record: unknown type, not embedded, not a subset,
     * empty name and file.
     */
    font_info_private()
        : type(font_info::unknown)
        , is_embedded(false)
        , is_subset(false)
    {
        /*
         * Give the references a defined value so that copying a
         * default-constructed record never reads indeterminate ints.
         * NOTE(review): -1/-1 is presumed never to match a real object
         * reference -- confirm against the core Ref conventions.
         */
        ref.num = -1;
        ref.gen = -1;
        emb_ref.num = -1;
        emb_ref.gen = -1;
    }

    /*
     * Copies type, embedded/subset flags, name, file and both references
     * out of fi. fi must not be null; the name and file strings are left
     * empty when the corresponding getter returns null.
     */
    font_info_private(FontInfo *fi)
        : type(static_cast<font_info::type_enum>(fi->getType()))
        , is_embedded(fi->getEmbedded())
        , is_subset(fi->getSubset())
    {
        if (fi->getName()) {
            font_name = fi->getName()->getCString();
        }
        if (fi->getFile()) {
            font_file = fi->getFile()->getCString();
        }

        ref.num = fi->getRef().num;
        ref.gen = fi->getRef().gen;
        emb_ref.num = fi->getEmbRef().num;
        emb_ref.gen = fi->getEmbRef().gen;
    }

    std::string font_name;
    std::string font_file;
    font_info::type_enum type : 5;
    bool is_embedded : 1;
    bool is_subset : 1;
    /* value of FontInfo::getRef() */
    Ref ref;
    /* value of FontInfo::getEmbRef() */
    Ref emb_ref;
};


/*
 * Private state behind poppler::font_iterator.
 */
class poppler::font_iterator_private
{
public:
    /*
     * Builds the scanner from dd->doc and start_page, records the page
     * count reported by the document, and stores the start page with
     * negative values replaced by 0.
     */
    font_iterator_private(int start_page, document_private *dd)
        : font_info_scanner(dd->doc, start_page)
        , total_pages(dd->doc->getNumPages())
        , current_page(start_page < 0 ? 0 : start_page)
    {}

    ~font_iterator_private() {}

    FontInfoScanner font_info_scanner;
    int total_pages;
    int current_page;
};
59 changes: 9 additions & 50 deletions cpp/poppler-font.cpp
Expand Up @@ -21,60 +21,14 @@

#include "poppler-document-private.h"

#include "poppler-font-private.h"

#include "FontInfo.h"

#include <algorithm>

using namespace poppler;

/*
 * Private data backing poppler::font_info: type, embedded/subset flags,
 * and the font name and file copied from a core FontInfo object.
 */
class poppler::font_info_private
{
public:
    /* Empty record: unknown type, not embedded, not a subset. */
    font_info_private()
        : type(font_info::unknown)
        , is_embedded(false)
        , is_subset(false)
    {}

    /*
     * Snapshot of fi. The name and file strings stay empty when the
     * corresponding getter returns null.
     */
    font_info_private(FontInfo *fi)
        : type(static_cast<font_info::type_enum>(fi->getType()))
        , is_embedded(fi->getEmbedded())
        , is_subset(fi->getSubset())
    {
        if (auto *name = fi->getName()) {
            font_name = name->getCString();
        }
        if (auto *file = fi->getFile()) {
            font_file = file->getCString();
        }
    }

    std::string font_name;
    std::string font_file;
    font_info::type_enum type : 5;
    bool is_embedded : 1;
    bool is_subset : 1;
};


/*
 * Private state behind poppler::font_iterator: the font scanner plus the
 * page counters.
 */
class poppler::font_iterator_private
{
public:
    /*
     * The scanner is created from dd->doc and start_page; current_page
     * starts at start_page, or at 0 when start_page is negative.
     */
    font_iterator_private(int start_page, document_private *dd)
        : font_info_scanner(dd->doc, start_page)
        , total_pages(dd->doc->getNumPages())
        , current_page(start_page < 0 ? 0 : start_page)
    {}

    ~font_iterator_private() {}

    FontInfoScanner font_info_scanner;
    int total_pages;
    int current_page;
};

/**
\class poppler::font_info poppler-font.h "poppler/cpp/poppler-font.h"
Expand Down Expand Up @@ -205,15 +159,15 @@ font_iterator::~font_iterator()
/**
Returns the fonts of the current page and advances to the next one.
*/
std::vector<font_info> font_iterator::next()
std::vector<font_info> font_iterator::next(bool doSubst)
{
if (!has_next()) {
return std::vector<font_info>();
}

++d->current_page;

GooList *items = d->font_info_scanner.scan(1);
GooList *items = d->font_info_scanner.scan(1, doSubst);
if (!items) {
return std::vector<font_info>();
}
Expand All @@ -225,6 +179,11 @@ std::vector<font_info> font_iterator::next()
return fonts;
}

std::vector<font_info> font_iterator::next()
{
return font_iterator::next(true);
}

/**
\returns whether the iterator has more pages to advance to
*/
Expand Down
4 changes: 4 additions & 0 deletions cpp/poppler-font.h
Expand Up @@ -67,6 +67,7 @@ class POPPLER_CPP_EXPORT font_info

font_info_private *d;
friend class font_iterator;
friend class page;
};


Expand All @@ -75,6 +76,7 @@ class POPPLER_CPP_EXPORT font_iterator : public poppler::noncopyable
public:
~font_iterator();

std::vector<font_info> next(bool doSubst);
std::vector<font_info> next();
bool has_next() const;
int current_page() const;
Expand All @@ -84,6 +86,8 @@ class POPPLER_CPP_EXPORT font_iterator : public poppler::noncopyable

font_iterator_private *d;
friend class document;
friend class page;
friend class page_private;
};

}
Expand Down
5 changes: 5 additions & 0 deletions cpp/poppler-page-private.h
Expand Up @@ -29,6 +29,7 @@ namespace poppler

class document_private;
class page_transition;
class font_info;

class page_private
{
Expand All @@ -46,6 +47,10 @@ class page_private

static inline page_private* get(const poppler::page *p)
{ return const_cast<poppler::page *>(p)->d; }

std::vector<font_info> font_info_cache;

size_t fill_font_info_cache();
};

}
Expand Down
49 changes: 49 additions & 0 deletions cpp/poppler-page.cpp
Expand Up @@ -26,6 +26,8 @@
#include "poppler-document-private.h"
#include "poppler-page-private.h"
#include "poppler-private.h"
#include "poppler-font-private.h"
#include "poppler-font.h"

#include "TextOutputDev.h"

Expand All @@ -48,6 +50,23 @@ page_private::~page_private()
delete transition;
}

size_t page_private::fill_font_info_cache()
{
if (font_info_cache.size() > 0)
return font_info_cache.size();

poppler::font_iterator* font_iterator = new poppler::font_iterator(index, doc);

if (font_iterator->has_next()) {
/* do not trigger time-consuming substitution */
font_info_cache = font_iterator->next(false);
}

delete font_iterator;

return font_info_cache.size();
}

/**
\class poppler::page poppler-page.h "poppler/cpp/poppler-page.h"
Expand Down Expand Up @@ -321,8 +340,25 @@ bool text_box::has_space_after() const
return m_data->has_space_after;
}

/**
 \returns the entry at index \p i of the stored writing-mode list
 (the index is not range-checked)
 */
int text_box::get_wmode(int i) const
{
    const std::vector<int> &modes = m_data->wmodes;
    return modes[i];
}

double text_box::get_font_size() const
{
return m_data->font_size;
}

std::string text_box::get_font_name(int i) const
{
return m_data->font_infos[i]->name();
}

std::vector<text_box> page::text_list() const
{
d->fill_font_info_cache();

std::vector<text_box> output_list;

/* config values are same with Qt5 Page::TextList() */
Expand Down Expand Up @@ -366,13 +402,26 @@ std::vector<text_box> page::text_list() const
{},
word->hasSpaceAfter() == gTrue
}};
tb.m_data->font_size = word->getFontSize();

tb.m_data->char_bboxes.reserve(word->getLength());
for (int j = 0; j < word->getLength(); j ++) {
word->getCharBBox(j, &xMin, &yMin, &xMax, &yMax);
tb.m_data->char_bboxes.push_back({xMin, yMin, xMax-xMin, yMax-yMin});
}

tb.m_data->wmodes.reserve(word->getLength());
tb.m_data->font_infos.reserve(word->getLength());
for (int j = 0; j < word->getLength(); j ++) {
TextFontInfo* text_font_info = word->getFontInfo(j);
tb.m_data->wmodes.push_back(text_font_info->getWMode());
for (size_t k = 0; k < d->font_info_cache.size(); k ++) {
if (text_font_info->matches(d->font_info_cache[k].d->ref)) {
tb.m_data->font_infos.push_back(&(d->font_info_cache[k]));
}
}
}

output_list.push_back(std::move(tb));
}
}
Expand Down
6 changes: 6 additions & 0 deletions cpp/poppler-page.h
Expand Up @@ -57,6 +57,12 @@ class POPPLER_CPP_EXPORT text_box
*/
rectf char_bbox(size_t i) const;
bool has_space_after() const;

/* new functions missing in Qt frontend */
int get_wmode(int i = 0) const;
double get_font_size() const;
std::string get_font_name(int i = 0) const; /* sometimes in legacy encoding */

private:
text_box(text_box_data *data);

Expand Down
9 changes: 9 additions & 0 deletions cpp/poppler-private.h
Expand Up @@ -69,6 +69,7 @@ void delete_all(const Collection &c)
delete_all(c.begin(), c.end());
}

class font_info;
struct text_box_data
{
~text_box_data();
Expand All @@ -77,6 +78,14 @@ struct text_box_data
rectf bbox;
std::vector<rectf> char_bboxes;
bool has_space_after;

/*
* font_info objects are cached in the owning page_private
* (font_info_cache); there is no need to duplicate them here,
* so only pointers to the cached objects are stored.
*/
std::vector<int> wmodes;
double font_size;
std::vector<font_info*> font_infos;
};

}
Expand Down
4 changes: 4 additions & 0 deletions cpp/tests/poppler-dump.cpp
Expand Up @@ -340,8 +340,12 @@ static void print_page_text_list(poppler::page *p)
for (size_t i = 0; i < text_list.size(); i ++) {
poppler::rectf bbox = text_list[i].bbox();
poppler::ustring ustr = text_list[i].text();
std::string font_name = text_list[i].get_font_name(0);
double font_size = text_list[i].get_font_size();
int wmode = text_list[i].get_wmode();
std::cout << "[" << ustr << "] @ ";
std::cout << "( x=" << bbox.x() << " y=" << bbox.y() << " w=" << bbox.width() << " h=" << bbox.height() << " )";
std::cout << "( fontname=" << font_name << " fontsize=" << font_size << " wmode=" << wmode << " )";
std::cout << std::endl;

}
Expand Down

0 comments on commit 8ce2556

Please sign in to comment.