Skip to content

Commit

Permalink
Merge ab1c62b into e13c495
Browse files Browse the repository at this point in the history
  • Loading branch information
unixnut committed Apr 19, 2024
2 parents e13c495 + ab1c62b commit 640f45c
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/Font.php
Expand Up @@ -191,7 +191,7 @@ public function loadTranslateTable(): array
// Support for multiple bfchar sections
if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';

preg_match_all($regexp, $section, $matches);

Expand Down
10 changes: 10 additions & 0 deletions src/Smalot/PdfParser/Page.php
Expand Up @@ -54,6 +54,16 @@ class Page extends PDFObject
*/
protected $dataTm;

/**
 * Assigns a list of fonts to this page, unless the page already holds a non-empty list.
 *
 * @param array<\Smalot\PdfParser\Font> $fonts fonts to use when the page has none of its own
 *
 * @return void
 */
public function setFonts($fonts)
{
    // A non-empty fonts list already stored on the page is never overwritten.
    if (!empty($this->fonts)) {
        return;
    }

    $this->fonts = $fonts;
}

/**
* @return Font[]
*/
Expand Down
55 changes: 55 additions & 0 deletions src/Smalot/PdfParser/Pages.php
Expand Up @@ -39,6 +39,11 @@
*/
class Pages extends PDFObject
{
/**
* @var array<\Smalot\PdfParser\Font>|null
*/
protected $fonts;

/**
* @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them,
*
Expand All @@ -57,17 +62,67 @@ public function getPages(bool $deep = false): array
return $kidsElement->getContent();
}

// Prepare to apply the Pages' object's fonts to each page
if (false === \is_array($this->fonts)) {
$this->setupFonts();
}
$fontsAvailable = 0 < \count($this->fonts);

$kids = $kidsElement->getContent();
$pages = [];

foreach ($kids as $kid) {
if ($kid instanceof self) {
$pages = array_merge($pages, $kid->getPages(true));
} elseif ($kid instanceof Page) {
if ($fontsAvailable) {
$kid->setFonts($this->fonts);
}
$pages[] = $kid;
}
}

return $pages;
}

/**
 * Gathers the fonts referenced by this object's "Resources" entry and stores them in $this->fonts.
 *
 * After this method returns, $this->fonts is always an array (possibly empty), so callers
 * can safely pass it to count(). Each font is stored under its original id and, in addition,
 * under a cleaned id that only keeps digits, ".", "-" and "_" (if that cleaned id is not empty).
 *
 * @return void
 */
protected function setupFonts()
{
    // Start with an empty list so that every exit path below leaves an array behind.
    // Leaving the property at null would make a later count($this->fonts) fail on PHP 8.
    $this->fonts = [];

    $resources = $this->get('Resources');

    if (!method_exists($resources, 'has') || !$resources->has('Font')) {
        return;
    }

    // Look the entry up once instead of repeating the call for every check.
    $fontEntry = $resources->get('Font');

    // no fonts available, therefore stop here
    if ($fontEntry instanceof Element\ElementMissing) {
        return;
    }

    if ($fontEntry instanceof Header) {
        $fonts = $fontEntry->getElements();
    } else {
        $fonts = $fontEntry->getHeader()->getElements();
    }

    $table = [];

    foreach ($fonts as $id => $font) {
        if (!$font instanceof Font) {
            continue;
        }

        $table[$id] = $font;

        // Store too on cleaned id value (only numeric)
        $cleanedId = preg_replace('/[^0-9\.\-_]/', '', (string) $id);
        // Loose comparison on purpose: it also skips the null that preg_replace returns on error.
        if ('' != $cleanedId) {
            $table[$cleanedId] = $font;
        }
    }

    $this->fonts = $table;
}
}
125 changes: 125 additions & 0 deletions tests/PHPUnit/Integration/PagesTest.php
@@ -0,0 +1,125 @@
<?php

/**
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
*
* @date 2024-04-19
*
* @license LGPLv3
*
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/

namespace PHPUnitTests\Integration;

use PHPUnitTests\TestCase;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Font;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\Pages;

/**
* Test double for Pages that lets a test set the protected $fonts property directly.
*
* @internal only for test purposes
*/
class PagesDummy extends Pages
{
/**
* Replaces the fonts list of this instance unconditionally with the given one.
*
* @param array<\Smalot\PdfParser\Font> $fonts
*
* @return void
*/
public function setFonts($fonts)
{
$this->fonts = $fonts;
}
}

/**
* Tests how Pages::getPages hands the fonts of a Pages instance to its Page kids.
*/
class PagesTest extends TestCase
{
/**
* Fonts set on the Pages instance are passed to a Page kid when getPages is called.
*
* @see https://github.com/smalot/pdfparser/pull/698
*/
public function testPullRequest698NoFontsSet(): void
{
$document = $this->createMock(Document::class);

// create a Page mock and tell PHPUnit that its setFonts has to be called once
// otherwise an error is raised
$page1 = $this->createMock(Page::class);
$page1->expects($this->once())->method('setFonts');

// setup header
$header = new Header([
'Kids' => new ElementArray([
$page1,
]),
], $document);

$font1 = $this->createMock(Font::class);

$pages = new PagesDummy($document, $header);
$pages->setFonts([$font1]);

// we expect setFonts is called on $page1
$pages->getPages(true);
}

/**
* Don't override the fonts list of a Page if it already has one.
*
* @see https://github.com/smalot/pdfparser/pull/698
*/
public function testPullRequest698DontOverride(): void
{
$document = $this->createMock(Document::class);

// create a real Page (not a mock) and give it its own fonts list up front,
// so we can check afterwards that this list is still in place
$font2 = new Font($document);
$page1 = new Page($document);
$page1->setFonts([$font2]);

// setup header
$header = new Header([
'Kids' => new ElementArray([
$page1,
]),
], $document);

$font1 = $this->createMock(Font::class);

$pages = new PagesDummy($document, $header);
$pages->setFonts([$font1]);

$pages->getPages(true);

// note: $font1 and $font2 are intentionally not both of the same type.
// one is a mock and the other one a real instance of Font.
// this way we can simply check the return value of getFonts here.
// if both were of the same type, we would have to use a different assertion approach.
$this->assertEquals([$font2], $page1->getFonts());
}
}

0 comments on commit 640f45c

Please sign in to comment.