This repository has been archived by the owner on Jun 6, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 89
/
Html.php
96 lines (81 loc) · 2.72 KB
/
Html.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
<?php
namespace Gufy\PdfToHtml;
use DOMDocument;
use DOMNode;
use DOMXPath;
use PHPHtmlParser\Dom;
/**
 * Converts a PDF file into per-page HTML and exposes the pages through the
 * inherited PHPHtmlParser DOM API.
 *
 * On construction the PDF is converted (via Base), every generated page file
 * is read, optionally reduced to its <body> element, and page 1 is loaded
 * into the DOM. Use goToPage() to switch the page the DOM operates on.
 */
class Html extends Dom
{
    /** @var array Page HTML keyed by 1-based page number. */
    protected $contents = [];

    /** @var int|null Number of pages read during construction. */
    protected $total_pages;

    /** @var int|null Page currently loaded into the DOM. */
    protected $current_page;

    /** @var string|null Path of the source PDF as passed to the constructor. */
    protected $pdf_file;

    /**
     * @var bool Set to true once conversion starts; not read anywhere in this
     *           class (NOTE(review): purpose unclear from here - confirm).
     */
    protected $locked = false;

    /** @var array Defaults merged under the caller-supplied options. */
    protected $default_options = [
        'singlePage' => true,
        'noFrames' => false,
    ];

    /**
     * @param string $pdf_file Path to the PDF to convert.
     * @param array  $options  Conversion options; override $default_options.
     *
     * @throws \Exception When conversion output cannot be prepared or read,
     *                    or the document yields no pages.
     */
    public function __construct($pdf_file, $options = [])
    {
        $options = array_merge($this->default_options, $options);
        $this->getContents($pdf_file, $options);
    }

    /**
     * Converts the PDF, collects the HTML of every page and loads page 1.
     *
     * @param string $pdf_file
     * @param array  $options
     *
     * @throws \RuntimeException When the output directory cannot be created
     *                           or a generated page file cannot be read.
     * @throws \Exception        When the document has no pages (from goToPage).
     */
    private function getContents($pdf_file, $options)
    {
        $this->locked = true;
        $this->pdf_file = $pdf_file;

        $info = new Pdf($pdf_file);
        $pdf = new Base($pdf_file, $options);
        $pages = $info->getPages();

        // The unique sub-directory is only used when no output path is configured.
        $random_dir = uniqid();
        $outputDir = Config::get('pdftohtml.output', dirname(__FILE__).'/../output/'.$random_dir);
        $this->ensureOutputDirectory($outputDir);

        $pdf->setOutputDirectory($outputDir);
        $pdf->generate();

        // Page files are expected at "<outputDir>/<pdf filename>-<page>.html".
        $fileinfo = pathinfo($pdf_file);
        $base_path = $pdf->outputDir.'/'.$fileinfo['filename'];

        // The setting does not change between pages, so read it once.
        $inline = $this->inlineCss();

        $contents = [];
        for ($i = 1; $i <= $pages; $i++) {
            $path = $base_path.'-'.$i.'.html';
            $content = $this->readPageFile($path);

            // Drop stray "Â" characters - presumably artefacts of non-breaking
            // spaces decoded with the wrong charset; TODO confirm.
            $content = str_replace("Â", "", $content);

            if ($inline) {
                $content = $this->extractBodyHtml($content);
            }

            // Persist the cleaned page, then keep the same string in memory
            // instead of reading the file straight back.
            file_put_contents($path, $content);
            $contents[ $i ] = $content;
        }

        $this->contents = $contents;
        $this->total_pages = count($contents);
        $this->goToPage(1);
    }

    /**
     * Creates the output directory if it does not exist yet.
     *
     * @param string $outputDir
     *
     * @throws \RuntimeException When the directory cannot be created.
     */
    private function ensureOutputDirectory($outputDir)
    {
        // The second is_dir() covers another process creating the directory
        // between the first check and mkdir().
        if (!is_dir($outputDir) && !mkdir($outputDir, 0777, true) && !is_dir($outputDir)) {
            throw new \RuntimeException("Unable to create output directory: {$outputDir}");
        }
    }

    /**
     * Reads one generated page file.
     *
     * @param string $path
     *
     * @return string
     *
     * @throws \RuntimeException When the file is missing or unreadable.
     */
    private function readPageFile($path)
    {
        $content = is_file($path) ? file_get_contents($path) : false;
        if ($content === false) {
            throw new \RuntimeException("Unable to read generated page file: {$path}");
        }

        return $content;
    }

    /**
     * Strips HTML comments and returns only the serialized <body> element.
     *
     * @param string $content Full HTML document of one page.
     *
     * @return string The <body> markup, or the literal 'something failed'
     *                when no body element can be obtained.
     */
    private function extractBodyHtml($content)
    {
        // loadHTML() rejects an empty string (ValueError on PHP 8), so use the
        // fallback directly in that case.
        if ($content === '') {
            return 'something failed';
        }

        // Collect libxml parse errors internally instead of emitting a PHP
        // warning for every markup problem; restore the caller's setting after.
        $previous = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!$loaded) {
            return 'something failed';
        }

        $xpath = new DOMXPath($dom);
        foreach ($xpath->query('//comment()') as $comment) {
            $comment->parentNode->removeChild($comment);
        }

        $body = $xpath->query('//body')->item(0);

        return $body instanceof DOMNode ? $dom->saveHTML($body) : 'something failed';
    }

    /**
     * Loads the given page into the DOM and makes it the current page.
     *
     * @param int $page 1-based page number.
     *
     * @return mixed Whatever the inherited load() returns.
     *
     * @throws \Exception When $page is outside 1..getTotalPages().
     */
    public function goToPage($page = 1)
    {
        $total = count($this->contents);
        if ($page < 1 || $page > $total) {
            throw new \Exception("You're asking to go to page {$page} but max page of this document is ".$total);
        }

        $this->current_page = $page;

        return $this->load($this->contents[ $page ]);
    }

    /**
     * Returns the stored HTML string of a page without touching the DOM.
     *
     * @param int $page 1-based page number.
     *
     * @return string|null The page HTML, or null when the page does not exist.
     */
    public function raw($page = 1)
    {
        return isset($this->contents[ $page ]) ? $this->contents[ $page ] : null;
    }

    /**
     * @return int Number of pages held in memory.
     */
    public function getTotalPages()
    {
        return count($this->contents);
    }

    /**
     * @return int|null Page number last passed to goToPage().
     */
    public function getCurrentPage()
    {
        return $this->current_page;
    }

    /**
     * Reads the 'pdftohtml.inlineCss' setting (default true). When truthy,
     * each page is reduced to its comment-free <body> element.
     *
     * @return mixed The configured value.
     */
    public function inlineCss()
    {
        return Config::get('pdftohtml.inlineCss', true);
    }
}