-
-
Notifications
You must be signed in to change notification settings - Fork 309
/
Document.php
124 lines (101 loc) · 3.32 KB
/
Document.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
<?php
declare(strict_types = 1);
namespace Embed;
use DOMDocument;
use DOMNode;
use DOMXPath;
use HtmlParser\Parser;
use Psr\Http\Message\UriInterface;
use RuntimeException;
use Symfony\Component\CssSelector\CssSelectorConverter;
/**
 * DOM view of the HTML body returned for an extractor's response.
 *
 * The body is parsed once in the constructor; the resulting document can then
 * be queried with XPath or CSS selectors (select(), selectCss(), link()) and
 * pruned in place (remove(), removeCss()).
 */
class Document
{
    /** Converter shared by all instances; created on first use in cssToXpath(). */
    private static CssSelectorConverter $cssConverter;

    private Extractor $extractor;
    private DOMDocument $document;
    private DOMXPath $xpath;

    /**
     * Parse the body of the extractor's response into a DOM document.
     *
     * @param Extractor $extractor Source of the response; also handed to every QueryResult
     */
    public function __construct(Extractor $extractor)
    {
        $this->extractor = $extractor;

        $html = (string) $extractor->getResponse()->getBody();

        // Insert a newline in front of every "<br>" / "<br " tag
        // (presumably so text extracted from the DOM keeps the visual line break).
        $html = str_replace('<br>', "\n<br>", $html);
        $html = str_replace('<br ', "\n<br ", $html);

        // Compare against '' explicitly: empty() would also treat a body that is
        // exactly "0" as "no content" and silently drop it.
        $this->document = $html !== '' ? Parser::parse($html) : new DOMDocument();
        $this->initXPath();
    }

    /**
     * (Re)create the XPath helper bound to the current document.
     *
     * Registers the "php" namespace and enables PHP function calls, which the
     * expressions produced by buildQuery() rely on.
     */
    private function initXPath(): void
    {
        $this->xpath = new DOMXPath($this->document);
        $this->xpath->registerNamespace('php', 'http://php.net/xpath');
        $this->xpath->registerPhpFunctions();
    }

    /**
     * Give the copy its own DOMDocument and an XPath helper bound to it,
     * so the clone does not keep querying the original's document.
     */
    public function __clone()
    {
        $this->document = clone $this->document;
        $this->initXPath();
    }

    /**
     * Remove from the document every node matched by an XPath query.
     *
     * @param string $query XPath expression
     */
    public function remove(string $query): void
    {
        // Copy the matches into a plain array before mutating the tree.
        $nodes = iterator_to_array($this->xpath->query($query), false);

        foreach ($nodes as $node) {
            $parent = $node->parentNode;

            // A node without a parent (e.g. the document node itself) cannot be
            // detached; skip it instead of calling removeChild() on null.
            if ($parent !== null) {
                $parent->removeChild($node);
            }
        }
    }

    /**
     * Remove from the document every node matched by a CSS selector.
     *
     * @param string $query CSS selector
     *
     * @throws RuntimeException When symfony/css-selector is not installed
     */
    public function removeCss(string $query): void
    {
        $this->remove(self::cssToXpath($query));
    }

    /**
     * Return the underlying DOM document (the live instance, not a copy).
     */
    public function getDocument(): DOMDocument
    {
        return $this->document;
    }

    /**
     * Helper to build XPath queries easily and case-insensitively.
     *
     * Appends one predicate per attribute to $startQuery. Both the attribute
     * and the expected value are lower-cased with mb_strtolower so that
     * non-ASCII values compare consistently.
     *
     * @param string                $startQuery Base XPath expression
     * @param array<string, string> $attributes Attribute name => expected value
     */
    private static function buildQuery(string $startQuery, array $attributes): string
    {
        $selector = [$startQuery];

        foreach ($attributes as $name => $value) {
            $selector[] = sprintf(
                '[php:functionString("mb_strtolower", @%s)=%s]',
                $name,
                self::xpathLiteral(mb_strtolower($value))
            );
        }

        return implode('', $selector);
    }

    /**
     * Turn an arbitrary string into an XPath 1.0 string literal.
     *
     * XPath 1.0 literals have no escape sequences, so a value containing both
     * quote characters has to be assembled with concat().
     */
    private static function xpathLiteral(string $value): string
    {
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }

        // Split on every double quote and re-insert it as the single-quoted literal '"'.
        return 'concat("' . str_replace('"', '", \'"\', "', $value) . '")';
    }

    /**
     * Select elements in the DOM with an XPath query.
     *
     * @param string                     $query      XPath expression
     * @param array<string, string>|null $attributes Optional attribute filters, matched case-insensitively
     * @param DOMNode|null               $context    Optional node the query is evaluated relative to
     */
    public function select(string $query, ?array $attributes = null, ?DOMNode $context = null): QueryResult
    {
        if (!empty($attributes)) {
            $query = self::buildQuery($query, $attributes);
        }

        return new QueryResult($this->xpath->query($query, $context), $this->extractor);
    }

    /**
     * Select elements in the DOM using a CSS selector.
     *
     * @param string       $query   CSS selector
     * @param DOMNode|null $context Optional node the query is evaluated relative to
     *
     * @throws RuntimeException When symfony/css-selector is not installed
     */
    public function selectCss(string $query, ?DOMNode $context = null): QueryResult
    {
        return $this->select(self::cssToXpath($query), null, $context);
    }

    /**
     * Shortcut to select a <link> element and return its href.
     *
     * @param string                $rel   Expected value of the rel attribute
     * @param array<string, string> $extra Additional attribute filters; a "rel" key here is
     *                                     ignored because the array union keeps the first one
     */
    public function link(string $rel, array $extra = []): ?UriInterface
    {
        return $this->select('.//link', ['rel' => $rel] + $extra)->url('href');
    }

    /**
     * Serialize the current document back to a string via the HTML parser.
     */
    public function __toString(): string
    {
        return Parser::stringify($this->getDocument());
    }

    /**
     * Convert a CSS selector to XPath, creating the shared converter on first use.
     *
     * @throws RuntimeException When symfony/css-selector is not installed
     */
    private static function cssToXpath(string $selector): string
    {
        if (!isset(self::$cssConverter)) {
            if (!class_exists(CssSelectorConverter::class)) {
                throw new RuntimeException('You need to install "symfony/css-selector" to use css selectors');
            }

            self::$cssConverter = new CssSelectorConverter();
        }

        return self::$cssConverter->toXpath($selector);
    }
}