-
Notifications
You must be signed in to change notification settings - Fork 0
/
classGrabberText.php
81 lines (58 loc) · 1.88 KB
/
classGrabberText.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
<?php
require_once 'simple_html_dom.php';
/**
 * Collects text fragments from a web page using simple_html_dom selectors.
 *
 * Selectors are registered with addUrlPattern(). getTexts() then loads a page
 * through file_get_html(), runs every registered selector against it and
 * returns the cleaned text of each match, grouped by selector.
 *
 * file_get_html() is not defined in this class; it is expected to come from
 * the simple_html_dom.php file required at the top of this file.
 */
class GrabberText
{
    /** @var array Selectors registered through addUrlPattern(), in insertion order. */
    private $urlPattern = array();

    /** @var bool When truthy, getTexts() passes its result through horizontal(). */
    private $fillHorizontal = false;

    /**
     * Regroups a getTexts() result under a single heading key.
     *
     * The heading is the first entry stored under the "h1" key. Every value
     * from every group that is not identical to that heading is appended, in
     * iteration order, to one list keyed by the heading. When there is no
     * "h1" entry the empty string is used as the key, so a missing heading
     * no longer raises undefined-key warnings.
     *
     * @param array $arr Map of group name => list of values.
     *
     * @return array Empty array, or a one-element map heading => list of values.
     */
    private function horizontal($arr)
    {
        // Resolve the heading once, guarding against a missing "h1" group.
        $heading = isset($arr['h1'][0]) ? $arr['h1'][0] : '';

        $grouped = array();
        foreach ($arr as $blocks) {
            foreach ($blocks as $block) {
                // Strict comparison: loose != treats numeric-looking strings
                // such as "1e3" and "1000" as equal and would drop the block.
                if ($heading !== $block) {
                    $grouped[$heading][] = $block;
                }
            }
        }

        return $grouped;
    }

    /**
     * Strips markup and unwanted characters from a matched fragment.
     *
     * Removes tags, then pipes, line breaks, single quotes, double quotes and
     * the space character.
     *
     * @param string $raw Markup of a matched element.
     *
     * @return string The cleaned text.
     */
    private function cleanText($raw)
    {
        // NOTE(review): the last search string is reproduced exactly as it
        // appears in the file; as written it removes every space between
        // words. Confirm whether a non-breaking space or a tab was intended.
        $unwanted = array("|", "\n", "\r", "'", "\"", " ");

        return str_replace($unwanted, "", strip_tags($raw));
    }

    /**
     * Enables or disables regrouping of getTexts() results under the heading.
     *
     * The misspelled name is kept because existing callers use it; see
     * setFillHorizontal() for the correctly spelled equivalent.
     *
     * @param bool $bool New value of the flag.
     *
     * @return void
     */
    public function setFilHorizontal($bool)
    {
        $this->fillHorizontal = $bool;
    }

    /**
     * Correctly spelled equivalent of setFilHorizontal().
     *
     * @param bool $bool New value of the flag.
     *
     * @return void
     */
    public function setFillHorizontal($bool)
    {
        $this->setFilHorizontal($bool);
    }

    /**
     * Registers one more selector to be used by getTexts().
     *
     * @param string $urlPattern Selector handed to the DOM's find() method.
     *
     * @return void
     */
    public function addUrlPattern($urlPattern)
    {
        $this->urlPattern[] = $urlPattern;
    }

    /**
     * Returns the registered selectors in the order they were added.
     *
     * @return array
     */
    public function getUrlPatterns()
    {
        return $this->urlPattern;
    }

    /**
     * Loads a page and extracts the text of every element matching a
     * registered selector.
     *
     * For each match, a set "src" attribute is stored under the key
     * "<selector> src", and the cleaned text is stored under "<selector>"
     * when it is longer than 5 bytes. If file_get_html() does not return an
     * object, nothing is collected.
     *
     * @param string $url Location passed to file_get_html().
     *
     * @return array Map of key => list of strings; passed through
     *               horizontal() first when the horizontal flag is set.
     */
    public function getTexts($url)
    {
        $texts = array();
        $html = file_get_html($url);

        if (is_object($html)) {
            foreach ($this->getUrlPatterns() as $pattern) {
                foreach ($html->find($pattern) as $node) {
                    if (isset($node->src)) {
                        $texts[$pattern . " src"][] = $node->src;
                    }

                    // Explicit cast instead of relying on strip_tags() to
                    // coerce the node object into its markup string.
                    $clean = $this->cleanText((string) $node);
                    if (strlen($clean) > 5) {
                        $texts[$pattern][] = $clean;
                    }
                }
            }

            // Only plain strings were kept, so the parsed DOM can be
            // released now (simple_html_dom recommends clear() for this).
            $html->clear();
        }

        return $this->fillHorizontal ? $this->horizontal($texts) : $texts;
    }
}
/*
 * Usage:
 *
 *   $gl = new GrabberText();
 *   $gl->addUrlPattern('.authors');
 *   $gl->addUrlPattern('.quote');
 *   print_r($gl->getTexts("https://www.quotetab.com/quotes/by-richard-bach"));
 */