-
Notifications
You must be signed in to change notification settings - Fork 25
/
TikaRestClient.php
125 lines (117 loc) · 3.59 KB
/
TikaRestClient.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
<?php
use Guzzle\Http\Client;
use Guzzle\Http\Exception\RequestException;
class TikaRestClient extends Client
{
    /**
     * Authentication options to be sent to the Tika server
     *
     * @var array
     */
    protected $options = array('username' => null, 'password' => null);

    /**
     * Cache for the result of getSupportedMimes(); empty until a lookup has
     * returned a non-empty list.
     *
     * @var array
     */
    protected $mimes = array();

    /**
     * Picks up credentials from the SS_TIKA_USERNAME / SS_TIKA_PASSWORD
     * constants (only when both are defined) and then defers to the parent
     * client constructor.
     *
     * @param string $baseUrl Passed through to the parent constructor
     * @param mixed $config Passed through to the parent constructor
     */
    public function __construct($baseUrl = '', $config = null)
    {
        if (defined('SS_TIKA_USERNAME') && defined('SS_TIKA_PASSWORD')) {
            $this->options = array(
                'username' => SS_TIKA_USERNAME,
                'password' => SS_TIKA_PASSWORD,
            );
        }
        parent::__construct($baseUrl, $config);
    }

    /**
     * Detect if the service is available
     *
     * Issues a GET against the base URL. A request exception is logged at
     * error level and reported as "not available".
     *
     * @return bool True only when the server answered with status 200
     */
    public function isAvailable()
    {
        try {
            $request = $this->get(null);
            $request->setAuth($this->options['username'], $this->options['password']);
            $request->send();

            // Always yield a real boolean: previously a non-200 success
            // response fell off the end of the method and returned null.
            return $request->getResponse()->getStatusCode() == 200;
        } catch (RequestException $ex) {
            SS_Log::log(sprintf("Tika unavailable - %s", $ex->getMessage()), SS_Log::ERR);
            return false;
        }
    }

    /**
     * Get version code
     *
     * Requests the "version" endpoint and parses the number following
     * "Apache Tika" in the response body. Because the result is cast to
     * float, only the leading "major.minor" portion of the matched version
     * string survives.
     *
     * @return float Parsed version, or 0.0 if the response was not a 200 or
     *               did not contain a recognisable version string
     */
    public function getVersion()
    {
        $request = $this->get('version');
        $request->setAuth($this->options['username'], $this->options['password']);
        $request->send();

        $version = 0.0;
        $response = $request->getResponse();

        // Parse output; the body is fetched as a string explicitly rather
        // than relying on implicit object-to-string conversion in preg_match
        if ($response->getStatusCode() == 200 &&
            preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody(true), $matches)
        ) {
            $version = (float)$matches['version'];
        }
        return $version;
    }

    /**
     * Gets supported mime data. May include aliased mime types.
     *
     * The decoded JSON from the "mime-types" endpoint is cached on the
     * instance, so the server is not queried again once a non-empty result
     * has been stored.
     *
     * @return array
     */
    public function getSupportedMimes()
    {
        if ($this->mimes) {
            return $this->mimes;
        }
        $request = $this->get(
            'mime-types',
            array('Accept' => 'application/json')
        );
        $request->setAuth($this->options['username'], $this->options['password']);
        $request->send();
        return $this->mimes = $request->getResponse()->json();
    }

    /**
     * Extract text content from a given file.
     * Logs a notice-level error if the document can't be read or parsed.
     *
     * @param string $file Full filesystem path to a file to post
     * @return string|null Content of the file extracted as plain text, or
     *                     null if the file could not be read or the request failed
     */
    public function tika($file)
    {
        $contents = file_get_contents($file);
        if ($contents === false) {
            // Don't send an empty body to Tika when the file is unreadable
            SS_Log::log(
                sprintf('TikaRestClient was not able to read %s.', $file),
                SS_Log::NOTICE
            );
            return null;
        }

        try {
            $request = $this->put(
                'tika',
                array('Accept' => 'text/plain'),
                $contents
            );
            $request->setAuth($this->options['username'], $this->options['password']);
            $request->send();
            return $request->getResponse()->getBody(true);
        } catch (RequestException $e) {
            // Not every request exception carries a response (e.g. a
            // transport-level failure), so look it up defensively.
            $failed = method_exists($e, 'getResponse') ? $e->getResponse() : null;
            if ($failed) {
                $msg = sprintf(
                    'TikaRestClient was not able to process %s. Response: %s %s.',
                    $file,
                    $failed->getStatusCode(),
                    $failed->getReasonPhrase()
                );
                // Only available if tika-server was started with --includeStack
                $body = $failed->getBody(true);
                if ($body) {
                    $msg .= ' Body: ' . $body;
                }
            } else {
                $msg = sprintf(
                    'TikaRestClient was not able to process %s. Error: %s',
                    $file,
                    $e->getMessage()
                );
            }
            SS_Log::log($msg, SS_Log::NOTICE);
        }
        return null;
    }
}