mirrored from git://git.moodle.org/moodle.git
/
document.php
484 lines (433 loc) · 15.7 KB
/
document.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Document representation.
*
* @package core_search
* @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
namespace core_search;
defined('MOODLE_INTERNAL') || die();
/**
* Represents a document to index.
*
* Note that, if you are writing a search engine and you want to change \core_search\document
* behaviour, you can override this class; it will be automatically loaded from \search_YOURENGINE\document.
*
* @package core_search
* @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class document implements \renderable, \templatable {

    /**
     * @var array The document data, keyed by field name.
     */
    protected $data = array();

    /**
     * @var array Extra data needed to render the document.
     */
    protected $extradata = array();

    /**
     * @var \moodle_url|null Link to the document, null until set_doc_url() is called.
     */
    protected $docurl = null;

    /**
     * @var \moodle_url|null Link to the document context, null until set_context_url() is called.
     */
    protected $contexturl = null;

    /**
     * @var int|null The content field filearea.
     */
    protected $contentfilearea = null;

    /**
     * @var int|null The content field itemid.
     */
    protected $contentitemid = null;

    /**
     * All required fields any doc should contain.
     *
     * We have to choose a format to specify field types, using solr format as we have to choose one and solr is the
     * default search engine.
     *
     * Search engine plugins are responsible for setting their appropriate field types and mapping this naming to
     * whatever format they need.
     *
     * @var array
     */
    protected static $requiredfields = array(
        'id' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => false
        ),
        'itemid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'title' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'content' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'contextid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'areaid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'type' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'courseid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => false
        ),
        'owneruserid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'modified' => array(
            'type' => 'tdate',
            'stored' => true,
            'indexed' => true
        ),
    );

    /**
     * All optional fields docs can contain.
     *
     * Although it matches solr fields format, this is just to define the field types. Search
     * engine plugins are responsible for setting their appropriate field types and mapping this
     * naming to whatever format they need.
     *
     * @var array
     */
    protected static $optionalfields = array(
        'userid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => false
        ),
        'description1' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'description2' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
    );

    /**
     * We ensure that the document has a unique id across search areas.
     *
     * The document id is built as "<areaid>-<itemid>".
     *
     * @throws \coding_exception If $itemid is not numeric.
     * @param int $itemid An id unique to the search area
     * @param string $componentname The search area component Frankenstyle name
     * @param string $areaname The area name (the search area class name)
     * @return void
     */
    public function __construct($itemid, $componentname, $areaname) {

        if (!is_numeric($itemid)) {
            throw new \coding_exception('The itemid should be an integer');
        }

        $this->data['areaid'] = \core_search\manager::generate_areaid($componentname, $areaname);
        $this->data['id'] = $this->data['areaid'] . '-' . $itemid;
        $this->data['itemid'] = intval($itemid);
    }

    /**
     * Setter.
     *
     * Basic checks to prevent common issues.
     *
     * Integer and date (tdate) fields must be numeric and are cast to a PHP integer; tdate
     * values are expected to be timestamps. Any other field is stored as a string with leading
     * and trailing line breaks removed and every run of whitespace collapsed to a single space.
     *
     * @throws \coding_exception If the field is unknown or the value does not fit the field type.
     * @param string $fieldname The field name
     * @param string|int $value The value to store
     * @return string|int The stored value
     */
    public function set($fieldname, $value) {

        $fielddata = null;
        if (!empty(static::$requiredfields[$fieldname])) {
            $fielddata = static::$requiredfields[$fieldname];
        } else if (!empty(static::$optionalfields[$fieldname])) {
            $fielddata = static::$optionalfields[$fieldname];
        }

        if (empty($fielddata)) {
            throw new \coding_exception('"' . $fieldname . '" field does not exist.');
        }

        // The tdate fields should be set as timestamps, later they might be converted to
        // a date format, it depends on the search engine.
        if ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') {
            if (!is_numeric($value)) {
                // Non-scalars cannot be concatenated safely, report their type instead.
                $shown = is_scalar($value) ? $value : gettype($value);
                throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' .
                    $shown . '"');
            }

            // We want to be strict here, there might be engines that expect us to
            // provide them data with the proper type already set.
            $this->data[$fieldname] = intval($value);
            return $this->data[$fieldname];
        }

        if (is_array($value)) {
            throw new \coding_exception('"' . $fieldname . '" value should be a string and an array was given.');
        }

        // Cast before trimming so a null value becomes an empty string.
        $text = trim((string) $value, "\r\n");

        // Clean up line breaks and extra spaces. The "u" modifier makes the engine treat the
        // subject as UTF-8, so \s can never match a single byte inside a multi-byte character.
        $cleaned = preg_replace('/\s+/u', ' ', $text);
        if ($cleaned === null) {
            // The subject is not valid UTF-8; fall back to an ASCII-only, byte-safe clean up.
            $cleaned = preg_replace('/[ \t\n\r\f\x0B]+/', ' ', $text);
        }
        if ($cleaned === null) {
            throw new \coding_exception('"' . $fieldname . '" value could not be cleaned, preg_replace error code ' .
                preg_last_error());
        }

        $this->data[$fieldname] = $cleaned;
        return $this->data[$fieldname];
    }

    /**
     * Sets data to this->extradata
     *
     * This data can be retrieved using \core_search\document->get($fieldname).
     *
     * @param string $fieldname
     * @param string $value
     * @return void
     */
    public function set_extra($fieldname, $value) {
        $this->extradata[$fieldname] = $value;
    }

    /**
     * Getter.
     *
     * Looks the field up in the document data first and in the extra data afterwards.
     *
     * Use self::is_set if you are not sure if this field is set or not
     * as otherwise it will trigger a \coding_exception
     *
     * @throws \coding_exception If the field is set in neither the data nor the extra data.
     * @param string $field
     * @return string|int
     */
    public function get($field) {

        if (isset($this->data[$field])) {
            return $this->data[$field];
        }

        // Fallback to extra data.
        if (isset($this->extradata[$field])) {
            return $this->extradata[$field];
        }

        throw new \coding_exception('Field "' . $field . '" is not set in the document');
    }

    /**
     * Checks if a field is set, either as document data or as extra data.
     *
     * @param string $field
     * @return bool
     */
    public function is_set($field) {
        return (isset($this->data[$field]) || isset($this->extradata[$field]));
    }

    /**
     * Returns all default fields definitions (required fields followed by optional ones).
     *
     * @return array
     */
    public static function get_default_fields_definition() {
        return static::$requiredfields + static::$optionalfields;
    }

    /**
     * Formats the timestamp preparing the time fields to be inserted into the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They can do it extending
     * this class in \search_xxx\document.
     *
     * @param int $timestamp
     * @return int|string The timestamp, returned unchanged by this default implementation.
     */
    public static function format_time_for_engine($timestamp) {
        return $timestamp;
    }

    /**
     * Formats a string value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        return $string;
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * By default it just returns the value it receives so any search engine could just store
     * integers and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They should do it extending
     * this class in \search_xxx\document.
     *
     * @param string $time
     * @return int
     */
    public static function import_time_from_engine($time) {
        return $time;
    }

    /**
     * Returns how text is returned from the search engine.
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_PLAIN;
    }

    /**
     * Fills the document with data coming from the search engine.
     *
     * Only known fields (required and optional) are read; fields missing from $docdata are skipped.
     *
     * @throws \core_search\engine_exception If any known field comes back as a multivalued (array) field.
     * @param array $docdata
     * @return void
     */
    public function set_data_from_engine($docdata) {
        $fields = static::$requiredfields + static::$optionalfields;
        foreach ($fields as $fieldname => $field) {

            // Optional params might not be there.
            if (!isset($docdata[$fieldname])) {
                continue;
            }

            // No way we can make this work if there is any multivalue field, whatever its type.
            if (is_array($docdata[$fieldname])) {
                throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
            }

            if ($field['type'] === 'tdate') {
                // Time fields may need a preprocessing.
                $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
            } else {
                $this->set($fieldname, $docdata[$fieldname]);
            }
        }
    }

    /**
     * Sets the document url.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_doc_url(\moodle_url $url) {
        $this->docurl = $url;
    }

    /**
     * Gets the url to the doc.
     *
     * @return \moodle_url|null Null if the url has not been set.
     */
    public function get_doc_url() {
        return $this->docurl;
    }

    /**
     * Sets the url to the document context.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_context_url(\moodle_url $url) {
        $this->contexturl = $url;
    }

    /**
     * Gets the url to the context.
     *
     * @return \moodle_url|null Null if the url has not been set.
     */
    public function get_context_url() {
        return $this->contexturl;
    }

    /**
     * Returns the document ready to submit to the search engine.
     *
     * Works on a copy of the data, so the document instance is not modified.
     *
     * @throws \coding_exception If a required field has not been set.
     * @return array
     */
    public function export_for_engine() {
        // We don't want to affect the document instance.
        $data = $this->data;

        // Required fields go first so a missing one is reported before anything else.
        $fields = static::$requiredfields + static::$optionalfields;
        foreach ($fields as $fieldname => $field) {

            if (!isset($data[$fieldname])) {
                // We also check that we have everything we need.
                if (isset(static::$requiredfields[$fieldname])) {
                    throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' .
                        $this->data['id'] . '"');
                }
                // Optional fields may simply not be there.
                continue;
            }

            // Apply specific engine-dependent formats and restrictions.
            if ($field['type'] === 'tdate') {
                // Overwrite the timestamp with the engine-dependent format.
                $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
            } else if ($field['type'] === 'string') {
                // Overwrite the string with the engine-dependent format.
                $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
            }
        }

        return $data;
    }

    /**
     * Export the document data to be used as a template context.
     *
     * Adding more info than the required one as people might be interested in extending the template.
     *
     * Although content is a required field when setting up the document, it accepts '' (empty) values
     * as they may be the result of stripping out HTML.
     *
     * @throws \coding_exception If a field the template needs (courseid, coursefullname, modified,
     *                           title, contextid) is not set.
     * @param \renderer_base $output The renderer.
     * @return array
     */
    public function export_for_template(\renderer_base $output) {

        // Every formatted string is filtered in the context the document belongs to.
        $formatoptions = array('context' => $this->get('contextid'));

        $data = [
            'courseurl' => new \moodle_url('/course/view.php', array('id' => $this->get('courseid'))),
            'coursefullname' => format_string($this->get('coursefullname'), true, $formatoptions),
            'modified' => userdate($this->get('modified')),
            'title' => format_string($this->get('title'), true, $formatoptions),
            'docurl' => $this->get_doc_url(),
            'content' => $this->is_set('content') ? $this->format_text($this->get('content')) : null,
            'contexturl' => $this->get_context_url(),
            'description1' => $this->is_set('description1') ? $this->format_text($this->get('description1')) : null,
            'description2' => $this->is_set('description2') ? $this->format_text($this->get('description2')) : null,
        ];

        // The user info is only exported when the document is linked to a user.
        if ($this->is_set('userid')) {
            $data['userurl'] = new \moodle_url('/user/view.php', array(
                'id' => $this->get('userid'),
                'course' => $this->get('courseid')
            ));
            $data['userfullname'] = format_string($this->get('userfullname'), true, $formatoptions);
        }

        return $data;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * By default just return the text as it is:
     * - Search areas are responsible for sending just plain data, the search engine may
     *   append HTML or markdown to it (highlighting for example).
     * - The view is responsible for shortening the text if it is too big
     *
     * @param string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        return format_text($text, $this->get_text_format(), array('context' => $this->get('contextid')));
    }
}