Browse files

MDL-40585 backup: cache XML parent paths

For $this->groupedpaths, storing each path as an array key is faster
because membership becomes an isset() lookup instead of an in_array() search.

The parent cache substantially reduces the number of dirname calls.
2048 was chosen as the cache size because this class operates on one
chunk of the restore at a time: restoring 8k questions produces a parent
cache of about 500 paths and uses 300K of memory.  Scaling up to 2048
entries will use about 1.2M of RAM for really large restores.  This is
acceptable given the 48% reduction in function calls and the 10% runtime
improvement seen.
  • Loading branch information...
1 parent c2ad901 commit 619fc0cd3c01af69d6f9ccf25a932b992f15e3cb @mr-russ mr-russ committed Jul 11, 2013
Showing with 42 additions and 5 deletions.
  1. +42 −5 backup/util/xml/parser/processors/grouped_parser_processor.class.php
View
47 backup/util/xml/parser/processors/grouped_parser_processor.class.php
@@ -42,6 +42,18 @@
protected $groupedpaths; // Paths we are requesting grouped
protected $currentdata; // Where we'll be acummulating data
+
+ /**
+ * Keep cache of parent directory paths for XML parsing.
+ * @var array
+ */
+ protected $parentcache = array();
+
+ /**
+ * Remaining space for parent directory paths.
+ * @var integer
+ */
+ protected $parentcacheavailablesize = 2048;
public function __construct(array $paths = array()) {
$this->groupedpaths = array();
@@ -65,7 +77,7 @@ public function add_path($path, $grouped = false) {
$a->child = $found;
throw new progressive_parser_exception('xml_grouped_child_found', $a);
}
- $this->groupedpaths[] = $path;
+ $this->groupedpaths[$path] = true;
}
parent::add_path($path);
}
@@ -141,7 +153,7 @@ protected function postprocess_chunk($data) {
}
protected function path_is_grouped($path) {
- return in_array($path, $this->groupedpaths);
+ return isset($this->groupedpaths[$path]);
}
/**
@@ -150,24 +162,49 @@ protected function path_is_grouped($path) {
* false if not
*/
protected function grouped_parent_exists($path) {
- $parentpath = progressive_parser::dirname($path);
+ $parentpath = $this->get_parent_path($path);
+
while ($parentpath != '/') {
if ($this->path_is_grouped($parentpath)) {
return $parentpath;
}
- $parentpath = progressive_parser::dirname($parentpath);
+ $parentpath = $this->get_parent_path($parentpath);
}
return false;
}
/**
+ * Get the parent path of an XML path, using a local cache for performance.
+ *
+ * The cache is bounded: every newly cached path consumes one unit of
+ * $parentcacheavailablesize and, once that budget is exhausted, the 200
+ * oldest entries are dropped and 200 units are returned to the budget.
+ *
+ * @param string $path The pathname you wish to obtain the parent name for.
+ * @return string The parent pathname.
+ */
+ protected function get_parent_path($path) {
+ if (isset($this->parentcache[$path])) {
+ return $this->parentcache[$path];
+ }
+
+ // Keep the result in a local variable so it can still be returned if the eviction below
+ // drops the entry that was just added (this happens when the cache holds 200 entries or
+ // fewer at eviction time, e.g. if a subclass lowers $parentcacheavailablesize).
+ $parentpath = progressive_parser::dirname($path);
+ $this->parentcache[$path] = $parentpath;
+ $this->parentcacheavailablesize--;
+ if ($this->parentcacheavailablesize < 0) {
+ // Oldest first is cheaper than LRU. We drop roughly 10% as items are grouped together and the large
+ // quiz restore from MDL-40585 used only 600 parent paths. This is an XML hierarchy, so common paths
+ // are grouped near each other, e.g. /question_bank/question_category/question/element. Once parsing
+ // moves on to another area, the cached question_bank paths are no longer useful, so the oldest
+ // entries can be discarded.
+ $this->parentcache = array_slice($this->parentcache, 200, null, true);
+ $this->parentcacheavailablesize += 200;
+ }
+ return $parentpath;
+ }
+
+
+ /**
* Function that will look for any grouped
* child for the given path, returning it if found,
* false if not
*/
protected function grouped_child_exists($path) {
$childpath = $path . '/';
- foreach ($this->groupedpaths as $groupedpath) {
+ foreach ($this->groupedpaths as $groupedpath => $set) {
if (strpos($groupedpath, $childpath) === 0) {
return $groupedpath;
}

0 comments on commit 619fc0c

Please sign in to comment.