/
FilesystemPublisher.php
430 lines (357 loc) · 13.4 KB
/
FilesystemPublisher.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
<?php
/**
* @package staticpublisher
*/
class FilesystemPublisher extends StaticPublisher {
/**
* Folder, relative to BASE_PATH, that cache files are written to and removed from.
* Overwritten by the constructor.
*
* @var string
*/
protected $destFolder = 'cache';
/**
* Extension appended to every generated cache file name.
* The value 'php' makes publishPages() generate PHP cache scripts instead of plain files.
*
* @var string
*/
protected $fileExtension = 'html';
/**
* Optional base URL for the static copy of the site. When set, publishPages()
* applies it as the "Director.alternate_base_url" config value while rendering.
*
* @var string
*
* @config
*/
private static $static_base_url = null;
/**
* @config
*
* @var Boolean Use domain based caching (put cache files into a domain subfolder)
* This must be true if you are using this with the "subsites" module.
* Please note that this form of caching requires all URLs to be provided absolute
* (not relative to the webroot) via {@link SiteTree->AbsoluteLink()}.
*/
private static $domain_based_caching = false;
/**
 * Set a different base URL for the static copy of the site.
 * This can be useful if you are running the CMS on a different domain from the website.
 *
 * Raises a deprecation notice and then stores the value in the
 * "FilesystemPublisher.static_base_url" config setting.
 *
 * @deprecated 3.2 Use the "FilesystemPublisher.static_base_url" config setting instead
 *
 * @param string $url Base URL to store in the config setting
 */
public static function set_static_base_url($url) {
    $notice = 'Use the "FilesystemPublisher.static_base_url" config setting instead';
    Deprecation::notice('3.2', $notice);

    $config = Config::inst();
    $config->update('FilesystemPublisher', 'static_base_url', $url);
}
/**
 * @param string $destFolder The folder to save the cached site into; one trailing slash is removed.
 * This needs to be set in framework/static-main.php as well through the {@link $cacheBaseDir} variable.
 * @param string|null $fileExtension The file extension to use, e.g 'html'.
 * If omitted (or otherwise falsy) the class default of 'html' is kept.
 * If you set the extension to 'php', then a simple PHP script will be generated
 * for each page instead of a plain file.
 */
public function __construct($destFolder = 'cache', $fileExtension = null) {
    // Strip exactly one trailing slash so later "$destFolder/..." joins do not double up
    $endsWithSlash = (substr($destFolder, -1) == '/');
    $this->destFolder = $endsWithSlash ? substr($destFolder, 0, -1) : $destFolder;

    // Only override the default extension when a usable value was passed in
    if ($fileExtension) {
        $this->fileExtension = $fileExtension;
    }

    parent::__construct();
}
/**
 * Transforms relative or absolute URLs to their static path equivalent.
 * This needs to be the same logic that's used to look up these paths through
 * framework/static-main.php. Does not include the {@link $destFolder} prefix.
 *
 * URL filtering will have already taken place for direct SiteTree links via SiteTree->generateURLSegment()).
 * For all other links (e.g. custom controller actions), we assume that they're pre-sanitized
 * to suit the filesystem needs, as its impossible to sanitize them without risking to break
 * the underlying naming assumptions in URL routing (e.g. controller method names).
 *
 * URLs which parse_url() cannot parse at all are left out of the returned map.
 *
 * Examples (without $domain_based_caching):
 * - http://mysite.com/mywebroot/ => ./index.html (assuming your webroot is in a subfolder)
 * - http://mysite.com/about-us => ./about-us.html
 * - http://mysite.com/parent/child => parent/child.html
 *
 * Examples (with $domain_based_caching):
 * - http://mysite.com/mywebroot/ => mysite.com/index.html (assuming your webroot is in a subfolder)
 * - http://mysite.com/about-us => mysite.com/about-us.html
 * - http://myothersite.com/about-us => myothersite.com/about-us.html
 * - http://subdomain.mysite.com/parent/child => subdomain.mysite.com/parent/child.html
 *
 * @param array $urls Absolute or relative URLs
 * @return array Map of original URLs to filesystem paths (relative to {@link $destFolder}).
 */
public function urlsToPaths($urls) {
    $mappedUrls = array();

    // Loop-invariant values
    $domainBased = Config::inst()->get('FilesystemPublisher', 'domain_based_caching');
    $baseUrlLower = mb_strtolower(BASE_URL);
    $baseUrlLength = mb_strlen(BASE_URL);

    foreach($urls as $url) {
        // parse_url() is not multibyte safe, see https://bugs.php.net/bug.php?id=52923.
        // We assume that the URL has been correctly encoded either on storage (for SiteTree->URLSegment),
        // or through URL collection (for controller method names etc.).
        $urlParts = @parse_url($url);

        // Seriously malformed URL: skip it in every mode. Falling through with an empty
        // path would map it onto the home page's cache file.
        if(!$urlParts) continue;

        // Remove base folders from the URL if webroot is hosted in a subfolder (same as static-main.php)
        $path = isset($urlParts['path']) ? $urlParts['path'] : '';
        if(mb_substr(mb_strtolower($path), 0, $baseUrlLength) == $baseUrlLower) {
            $urlSegment = mb_substr($path, $baseUrlLength);
        } else {
            $urlSegment = $path;
        }

        // Normalize URLs
        $urlSegment = trim($urlSegment, '/');
        $filename = $urlSegment ? "$urlSegment.$this->fileExtension" : "index.$this->fileExtension";

        if($domainBased) {
            // Relative URLs carry no host; treat it as empty instead of reading an undefined index.
            $host = isset($urlParts['host']) ? $urlParts['host'] : '';
            $filename = $host . '/' . $filename;
        }

        // A host-less domain-based filename starts with '/', whose dirname() is '/':
        // drop it so the resulting path stays relative to the destination folder.
        $folder = dirname($filename);
        $mappedUrls[$url] = (($folder == '/') ? '' : ($folder . '/')) . basename($filename);
    }

    return $mappedUrls;
}
/**
 * Deletes the cache files belonging to the given URLs from the destination folder.
 * Files that do not exist are ignored; failures to delete are suppressed.
 *
 * @param array $urls Either a numerically indexed list of URLs (mapped through
 * {@link urlsToPaths()} first) or a map of URL => path relative to the destination folder.
 */
public function unpublishPages($urls) {
    // A list whose concatenated keys form a number is taken to be numerically indexed,
    // i.e. it holds bare URLs which still have to be mapped to paths.
    $needsMapping = is_numeric(implode('', array_keys($urls)));
    if ($needsMapping) {
        $urls = $this->urlsToPaths($urls);
    }

    // This can be quite memory hungry and time-consuming
    // @todo - Make a more memory efficient publisher
    increase_time_limit_to();
    increase_memory_limit_to();

    $cacheRoot = $this->getDestDir();
    foreach ($urls as $relativePath) {
        $target = $cacheRoot . '/' . $relativePath;
        if (!file_exists($target)) {
            continue;
        }
        @unlink($target);
    }
}
/**
 * Uses {@link Director::test()} to perform in-memory HTTP requests
 * on the passed-in URLs and writes the rendered output below the destination folder.
 *
 * Config changes needed for rendering (theme, alternate base URL, hash link rewriting)
 * are made inside a nested config which is reverted before the files are written,
 * also when rendering throws an exception.
 *
 * Responses with a 404 status are skipped unless the requested page is an ErrorPage.
 * ErrorPage responses (status >= 400) are written as "error-<code>.html".
 *
 * @param array $urls Either a numerically indexed list of URLs (mapped through
 * {@link urlsToPaths()} first) or a map of URL => path relative to the destination folder.
 * @return array Result, keyed by URL. Keys:
 * - "statuscode": The HTTP status code
 * - "redirect": A redirect location (if applicable)
 * - "path": The filesystem path where the cache has been written (null if nothing was written)
 */
public function publishPages($urls) {
    $result = array();
    $files = array();

    // Nest the config so we can make changes to the config and revert easily
    Config::nest();

    try {
        // Do we need to map these?
        // Detect a numerically indexed array
        if (is_numeric(join('', array_keys($urls)))) {
            $urls = $this->urlsToPaths($urls);
        }

        // This can be quite memory hungry and time-consuming
        // @todo - Make a more memory efficient publisher
        increase_time_limit_to();
        increase_memory_limit_to();

        // Set the appropriate theme for this publication batch.
        // This may have been set explicitly via StaticPublisher::static_publisher_theme,
        // or we can use the last non-null theme.
        $customTheme = Config::inst()->get('StaticPublisher', 'static_publisher_theme');
        if ($customTheme) {
            Config::inst()->update('SSViewer', 'theme', $customTheme);
        }

        // Ensure that the theme that is set gets used.
        Config::inst()->update('SSViewer', 'theme_enabled', true);

        $staticBaseUrl = Config::inst()->get('FilesystemPublisher', 'static_base_url');
        if ($staticBaseUrl) {
            Config::inst()->update('Director', 'alternate_base_url', $staticBaseUrl);
        }

        if ($this->fileExtension == 'php') {
            Config::inst()->update('SSViewer', 'rewrite_hash_links', 'php');
        }

        if (Config::inst()->get('StaticPublisher', 'echo_progress')) {
            echo $this->class.": Publishing to " . $staticBaseUrl . "\n";
        }

        $i = 0;
        $totalURLs = sizeof($urls);

        foreach ($urls as $url => $path) {
            $origUrl = $url;
            $result[$origUrl] = array(
                'statuscode' => null,
                'redirect' => null,
                'path' => null
            );

            $i++;

            if ($url && !is_string($url)) {
                user_error("Bad url:" . var_export($url,true), E_USER_WARNING);
                continue;
            }

            if (Config::inst()->get('StaticPublisher', 'echo_progress')) {
                echo " * Publishing page $i/$totalURLs: $url\n";
                flush();
            }

            Requirements::clear();

            if ($url == "") $url = "/";
            if (Director::is_relative_url($url)) $url = Director::absoluteURL($url);
            $response = Director::test(str_replace('+', ' ', $url));

            if (!$response) continue;

            // Only response objects expose status code and headers; anything else is
            // treated as raw content and must not have methods called on it.
            $isObject = is_object($response);
            $statusCode = $isObject ? $response->getStatusCode() : null;
            $result[$origUrl]['statuscode'] = $statusCode;

            Requirements::clear();
            singleton('DataObject')->flushCache();

            // Check for ErrorPages generating output - we want to handle this in a special way below.
            $isErrorPage = false;
            if ($isObject && ((int)$statusCode) >= 400) {
                $pageObject = SiteTree::get_by_link($url);
                if ($pageObject && $pageObject instanceof ErrorPage) $isErrorPage = true;
            }

            // Skip any responses with a 404 status code unless it's the ErrorPage itself.
            if (!$isErrorPage && $isObject && $statusCode == '404') continue;

            // Report the redirect target regardless of the cache file format
            $isRedirect = $isObject && ($statusCode == '301' || $statusCode == '302');
            if ($isRedirect) {
                $result[$origUrl]['redirect'] = $response->getHeader('Location');
            }

            // Generate file content
            if ($this->fileExtension == 'php') {
                // PHP file caching will generate a simple script from a template
                if ($isRedirect) {
                    $content = $this->generatePHPCacheRedirection($response->getHeader('Location'));
                } else {
                    $content = $this->generatePHPCacheFile(
                        $isObject ? $response->getBody() : $response . '',
                        HTTP::get_cache_age(),
                        date('Y-m-d H:i:s'),
                        // A raw (non-object) response has no headers to read a content type from
                        $isObject ? $response->getHeader('Content-Type') : null
                    );
                }
            } else if ($isRedirect) {
                // HTML file caching expresses a redirect as a meta refresh
                $absoluteURL = Director::absoluteURL($response->getHeader('Location'));
                $content = "<meta http-equiv=\"refresh\" content=\"2; URL=$absoluteURL\">";
            } else {
                // HTML file caching generally just creates a simple file
                $content = $isObject ? $response->getBody() : $response . '';
            }

            if (Config::inst()->get('StaticPublisher', 'include_caching_metadata')) {
                $content = str_replace(
                    '</html>',
                    sprintf("</html>\n\n<!-- %s -->", implode(" ", $this->getMetadata($url))),
                    $content
                );
            }

            // Error pages get a standardised name, so they can be plugged into catch-all
            // webserver statements such as Apache's ErrorDocument.
            $filename = $isErrorPage
                ? 'error-' . (int)$statusCode . '.html'
                : basename($path);

            $files[$origUrl] = array(
                'Content' => $content,
                'Folder' => dirname($path).'/',
                'Filename' => $filename,
            );

            // Copying of externally referenced assets is not implemented here; the
            // write loop below still honours entries carrying a 'Copy' source path.
        }
    } catch (Exception $e) {
        // Do not leave the nested config behind when rendering fails
        Config::unnest();
        throw $e;
    }

    // Return config to its previous state
    Config::unnest();

    $base = $this->getDestDir();
    foreach ($files as $origUrl => $file) {
        Filesystem::makeFolder("$base/$file[Folder]");

        $path = "$base/$file[Folder]$file[Filename]";

        // file_put_contents() and copy() raise their own PHP warning on failure
        $written = false;
        if (isset($file['Content'])) {
            $written = (file_put_contents($path, $file['Content']) !== false);
        } else if (isset($file['Copy'])) {
            $written = copy($file['Copy'], $path);
        }

        // Only report a path for files that really exist on disk now
        if ($written) {
            $result[$origUrl]['path'] = $path;
        }
    }

    return $result;
}
/**
 * Generate the templated content for a PHP script that can serve up the
 * given piece of content with the given age and expiry.
 *
 * Fills the placeholders of the CachedPHPPage.tmpl template in a single pass, so
 * placeholder-like text inside $content is never substituted a second time.
 *
 * @param string $content Body to embed in the template
 * @param string $age Cache age; cast to an integer before insertion
 * @param string $lastModified Value for the **LAST_MODIFIED** placeholder
 * @param string $contentType Value for the **CONTENT_TYPE** placeholder
 *
 * @return string
 */
protected function generatePHPCacheFile($content, $age, $lastModified, $contentType) {
    $template = file_get_contents(STATIC_MODULE_DIR . '/code/CachedPHPPage.tmpl');

    // strtr() with a pair array scans the template once and does not re-scan text
    // it has inserted, unlike str_replace() with arrays.
    return strtr(
        (string)$template,
        array(
            '**MAX_AGE**' => (string)(int)$age,
            '**LAST_MODIFIED**' => (string)$lastModified,
            '**CONTENT**' => (string)$content,
            '**CONTENT_TYPE**' => (string)$contentType,
        )
    );
}
/**
 * Generate the templated content for a PHP script that redirects to the given
 * destination, by filling the **DESTINATION** placeholder of the
 * CachedPHPRedirection.tmpl template.
 *
 * @param string $destination Redirect target to insert into the template
 *
 * @return string
 */
protected function generatePHPCacheRedirection($destination) {
    $templateFile = STATIC_MODULE_DIR . '/code/CachedPHPRedirection.tmpl';
    $placeholders = array('**DESTINATION**');
    $values = array($destination);

    return str_replace($placeholders, $values, file_get_contents($templateFile));
}
/**
 * Absolute path of the folder the cache files live in: BASE_PATH joined with
 * the configured destination folder.
 *
 * @return string
 */
public function getDestDir() {
    $folder = $this->destFolder;

    return BASE_PATH . "/{$folder}";
}
/**
 * Return an array of all the existing static cache files, as a map of
 * URL => file. Only returns cache files that will actually map to a URL,
 * based on urlsToPaths.
 *
 * Only files located exactly one folder level below the destination folder are inspected.
 *
 * @return array Map of URL => absolute path of the existing cache file
 */
public function getExistingStaticCacheFiles() {
    $cacheRoot = $this->getDestDir();

    // Cache files are matched by their path relative to the configured destination
    // folder, so strip that prefix rather than a hard-coded "cache" folder.
    $prefixLength = strlen($cacheRoot . '/');

    $urlMapper = array_flip($this->urlsToPaths($this->owner->allPagesToCache()));

    $output = array();

    // Glob each dir, then glob each one of those.
    // glob() can return false on error, which must not be iterated.
    $subFolders = glob("$cacheRoot/*", GLOB_ONLYDIR);
    if (!$subFolders) {
        return $output;
    }

    foreach ($subFolders as $subFolder) {
        $cacheFiles = glob($subFolder . '/*');
        if (!$cacheFiles) {
            continue;
        }

        foreach ($cacheFiles as $cacheFile) {
            $mapKey = substr($cacheFile, $prefixLength);
            if (isset($urlMapper[$mapKey])) {
                $output[$urlMapper[$mapKey]] = $cacheFile;
            }
        }
    }

    return $output;
}
}