Permalink
Browse files

Merge pull request #413 from chillu/urlsegment-multibyte-fix

URLSegment multibyte fixes
  • Loading branch information...
2 parents 1671b9c + 18dbdd2 commit 9950fa362ee197f805332b4b73cc5070c683a5a3 @chillu chillu committed May 9, 2012
@@ -152,7 +152,7 @@ function setBody($body) {
$this->body = $body;
// Set content-length in bytes. Use mbstring to avoid problems with mb_internal_encoding() and mbstring.func_overload
- $this->headers['Content-Length'] = (function_exists('mb_strlen') ? mb_strlen($this->body,'8bit') : strlen($this->body));
+ $this->headers['Content-Length'] = mb_strlen($this->body,'8bit');
}
function getBody() {
@@ -64,13 +64,17 @@ function filter($name) {
$name = mb_strtolower($name);
$replacements = $this->getReplacements();
- if($this->getAllowMultibyte()) {
- // unset automated removal of non-ASCII characters, and don't try to transliterate
- if(isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']);
- }
+
+ // Unset automated removal of non-ASCII characters, and don't try to transliterate
+ if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']);
+
foreach($replacements as $regex => $replace) {
$name = preg_replace($regex, $replace, $name);
}
+
+ // Multibyte URLs require percent encoding to comply with RFC 3986.
+ // Without this setting, the "remove non-ASCII chars" regex takes care of that.
+ if($this->getAllowMultibyte()) $name = rawurlencode($name);
return $name;
}
View
@@ -83,9 +83,11 @@
$file = isset($homepageMap[$_SERVER['HTTP_HOST']]) ? $homepageMap[$_SERVER['HTTP_HOST']] : $file;
}
+ // Encode each part of the path individually, in order to support multibyte paths.
+ // SiteTree.URLSegment and hence the static folder and filenames are stored in encoded form,
+ // to avoid filesystem incompatibilities.
+ $file = implode('/', array_map('rawurlencode', explode('/', $file)));
// Find file by extension (either *.html or *.php)
- $file = preg_replace('/[^a-zA-Z0-9\/\-_]/si', '-', $file);
-
if (file_exists($cacheBaseDir . $cacheDir . $file . '.html')) {
header('X-SilverStripe-Cache: hit at '.@date('r'));
echo file_get_contents($cacheBaseDir . $cacheDir . $file . '.html');
@@ -116,10 +116,13 @@ function testJSON2Obj() {
* @todo test toASCII()
*/
function testRaw2URL() {
+ $orig = URLSegmentFilter::$default_allow_multibyte;
+ URLSegmentFilter::$default_allow_multibyte = false;
$this->assertEquals('foo', Convert::raw2url('foo'));
$this->assertEquals('foo-and-bar', Convert::raw2url('foo & bar'));
$this->assertEquals('foo-and-bar', Convert::raw2url('foo & bar!'));
$this->assertEquals('foos-bar-2', Convert::raw2url('foo\'s [bar] (2)'));
+ URLSegmentFilter::$default_allow_multibyte = $orig;
}
}
@@ -27,7 +27,7 @@ function testRetainsNonAsciiUrlsWithAllowMultiByteOption() {
$f = new URLSegmentFilter();
$f->setAllowMultibyte(true);
$this->assertEquals(
- 'brötchen',
+ urlencode('brötchen'),
$f->filter('Brötchen')
);
}

0 comments on commit 9950fa3

Please sign in to comment.