Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed replace modifier by converting encoding if needed #740

Merged
merged 1 commit into from
Jul 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions libs/plugins/shared.mb_str_replace.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,43 @@ function smarty_mb_str_replace($search, $replace, $subject, &$count = 0)
}
}
} else {
$mb_reg_charset = mb_regex_encoding();
// Check if mbstring regex is using UTF-8
$reg_is_unicode = !strcasecmp($mb_reg_charset, "UTF-8");
if(!$reg_is_unicode) {
// ...and set to UTF-8 if not
mb_regex_encoding("UTF-8");
}

// See if charset used by Smarty is matching one used by regex...
$current_charset = mb_regex_encoding();
$convert_result = (bool)strcasecmp(Smarty::$_CHARSET, $current_charset);
if($convert_result) {
// ...convert to it if not.
$subject = mb_convert_encoding($subject, $current_charset, Smarty::$_CHARSET);
$search = mb_convert_encoding($search, $current_charset, Smarty::$_CHARSET);
$replace = mb_convert_encoding($replace, $current_charset, Smarty::$_CHARSET);
}

$parts = mb_split(preg_quote($search), $subject);
// If original regex encoding was not unicode...
if(!$reg_is_unicode) {
// ...restore original regex encoding to avoid breaking the system.
mb_regex_encoding($mb_reg_charset);
}
if($parts === false) {
// This exception is thrown if call to mb_split failed.
// Usually it happens, when $search or $replace are not valid for given mb_regex_encoding().
// There may be other cases for it to fail, please file an issue if you find a reproducible one.
throw new SmartyException("Source string is not a valid $current_charset sequence (probably)");
}

$count = count($parts) - 1;
$subject = implode($replace, $parts);
// Convert results back to charset used by Smarty, if needed.
if($convert_result) {
$subject = mb_convert_encoding($subject, Smarty::$_CHARSET, $current_charset);
}
}
return $subject;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
<?php
/**
* Smarty PHPunit tests - issue #549 regression tests
*
* @package PHPunit
* @author Andrey Repin <anrdaemon@yandex.ru>
*/

/**
 * Regression tests for issue #549: behavior of the `replace` modifier when the
 * charset used by Smarty differs from the mbstring regex encoding.
 *
 * @runTestsInSeparateProcess
 * @preserveGlobalState disabled
 * @backupStaticAttributes enabled
 *
 * mb_split breaks if Smarty encoding is not the same as mbstring regex encoding.
 */
class MbSplitEncodingIssue549Test extends PHPUnit_Smarty
{
    /** @var string|null Smarty charset saved in setUp(); null if setUp() was skipped */
    private $charset;

    /** @var string|null mbstring regex encoding saved in setUp(); null if setUp() was skipped */
    private $regexEncoding;

    /** @var array Source data for tests, hexed to protect from accidental reencoding */
    private $data = array(
        "subject" => '4772c3bc6e6577616c64', // "Grünewald"
        "pattern" => '77616c64', // "wald"
        "replacement" => '7374c3bc726d', // "stürm"
        "result" => '4772c3bc6e657374c3bc726d', // "Grünestürm"
    );

    /**
     * Skip when Smarty does not use mbstring; otherwise remember the global
     * encoding state this test mutates and prepare the Smarty fixture.
     */
    public function setUp()
    {
        if (!\Smarty::$_MBSTRING) {
            // markTestSkipped() aborts the test by throwing, so nothing below runs.
            $this->markTestSkipped("mbstring extension is not in use by Smarty");
        }

        $this->charset = \Smarty::$_CHARSET;
        $this->regexEncoding = \mb_regex_encoding();
        $this->setUpSmarty(dirname(__FILE__));
    }

    /**
     * Restore both pieces of global state changed by testReplaceModifier().
     */
    public function tearDown()
    {
        // Only restore what setUp() actually saved (it may have been skipped early).
        if ($this->regexEncoding !== null) {
            \mb_regex_encoding($this->regexEncoding);
        }
        \Smarty::$_CHARSET = $this->charset ?: \Smarty::$_CHARSET;
        $this->cleanDirs();
    }

    /**
     * Provider for testReplaceModifier.
     *
     * @return array Named data sets of array(Smarty charset, mbstring regex encoding)
     */
    public function encodingPairsProvider()
    {
        return array(
            "with non-UNICODE src/non-UNICODE regex (PHP < 5.6 default)" => array("Windows-1252", "EUC-JP"),
            "with UTF-8 src/non-UNICODE regex (PHP < 5.6 default)" => array("UTF-8", "EUC-JP"),
            "with UTF-8 src/UTF-8 regex (PHP >= 5.6)" => array("UTF-8", "UTF-8"),
            "with non-UNICODE src/UTF-8 regex" => array("Windows-1252", "UTF-8"),
        );
    }

    /**
     * Test behavior of `replace` modifier with different source and regex encodings
     *
     * @dataProvider encodingPairsProvider
     *
     * @param string $mb_int_encoding   Charset assigned to Smarty::$_CHARSET; fixtures are encoded in it
     * @param string $mb_regex_encoding Encoding passed to mb_regex_encoding() before rendering
     */
    public function testReplaceModifier($mb_int_encoding, $mb_regex_encoding)
    {
        // Fixtures are stored as hex of their UTF-8 bytes: unpack them, then
        // re-encode into the charset under test.
        $data = array();
        foreach ($this->data as $key => $hex) {
            $data[$key] = \mb_convert_encoding(pack("H*", $hex), $mb_int_encoding, "UTF-8");
        }

        \mb_regex_encoding($mb_regex_encoding);
        \Smarty::$_CHARSET = $mb_int_encoding;

        $template = 'string:{"' . $data['subject'] . '"|replace:"' . $data['pattern'] . '":"' . $data['replacement'] . '"}';

        // Byte-for-byte comparison: the rendered output must be in Smarty's charset.
        $this->assertSame($data['result'], $this->smarty->fetch($template));
    }

}
2 changes: 2 additions & 0 deletions tests/UnitTests/TemplateSource/_Issues/549/cache/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore anything in here, but keep this directory
*
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore anything in here, but keep this directory
*