From 545937dadc37a2ca182817ebaf3d96c1802fd419 Mon Sep 17 00:00:00 2001
From: Tom Barrett
Date: Mon, 12 Jul 2021 22:03:04 +1000
Subject: [PATCH] More work on #676 to ensure all exotic HTML entities get converted back to actual characters before the text is put in the XML doc.

---
 calls/call_service_comp_slides.class.php |  4 ++--
 calls/call_service_slides.class.php      |  4 ++--
 include/general.php                      | 27 ++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/calls/call_service_comp_slides.class.php b/calls/call_service_comp_slides.class.php
index e0e7444b..e230ca24 100644
--- a/calls/call_service_comp_slides.class.php
+++ b/calls/call_service_comp_slides.class.php
@@ -95,7 +95,7 @@ function run()
 					$textlines = $xpath->query(".//*[text()[contains(., 'contents')]]",$textelements->item($y)->parentNode->parentNode);
 					//populate text elements
 					for ($z = 0; $z < ($numlines); $z++) {
-						$textlines->item($z)->nodeValue = htmlspecialchars(strip_tags(html_entity_decode($lines[$z])), ENT_QUOTES, 'UTF-8', false);
+						$textlines->item($z)->nodeValue = xml_safe_string($lines[$z]);
 					}
 				} elseif (strcmp($textelements->item($y)->nodeValue, 'credit') == 0) {
 					//credits textbox
@@ -119,7 +119,7 @@ function run()
 
 					//populate text elements
 					for ($z = 0; $z < ($numlines); $z++) {
-						$textlines->item($z)->nodeValue = htmlspecialchars(strip_tags(html_entity_decode($lines[$z], ENT_QUOTES, 'UTF-8')), ENT_QUOTES, 'UTF-8', false);
+						$textlines->item($z)->nodeValue = xml_safe_string($lines[$z]);
 					}
 				}
 			}
diff --git a/calls/call_service_slides.class.php b/calls/call_service_slides.class.php
index 85309036..41d6d328 100644
--- a/calls/call_service_slides.class.php
+++ b/calls/call_service_slides.class.php
@@ -106,7 +106,7 @@ function run()
 					$textlines = $xpath->query(".//*[text()[contains(., 'contents')]]",$textelements->item($y)->parentNode->parentNode);
 					//populate text elements
 					for ($z = 0; $z < ($numlines); $z++) {
-						$textlines->item($z)->nodeValue = htmlspecialchars(strip_tags(html_entity_decode($lines[$z])), ENT_QUOTES, 'UTF-8', false);
+						$textlines->item($z)->nodeValue = xml_safe_string($lines[$z]);
 					}
 				} elseif (strcmp($textelements->item($y)->nodeValue, 'credit') == 0) {
 					//credits textbox
@@ -130,7 +130,7 @@ function run()
 
 					//populate text elements
 					for ($z = 0; $z < ($numlines); $z++) {
-						$textlines->item($z)->nodeValue = htmlspecialchars(strip_tags(html_entity_decode($lines[$z], ENT_QUOTES, 'UTF-8')), ENT_QUOTES, 'UTF-8', false);
+						$textlines->item($z)->nodeValue = xml_safe_string($lines[$z]);
 					}
 				}
 			}
diff --git a/include/general.php b/include/general.php
index cfd45941..fb3dd39d 100644
--- a/include/general.php
+++ b/include/general.php
@@ -122,6 +122,33 @@ function ents($str)
 	return htmlspecialchars($str, ENT_QUOTES, "UTF-8", false);
 }
 
+/**
+ * Take a string, which may include HTML tags or entities, and return an XML-safe version of it (entities decoded, tags stripped, XML special characters escaped).
+ * @param string $x The text to make XML-safe
+ */
+function xml_safe_string($x)
+{
+	$res = strip_tags(html_entity_decode($x, ENT_QUOTES, 'UTF-8'));
+
+	// decode some entities that are missed by html_entity_decode in PHP5.3
+	$res = str_replace("&rsquo;", "’", $res);
+	$res = str_replace("&lsquo;", "‘", $res);
+	$res = str_replace("&ldquo;", "“", $res);
+	$res = str_replace("&rdquo;", "”", $res);
+	$res = str_replace("&ndash;", "–", $res);
+	$res = str_replace("&hellip;", "…", $res);
+	$res = str_replace("", "'", $res); // NOTE(review): search string is empty in this copy of the patch, making this a no-op; the original entity was presumably lost in extraction - confirm against the repository
+	$res = str_replace("", "'", $res); // NOTE(review): same as the line above - confirm the intended search string
+
+	// now encode the five XML special characters; '&' goes first so the entities added below are not themselves re-encoded
+	$res = str_replace("&", '&amp;', $res);
+	$res = str_replace("'", '&apos;', $res);
+	$res = str_replace('"', '&quot;', $res);
+	$res = str_replace('>', '&gt;', $res);
+	$res = str_replace('<', '&lt;', $res);
+	return $res;
+}
+
 function redirect($view, $params=Array(), $hash='')
 {
 	session_write_close();