Skip to content

Commit

Permalink
Merge branch 'MDL-70295' of git://github.com/paulholden/moodle
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewnicols committed Dec 9, 2020
2 parents cd38096 + 1e3449c commit 9d8a0d3
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 22 deletions.
61 changes: 44 additions & 17 deletions lib/html2text/Html2Text.php
Expand Up @@ -28,22 +28,22 @@ class Html2Text
/**
* Contains the HTML content to convert.
*
* @type string
* @var string $html
*/
protected $html;

/**
* Contains the converted, formatted text.
*
* @type string
* @var string $text
*/
protected $text;

/**
* List of preg* regular expression patterns to search for,
* used in conjunction with $replace.
*
* @type array
* @var array $search
* @see $replace
*/
protected $search = array(
Expand All @@ -54,6 +54,7 @@ class Html2Text
'/<style\b[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
'/<i\b[^>]*>(.*?)<\/i>/i', // <i>
'/<em\b[^>]*>(.*?)<\/em>/i', // <em>
'/<ins\b[^>]*>(.*?)<\/ins>/i', // <ins>
'/(<ul\b[^>]*>|<\/ul>)/i', // <ul> and </ul>
'/(<ol\b[^>]*>|<\/ol>)/i', // <ol> and </ol>
'/(<dl\b[^>]*>|<\/dl>)/i', // <dl> and </dl>
Expand All @@ -73,7 +74,7 @@ class Html2Text
/**
* List of pattern replacements corresponding to patterns searched.
*
* @type array
* @var array $replace
* @see $search
*/
protected $replace = array(
Expand All @@ -84,6 +85,7 @@ class Html2Text
'', // <style>s -- which strip_tags supposedly has problems with
'_\\1_', // <i>
'_\\1_', // <em>
'_\\1_', // <ins>
"\n\n", // <ul> and </ul>
"\n\n", // <ol> and </ol>
"\n\n", // <dl> and </dl>
Expand All @@ -104,7 +106,7 @@ class Html2Text
* List of preg* regular expression patterns to search for,
* used in conjunction with $entReplace.
*
* @type array
* @var array $entSearch
* @see $entReplace
*/
protected $entSearch = array(
Expand All @@ -118,7 +120,7 @@ class Html2Text
/**
* List of pattern replacements corresponding to patterns searched.
*
* @type array
* @var array $entReplace
* @see $entSearch
*/
protected $entReplace = array(
Expand All @@ -133,14 +135,15 @@ class Html2Text
* List of preg* regular expression patterns to search for
* and replace using callback function.
*
* @type array
* @var array $callbackSearch
*/
protected $callbackSearch = array(
'/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6
'/[ ]*<(p)( [^>]*)?>(.*?)<\/p>[ ]*/si', // <p> with surrounding whitespace.
'/<(br)[^>]*>[ ]*/i', // <br> with leading whitespace after the newline.
'/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
'/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong>
'/<(del)( [^>]*)?>(.*?)<\/del>/i', // <del>
'/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th>
'/<(a) [^>]*href=("|\')([^"\']+)\2([^>]*)>(.*?)<\/a>/i' // <a href="">
);
Expand All @@ -149,7 +152,7 @@ class Html2Text
* List of preg* regular expression patterns to search for in PRE body,
* used in conjunction with $preReplace.
*
* @type array
* @var array $preSearch
* @see $preReplace
*/
protected $preSearch = array(
Expand All @@ -163,7 +166,7 @@ class Html2Text
/**
* List of pattern replacements corresponding to patterns searched for PRE body.
*
* @type array
* @var array $preReplace
* @see $preSearch
*/
protected $preReplace = array(
Expand All @@ -177,37 +180,37 @@ class Html2Text
/**
* Temporary workspace used during PRE processing.
*
* @type string
* @var string $preContent
*/
protected $preContent = '';

/**
* Contains the base URL that relative links should resolve to.
*
* @type string
* @var string $baseurl
*/
protected $baseurl = '';

/**
* Indicates whether content in the $html variable has been converted yet.
*
* @type boolean
* @var boolean $converted
* @see $html, $text
*/
protected $converted = false;

/**
* Contains URL addresses from links to be rendered in plain text.
*
* @type array
* @var array $linkList
* @see buildlinkList()
*/
protected $linkList = array();

/**
* Various configuration options (able to be set in the constructor)
*
* @type array
* @var array $options
*/
protected $options = array(
'do_links' => 'inline', // 'none'
Expand Down Expand Up @@ -281,7 +284,7 @@ public function set_html($html, $from_file = false)
/**
* Returns the text, converted from HTML.
*
* @return string
* @return string Plain text
*/
public function getText()
{
Expand Down Expand Up @@ -414,7 +417,7 @@ protected function buildlinkList($link, $display, $linkOverride = null)
}

// Ignored link types
if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
if (preg_match('!^(javascript:|mailto:|#)!i', html_entity_decode($link))) {
return $display;
}

Expand Down Expand Up @@ -450,6 +453,11 @@ protected function buildlinkList($link, $display, $linkOverride = null)
}
}

/**
* Helper function for PRE body conversion.
*
* @param string &$text HTML content
*/
protected function convertPre(&$text)
{
// get the content of PRE element
Expand Down Expand Up @@ -486,7 +494,7 @@ protected function convertPre(&$text)
/**
* Helper function for BLOCKQUOTE body conversion.
*
* @param string $text HTML content
* @param string &$text HTML content
*/
protected function convertBlockquotes(&$text)
{
Expand Down Expand Up @@ -563,6 +571,8 @@ protected function pregCallback($matches)
case 'b':
case 'strong':
return $this->toupper($matches[3]);
case 'del':
return $this->tostrike($matches[3]);
case 'th':
return $this->toupper("\t\t" . $matches[3] . "\n");
case 'h':
Expand Down Expand Up @@ -628,4 +638,21 @@ protected function strtoupper($str)

return $str;
}

/**
 * Helper function for DEL conversion.
 *
 * Appends U+0336 (COMBINING LONG STROKE OVERLAY) after every character of
 * the input, which is how the content of a DEL element is represented in
 * the plain-text output.
 *
 * @param string $str Content of the DEL element
 * @return string Converted text
 */
protected function tostrike($str)
{
    // UTF-8 byte sequence for U+0336 ("\xCC\xB6"). It is constant, so build
    // it once instead of on every loop iteration. Precedence note: ">>" and
    // "&" bind tighter than "|", so no extra parentheses are required.
    $combiningChr = chr(0xC0 | 0x336 >> 6) . chr(0x80 | 0x336 & 0x3F);

    // The character count does not change while iterating; compute it once
    // rather than rescanning the whole string in the loop condition.
    $length = mb_strlen($str);

    $rtn = '';
    for ($i = 0; $i < $length; $i++) {
        // mb_substr keeps multibyte characters intact so the overlay is
        // attached to whole characters, not to individual bytes.
        $rtn .= mb_substr($str, $i, 1) . $combiningChr;
    }

    return $rtn;
}
}
9 changes: 5 additions & 4 deletions lib/html2text/readme_moodle.txt
@@ -1,4 +1,4 @@
Description of Html2Text library import into Moodle
Description of Html2Text v4.3.1 library import into Moodle

Please note that we override some mb_* functions in Html2Text's namespace at
run time. Until Html2Text adds some sort of fallback for the mb_* functions
Expand All @@ -7,7 +7,8 @@ running PHP without mbstring don't see nasty undefined function errors.

Instructions
------------
1. Clone https://github.com/mtibben/html2text.git into an unrelated directory
2. Copy /path/to/html2text/src/Html2Text.php to lib/html2text/
1. Download the latest release of Html2Text from https://github.com/mtibben/html2text/releases/
2. Extract the contents of the release archive into a directory.
3. Copy src/Html2Text.php to lib/html2text/

Imported from: https://github.com/mtibben/html2text.git
Imported from: https://github.com/mtibben/html2text/releases/
2 changes: 1 addition & 1 deletion lib/thirdpartylibs.xml
Expand Up @@ -158,7 +158,7 @@
<location>html2text</location>
<name>HTML2Text</name>
<license>GPL</license>
<version>4.2.1</version>
<version>4.3.1</version>
<licenseversion>2.0+</licenseversion>
</library>
<library>
Expand Down

0 comments on commit 9d8a0d3

Please sign in to comment.