Permalink
Browse files

Finished writing my crazy widow word prevention logic.

  • Loading branch information...
1 parent 442372b commit da7e484051496a82e23a571c5ddd017114887160 @psychoticmeow psychoticmeow committed Sep 23, 2011
Showing with 103 additions and 59 deletions.
  1. +77 −50 libs/doccy-utilities.php
  2. +21 −5 libs/doccy.php
  3. +5 −4 test.php
View
127 libs/doccy-utilities.php
@@ -363,65 +363,92 @@ function prettyPrintText(\Doccy\Template $document, Options $options) {
);
}
- // Find all text nodes:
- $nodes = $xpath->query('//text()');
-
- foreach ($nodes as $index => $node) {
- if ($node->parentNode->isPrettyPrintable() === false) continue;
+ // Find all block elements:
+ foreach ($xpath->query('//*') as $node) {
+ if ($node->isBlockLevel() === false) continue;
+
+ // Don't prettify blocks that contain blocks:
+ foreach ($xpath->query('.//*', $node) as $child) {
+ if ($child->isBlockLevel()) {
+ continue 2;
+ }
+ }
- $value = $node->nodeValue;
+ // Find all text nodes beneath this element:
+ $texts = $xpath->query('.//text()', $node);
+ $index = $texts->length - 1;
+ $search_for_widowed_word = 1;
- // Escape nasties:
- if ($options->convert_textual_elements) {
- $value = str_replace(
- array('&', '<', '>'),
- array('&amp;', '&lt;', '&gt;'),
- $value
- );
- }
+ // Work through text nodes backwards:
+ while ($text = $texts->item($index)) {
+ $value = $text->nodeValue;
- // Prevent widowed words:
- if ($options->prevent_widowed_words) {
- /**
- * Find the last text node in a sentence and
- * prevent widowed words.
- *
- * This implementation is imperfect, The following
- * situations would not result in expected behaviour:
- *
- * <p><em>This will not be escaped</em></p>
- * <p>This will not <em>be escaped</em></p>
- */
+ // Prevent widowed words:
if (
- $node->nextSibling === null
- && $node->parentNode->isBlockLevel()
- && trim($value)
+ $options->prevent_widowed_words
+ && $search_for_widowed_word
) {
- $value = preg_replace(
- '/((^|\s)\S{0,20})\s(\S{0,20})$/',
- utf8_encode("\\1\xa0\\3"),
- $value
- );
+ // We've already skipped a good sized "word":
+ if ($search_for_widowed_word > 16) {
+ $search_for_widowed_word = false;
+ }
+
+ // We've got a text node that is pretty printable,
+ // contains at least one space, and is either right
+ // before a long word, or is a word itself.
+ else if (
+ $text->parentNode->isPrettyPrintable()
+ && preg_match('/\s/', $value)
+ && (
+ trim($value)
+ || $search_for_widowed_word > 1
+ )
+ ) {
+ $value = preg_replace(
+ '/((^|\s)\S{0,20})\s(\S{0,20})$/',
+ //utf8_encode("\\1\xa0\\3"),
+ '\1★\3',
+ $value
+ );
+
+ $search_for_widowed_word = false;
+ }
+
+ // Count the number of characters skipped so we
+ // don't put the non-breaking space at the start
+ // of a massive long word.
+ else {
+ $search_for_widowed_word += strlen(trim($value));
+ }
}
- else {
- /**
- * @todo Alternate implementation?
- */
+ // Other pretty printing:
+ if ($text->parentNode->isPrettyPrintable()) {
+ // Escape nasties:
+ if ($options->convert_textual_elements) {
+ $value = str_replace(
+ array('&', '<', '>'),
+ array('&amp;', '&lt;', '&gt;'),
+ $value
+ );
+ }
+
+ // Apply prettification rules:
+ $value = preg_replace($search, $replace, $value);
+
+ // Markup may have been added; swap this text node (not the block in $node) for a fragment:
+ if ($options->convert_textual_elements) {
+ $fragment = $document->createDocumentFragment();
+ $fragment->appendXML($value);
+ $text->parentNode->replaceChild($fragment, $text);
+ }
+
+ else {
+ $text->nodeValue = $value;
+ }
+ }
- }
-
- $value = preg_replace($search, $replace, $value);
-
- // Markup may have been added, replace with fragment:
- if ($options->convert_textual_elements) {
- $fragment = $document->createDocumentFragment();
- $fragment->appendXML($value);
- $node->parentNode->replaceChild($fragment, $node);
- }
- else {
- $node->nodeValue = $value;
+ $index--;
}
}
}
View
26 libs/doccy.php
@@ -108,6 +108,7 @@ public function isPrettyPrintable() {
*/
public function isBlockLevel() {
switch (strtolower($this->nodeName)) {
+ case 'data':
case 'a':
case 'abbr':
case 'acronym':
@@ -126,14 +127,29 @@ public function isBlockLevel() {
case 'q':
case 'sub':
case 'sup':
- case 'th':
- case 'td':
- case 'dt':
- case 'dd':
- case 'li':
+ case 'code':
+ case 'samp':
+ case 'kbd':
+ case 'var':
return false;
}
return true;
}
+
+ /**
+ * Is this element a list container?
+ *
+ * Matches ol, ul and dl, ignoring the case of the node name.
+ *
+ * @return boolean
+ */
+ public function isListElement() {
+ $name = strtolower($this->nodeName);
+ return in_array(
+ $name,
+ array('ol', 'ul', 'dl'),
+ true
+ );
+ }
}
View
9 test.php
@@ -5,14 +5,15 @@
$start = microtime(true);
$tpl = new Doccy\Template();
$tpl->formatOutput = true;
- $tpl->parseString("First \"things\" first, CSS(those things) - you'll -- 'need a {code: {em: fresh -- copy}} of & Symphony', so «head» to {a @href http://symphony-cms.com/downloads: the Symphony webite} then download and extract the ... latest release.");
-
- echo '<pre style="white-space: pre-wrap;">';
+ //$tpl->parseString("{p: {em: and finally a colon {code: :dsaasd dsdaasd}}}");
+ $tpl->parseURI('readme.dcy');
foreach ($tpl->documentElement->childNodes as $node) {
- echo htmlentities($tpl->saveXML($node), ENT_NOQUOTES, 'UTF-8'), "\n";
+ echo ($tpl->saveXML($node)), "\n";
}
+ echo '<pre style="white-space: pre-wrap;">';
+
printf(
"\nExecuted in %.6f seconds using %.2fMB of memory.</pre>",
microtime(true) - $start,

0 comments on commit da7e484

Please sign in to comment.