Skip to content

Commit

Permalink
Merge pull request #102 from andreskrey/issue70-code-language
Browse files Browse the repository at this point in the history
Code blocks with specified language
  • Loading branch information
colinodell committed Oct 20, 2016
2 parents 6b5dee9 + 164fb65 commit 7f98188
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 46 deletions.
62 changes: 62 additions & 0 deletions src/Converter/CodeConverter.php
@@ -0,0 +1,62 @@
<?php

namespace League\HTMLToMarkdown\Converter;

use League\HTMLToMarkdown\ElementInterface;

class CodeConverter implements ConverterInterface
{
    /**
     * Converts a <code> element into Markdown.
     *
     * Single-line content is wrapped in one pair of backticks; content that
     * contains a line break is wrapped in a triple-backtick fence. When the
     * element carries a "language-xxx" class, "xxx " (with a trailing space)
     * is emitted right after the opening backtick(s).
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $language = $this->extractLanguage($element->getAttribute('class'));

        // The children string is expected to still contain the <code> wrapper tags
        // (hence the stripping below). Strip them BEFORE decoding entities: otherwise
        // an escaped literal such as "&lt;code&gt;" inside the sample would be decoded
        // into a real-looking tag and then wrongly removed from the output.
        $code = $element->getChildrenAsString();
        // The opening tag may carry attributes, so it is matched with a regular expression.
        $code = preg_replace('/<code\b[^>]*>/', '', $code);
        $code = str_replace('</code>', '', $code);
        $code = html_entity_decode($code);

        // Any kind of line break means this is a block rather than an inline span.
        if (preg_match('/\r\n|\r|\n/', $code)) {
            return '```' . $language . "\n" . $code . "\n" . '```';
        }

        // One line of code: wrap it in single backticks.
        return '`' . $language . $code . '`';
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('code');
    }

    /**
     * Finds the first class token that starts with "language-" and returns the
     * remainder of that token followed by a single space. The space keeps the
     * language tag from being glued to the code that follows it.
     *
     * @param string|null $classAttribute Raw value of the element's class attribute
     *
     * @return string The language followed by a space, or an empty string when none is found
     */
    private function extractLanguage($classAttribute)
    {
        if (!$classAttribute) {
            return '';
        }

        $prefix = 'language-';
        $prefixLength = strlen($prefix);

        // Class tokens may be separated by any run of whitespace, not just one space.
        foreach (preg_split('/\s+/', trim($classAttribute)) as $class) {
            // Only accept the prefix at the very start of the token, and skip a bare
            // "language-" that names no language at all.
            if (strpos($class, $prefix) === 0 && strlen($class) > $prefixLength) {
                return substr($class, $prefixLength) . ' ';
            }
        }

        return '';
    }
}
62 changes: 24 additions & 38 deletions src/Converter/PreformattedConverter.php
Expand Up @@ -13,51 +13,37 @@ class PreformattedConverter implements ConverterInterface
*/
public function convert(ElementInterface $element)
{
// Store the content of the code block in an array, one entry for each line

$markdown = '';

$code_content = html_entity_decode($element->getChildrenAsString());
$code_content = str_replace(array('<code>', '</code>'), '', $code_content);
$code_content = str_replace(array('<pre>', '</pre>'), '', $code_content);
$pre_content = html_entity_decode($element->getChildrenAsString());
$pre_content = str_replace(array('<pre>', '</pre>'), '', $pre_content);

$lines = preg_split('/\r\n|\r|\n/', $code_content);
$total = count($lines);
/*
* Checking for the code tag.
* Usually pre tags are used along with code tags. This conditional will check for already converted code tags,
* which use backticks, and if those backticks are at the beginning and at the end of the string it means
* there's no more information to convert.
*/

// If there's more than one line of code, prepend each line with four spaces and no backticks.
if ($total > 1 || $element->getTagName() === 'pre') {
// Remove the first and last line if they're empty
$first_line = trim($lines[0]);
$last_line = trim($lines[$total - 1]);
$first_line = trim($first_line, '&#xD;'); //trim XML style carriage returns too
$last_line = trim($last_line, '&#xD;');
$firstBacktick = strpos(trim($pre_content), '`');
$lastBacktick = strrpos(trim($pre_content), '`');
if ($firstBacktick === 0 && $lastBacktick === strlen(trim($pre_content)) - 1) {
return $pre_content;
}

if (empty($first_line)) {
array_shift($lines);
}
// If the execution reaches this point it means it's just a pre tag, with no code tag nested

if (empty($last_line)) {
array_pop($lines);
}
// Normalizing new lines
$pre_content = preg_replace('/\r\n|\r|\n/', PHP_EOL, $pre_content);

$count = 1;
foreach ($lines as $line) {
$line = str_replace('&#xD;', '', $line);
$markdown .= ' ' . $line;
// Add newlines, except final line of the code
if ($count !== $total) {
$markdown .= "\n";
}
$count++;
}
$markdown .= "\n";
// Checking if the string has multiple lines
$lines = preg_split('/\r\n|\r|\n/', $pre_content);
if (count($lines) > 1) {
// Multiple lines detected, adding three backticks and newlines
$markdown .= '```' . "\n" . $pre_content . "\n" . '```';
} else {
// There's only one line of code. It's a code span, not a block. Just wrap it with backticks.
$markdown .= '`' . $lines[0] . '`';
}

if ($element->getTagName() === 'pre') {
$markdown = "\n" . $markdown . "\n";
// One line of code, wrapping it on one backtick.
$markdown .= '`' . $pre_content . '`';
}

return $markdown;
Expand All @@ -68,6 +54,6 @@ public function convert(ElementInterface $element)
*/
public function getSupportedTags()
{
return array('pre', 'code');
return array('pre');
}
}
2 changes: 2 additions & 0 deletions src/Environment.php
Expand Up @@ -3,6 +3,7 @@
namespace League\HTMLToMarkdown;

use League\HTMLToMarkdown\Converter\BlockquoteConverter;
use League\HTMLToMarkdown\Converter\CodeConverter;
use League\HTMLToMarkdown\Converter\CommentConverter;
use League\HTMLToMarkdown\Converter\ConverterInterface;
use League\HTMLToMarkdown\Converter\DefaultConverter;
Expand Down Expand Up @@ -83,6 +84,7 @@ public static function createDefaultEnvironment(array $config = array())
$environment = new static($config);

$environment->addConverter(new BlockquoteConverter());
$environment->addConverter(new CodeConverter());
$environment->addConverter(new CommentConverter());
$environment->addConverter(new DivConverter());
$environment->addConverter(new EmphasisConverter());
Expand Down
3 changes: 2 additions & 1 deletion src/HtmlConverter.php
Expand Up @@ -143,7 +143,8 @@ private function createDOMDocument($html)
private function convertChildren(ElementInterface $element)
{
// Don't convert HTML code inside <code> and <pre> blocks to Markdown - that should stay as HTML
if ($element->isDescendantOf(array('pre', 'code'))) {
// except if the current node is a code tag, which needs to be converted by the CodeConverter.
if ($element->isDescendantOf(array('pre', 'code')) && $element->getTagName() !== 'code') {
return;
}

Expand Down
18 changes: 11 additions & 7 deletions tests/HtmlConverterTest.php
Expand Up @@ -122,16 +122,20 @@ public function test_nested_lists()
public function test_code_samples()
{
$this->html_gives_markdown('<code>&lt;p&gt;Some sample HTML&lt;/p&gt;</code>', '`<p>Some sample HTML</p>`');
$this->html_gives_markdown("<code>\n&lt;p&gt;Some sample HTML&lt;/p&gt;\n&lt;p&gt;And another line&lt;/p&gt;\n</code>", " <p>Some sample HTML</p>\n <p>And another line</p>");
$this->html_gives_markdown("<p><code>\n#sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n}\n</code></p>", " #sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n }");
$this->html_gives_markdown("<p><code>#sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n}\n</code></p>", " #sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n }");
$this->html_gives_markdown("<code>\n&lt;p&gt;Some sample HTML&lt;/p&gt;\n&lt;p&gt;And another line&lt;/p&gt;\n</code>", "```\n\n<p>Some sample HTML</p>\n<p>And another line</p>\n\n```");
$this->html_gives_markdown("<p><code>\n#sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n}\n</code></p>", "```\n\n#sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n}\n\n```");
$this->html_gives_markdown("<p><code>#sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n}\n</code></p>", "```\n#sidebar h1 {\n font-size: 1.5em;\n font-weight: bold;\n}\n\n```");
$this->html_gives_markdown('<pre><code>&lt;p&gt;Some sample HTML&lt;/p&gt;</code></pre>', '`<p>Some sample HTML</p>`');
$this->html_gives_markdown('<pre><code class="language-php">&lt;?php //Some php code ?&gt;</code></pre>', '`php <?php //Some php code ?>`');
$this->html_gives_markdown("<pre><code class=\"language-php\">&lt;?php //Some multiline php code\n\$myVar = 2; ?&gt;</code></pre>", "```php \n<?php //Some multiline php code\n\$myVar = 2; ?>\n```");
$this->html_gives_markdown("<pre><code>&lt;p&gt;Multiline HTML&lt;/p&gt;\n&lt;p&gt;Here's the second line&lt;/p&gt;</code></pre>", "```\n<p>Multiline HTML</p>\n<p>Here's the second line</p>\n```");
}

public function test_preformat()
{
$this->html_gives_markdown("<pre>test\ntest\r\ntest</pre>", ' test' . PHP_EOL . ' test' . PHP_EOL . ' test');
$this->html_gives_markdown("<pre>test\n\ttab\r\n</pre>", ' test' . PHP_EOL . " \ttab");
$this->html_gives_markdown('<pre> one line with spaces </pre>', ' ' . ' one line with spaces ');
$this->html_gives_markdown("<pre>test\ntest\r\ntest</pre>", "```\ntest" . PHP_EOL . 'test' . PHP_EOL . "test\n```");
$this->html_gives_markdown("<pre>test\n\ttab\r\n</pre>", "```\ntest" . PHP_EOL . "\ttab" . PHP_EOL . "\n```");
$this->html_gives_markdown('<pre> one line with spaces </pre>', '` one line with spaces `');
}

public function test_blockquotes()
Expand Down Expand Up @@ -210,7 +214,7 @@ public function test_invoke()
public function test_sanitization()
{
$html = '<pre><code>&lt;script type = "text/javascript"&gt; function startTimer() { var tim = window.setTimeout("hideMessage()", 5000) } &lt;/head&gt; &lt;body&gt;</code></pre>';
$markdown = ' <script type = "text/javascript"> function startTimer() { var tim = window.setTimeout("hideMessage()", 5000) } </head> <body>';
$markdown = '`<script type = "text/javascript"> function startTimer() { var tim = window.setTimeout("hideMessage()", 5000) } </head> <body>`';
$this->html_gives_markdown($html, $markdown);
$this->html_gives_markdown('<p>&gt; &gt; Look at me! &lt; &lt;</p>', '\> > Look at me! < <');
$this->html_gives_markdown('<p>&gt; &gt; <b>Look</b> at me! &lt; &lt;<br />&gt; Just look at me!</p>', "\\> > **Look** at me! < < \n\\> Just look at me!");
Expand Down

0 comments on commit 7f98188

Please sign in to comment.