Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More languages #13

Merged
merged 24 commits into from Aug 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2007b96
simplify file write and make_directories
Aug 2, 2020
33f1082
default to adding __init__.py files to directories
Aug 4, 2020
f2fe040
test that non markdown extension is ignored
Aug 2, 2020
f727c9d
add per language test and data
Aug 3, 2020
6472df4
remove python indicator, switch to language
Aug 4, 2020
8ab7fba
introduce default language, lang mappings
Aug 4, 2020
863242e
collect and write per-language code blocks
Aug 4, 2020
7d3c486
make other languages work
Aug 4, 2020
4781972
add test for unmapped extension
Aug 4, 2020
f8556dd
clean up __init__.py addition logic
Aug 5, 2020
833b72d
test __init__.py paths
Aug 5, 2020
fcba42d
correct spelling mistakes
Aug 5, 2020
6b010c2
separate 3rd party imports with a blank line
Aug 5, 2020
8df94d5
cleanup
ryneeverett Aug 6, 2020
4ff9428
Ignore capitalization of language strings.
ryneeverett Aug 7, 2020
ef189db
Remove no-op else clause.
ryneeverett Aug 7, 2020
5007671
Document extension/language mappings.
ryneeverett Aug 7, 2020
acbf213
Language should be reset even if --unsafe.
ryneeverett Aug 9, 2020
b06d077
Add warning when unhinted code blocks are skipped.
ryneeverett Aug 9, 2020
dda1710
Use dash for cli parameters and fix typo.
ryneeverett Aug 10, 2020
9ef06ab
Pull sanity check out of if-statement.
ryneeverett Aug 12, 2020
334e7db
Don't create __init__.py in base output directory.
ryneeverett Aug 12, 2020
6b4589f
Avoid abusing return as a mid-function "break".
ryneeverett Aug 12, 2020
39ea705
Remove redundant test assertion.
ryneeverett Aug 12, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
108 changes: 78 additions & 30 deletions mkcodes.py
Expand Up @@ -13,48 +13,82 @@
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor


# There does not seem to be any specification for which info strings are
# accepted, but python-markdown passes it directly to pygments, so their
# mapping can be used as a guide:
# https://github.com/pygments/pygments/blob/master/pygments/lexers/_mapping.py
ext_map = {
    'cs': ['c#', 'csharp', 'c-sharp'],
    'py': ['python', 'python2', 'python3', 'py2', 'py3'],
}
# It's more straightforward to express the mappings by extension, but we
# actually need an inverted mapping.
language_map = {}
for ext, lang_strings in ext_map.items():
    for lang_string in lang_strings:
        language_map[lang_string] = ext


def github_codeblocks(filepath, safe, default_lang='py'):
    """Collect github-style fenced code blocks, grouped by file extension.

    Args:
        filepath: Path of the markdown file to read.
        safe: When true, a block is only collected if its opening fence
            carries a language hint. When false, a fence without a hint is
            attributed to ``default_lang``.
        default_lang: Language assumed for unhinted blocks when not ``safe``.

    Returns:
        A dict mapping an extension to the list of code block bodies found
        for it, in document order. Language hints are lower-cased and
        translated through ``language_map``; hints without a mapping are
        used as the extension unchanged.

    Blocks whose language could not be determined are skipped with a
    warning.
    """
    codeblocks = {}
    codeblock_re = re.compile(r'^```.*')
    # The hint is mandatory in safe mode and optional otherwise. Besides word
    # characters it may contain '#', '+' and '-', so that hints listed in
    # ext_map such as 'c#' and 'c-sharp' are actually recognised.
    codeblock_open_re = re.compile(
        r'^```(`*)([\w#+-]+){0}$'.format('' if safe else '?'))

    with open(filepath, 'r') as f:
        # Initialize State
        block = []
        language = None
        in_codeblock = False

        # Iterate lazily instead of loading every line up front.
        for line in f:
            # does this line contain a codeblock begin or end?
            codeblock_delimiter = codeblock_re.match(line)

            if in_codeblock:
                if codeblock_delimiter:
                    # we are closing a codeblock
                    if language:
                        # finished a codeblock, append everything
                        ext = language_map.get(language, language)
                        codeblocks.setdefault(ext, []).append(''.join(block))
                    else:
                        # An unhinted block may be empty, so there is not
                        # always a first line to quote in the warning.
                        first_line = block[0] if block else '<empty block>'
                        warnings.warn('No language hint found in safe mode. ' +
                                      'Skipping block beginning with: ' +
                                      first_line)

                    # Reset State
                    block = []
                    language = None
                    in_codeblock = False
                else:
                    block.append(line)
            elif codeblock_delimiter:
                # beginning a codeblock
                in_codeblock = True
                # does it have a language?
                lang_match = codeblock_open_re.match(line)
                if lang_match:
                    language = lang_match.group(2)
                    # Ignore capitalization of language strings.
                    language = language.lower() if language else language
                    if not safe:
                        # we can sub a default language if not safe
                        language = language or default_lang
    return codeblocks


def markdown_codeblocks(filepath, safe):
def markdown_codeblocks(filepath, safe, default_lang='py'):
import markdown

codeblocks = []
codeblocks = {}

if safe:
warnings.warn("'safe' option not available in 'markdown' mode.")

class DoctestCollector(Treeprocessor):
def run(self, root):
nonlocal codeblocks
codeblocks = (block.text for block in root.iterfind('./pre/code'))
codeblocks[default_lang] = (
block.text for block in root.iterfind('./pre/code'))

class DoctestExtension(Extension):
def extendMarkdown(self, md, md_globals):
Expand All @@ -63,7 +97,7 @@ def extendMarkdown(self, md, md_globals):

doctestextension = DoctestExtension()
markdowner = markdown.Markdown(extensions=[doctestextension])
markdowner.convertFile(str(filepath), output=os.devnull)
markdowner.convertFile(input=str(filepath), output=os.devnull)
return codeblocks


Expand All @@ -79,43 +113,57 @@ def get_files(inputs):
elif path.suffix in markdown_extensions:
yield path, path.parent

def add_inits_along_path(from_path, to_path):
    """Add __init__.py files to each directory from to_path up to from_path.

    An ``__init__.py`` is touched in ``to_path`` and in every parent of it
    below ``from_path``. ``from_path`` itself is left untouched.

    This compensates for https://bugs.python.org/issue23882
    and https://bugs.python.org/issue35617

    Args:
        from_path: Base directory (a ``pathlib.Path``) where the walk stops.
        to_path: Directory (a ``pathlib.Path``) at or below ``from_path``.

    Raises:
        ValueError: If ``to_path`` is not located within ``from_path``.
    """
    to_path = to_path.expanduser().resolve()
    from_path = from_path.expanduser().resolve()

    # Sanity Check: This will raise an exception if paths aren't relative.
    # Checking once is enough: every parent visited below stays inside
    # from_path until the loop terminates on it.
    to_path.relative_to(from_path)

    # Walk upwards until we reach the base output directory.
    current = to_path
    while current != from_path:
        (current / '__init__.py').touch()
        current = current.parent


def _write_codeblocks(codeblocks, outputdir, basename, name, init_root=None):
    # Write one output file per language found in a single input document.
    #
    # codeblocks maps an extension to its list of code block bodies,
    # basename is the output file name template ({name} and {ext} are
    # substituted) and init_root, when given, is the base output directory
    # below which python output directories receive __init__.py files.
    for lang, blocks in codeblocks.items():
        outputfilename = outputdir / basename.format(name=name, ext=lang)

        # make sure path exists, don't care if it already does
        outputfilename.parent.mkdir(parents=True, exist_ok=True)
        outputfilename.write_text('\n\n'.join(blocks))
        if init_root is not None and lang == 'py':
            add_inits_along_path(init_root, outputfilename.parent)


# NOTE: main is documented with comments rather than a docstring on purpose:
# click would publish a docstring as the command's --help text.
@click.command()
@click.argument(
    'inputs', nargs=-1, required=True, type=click.Path(exists=True))
@click.option('--output', default='{name}.{ext}')
@click.option('--github/--markdown', default=bool(not markdown_enabled),
              help='Github-flavored fence blocks or pure markdown.')
@click.option('--safe/--unsafe', default=True,
              help='Allow code blocks without language hints.')
@click.option('--package-python', default=True,
              help='Add __init__.py files to python dirs for test discovery')
@click.option('--default-lang', default='py',
              help='Assumed language for code blocks without language hints.')
def main(inputs, output, github, safe, package_python, default_lang):
    # Extract the code blocks of every input document and write them to one
    # file per language, mirroring the input directory layout below the
    # directory part of --output.
    collect_codeblocks = github_codeblocks if github else markdown_codeblocks
    outputbasedir = Path(output).parent
    outputbasename = Path(output).name

    for filepath, input_path in get_files(inputs):
        codeblocks = collect_codeblocks(filepath, safe, default_lang)
        if not codeblocks:
            # Nothing was collected from this document.
            continue

        fp = Path(filepath)
        # stitch together the OUTPUT base directory with input directories
        filedir = fp.parent.relative_to(input_path)
        _write_codeblocks(
            codeblocks,
            outputbasedir / filedir,
            outputbasename,
            fp.stem,
            init_root=outputbasedir if package_python else None)
2 changes: 1 addition & 1 deletion tests/data/nest/more/why.md
@@ -1,6 +1,6 @@
# why?

We want to make sure that in more complext documentation structures, which may have multiple sub directories, we are still formatting name and paths correctly.
We want to make sure that in more complex documentation structures, which may have multiple sub directories, we are still formatting name and paths correctly.

```py
import unittest
Expand Down
34 changes: 34 additions & 0 deletions tests/langdata/csharp.md
@@ -0,0 +1,34 @@
# dotNet is still a thing

What if you could provide a code sample here?

```cs
public void Sum(int a, int b)
{
return a + b;
}
```

And we know that it is testable.

```csharp
[Testclass]
public class UnitTest1
{
[TestMethod]
public void TestMethod1()
{
//Arrange
ApplicationToTest.Calc ClassCalc = new ApplicationToTest.Calc();
int expectedResult = 5;

//Act
int result = ClassCalc.Sum(2,3);

//Assert
Assert.AreEqual(expectedResult, result);
}
}
```

Actually checking and running these tests, that's a different matter.
32 changes: 32 additions & 0 deletions tests/langdata/java.md
@@ -0,0 +1,32 @@
# Java documentation is important

That's a language still. Here's a java codeblock:

```java
public class MyUnit {
public String concatenate(String one, String two){
return one + two;
}
}
```

And since we have that class, let's test it

```java
import org.junit.Test;
import static org.junit.Assert.*;

public class MyUnitTest {

@Test
public void testConcatenate() {
MyUnit myUnit = new MyUnit();

String result = myUnit.concatenate("one", "two");

assertEquals("onetwo", result);

}
}

```
34 changes: 34 additions & 0 deletions tests/langdata/multilang.md
@@ -0,0 +1,34 @@
# Comparing and contrasting

For some ideas about an api, we might give getting started code in a simple getting started page.

In a pinch, let's hello that world.

```py
print("hello, world")
```

But maybe we want this to be enterprise grade?

```java
class HelloWorld {
public static void main(String[] args) {
System.out.println("Hello, World!");
}
}
```

New orders from the CTO: let's use Azure cloud.
```cs
class HelloWorld {
static void Main() {
System.Console.WriteLine("Hello World");
}
}
```

We want to have a React, Vue, jQuery frontend. Assume that the code sample below has a testable extension as the language.

```js
console.log('Hello, world");
```
16 changes: 16 additions & 0 deletions tests/langdata/no_py_tree/clean.md
@@ -0,0 +1,16 @@
# Cleanliness

If there are no python files in a directory, we don't need to add an __init__.py file to that directory. Sure, they don't hurt, but having them where they aren't needed isn't very tidy and might be confusing.

Speaking of confusing, let's test JavaScript:
```js
function assert(condition, message) {
if (!condition) {
message = message || "Assertion failed";
throw new Error(message);
}
}

assert([]+[]=="", "very sensible, adding arrays is a string")
assert({}+[]==0, "of course adding a dict to an array is 0")
```
12 changes: 12 additions & 0 deletions tests/langdata/pytree/buried.md
@@ -0,0 +1,12 @@
# Test discovery

For test discovery to work for unittest, python files generated from this document must have an `__init__.py` file added to the directory - otherwise they won't be considered testable packages.

```python
import unittest

class TestDiscovery(unittest.TestCase):
def test_discovery(self):
self.assertTrue(True)

```
19 changes: 19 additions & 0 deletions tests/test.py
Expand Up @@ -140,6 +140,25 @@ def test_prefixed_deep_blocks(self):
self.assertIn('Ran 2 tests', proc.stderr)
self.assertIn('OK', proc.stderr)

def test_other_languages(self):
    """Every language gets its own output file; only python dirs get inits."""
    self.call(
        '--output', 'tests/output/test_{name}.{ext}',
        '--github', 'tests/langdata')

    # (path relative to the output directory, whether it must exist)
    expectations = (
        ('test_java.java', True),
        ('test_csharp.cs', True),
        ('test_csharp.csharp', False),
        ('test_multilang.cs', True),
        ('test_multilang.java', True),
        ('test_multilang.py', True),
        ('test_multilang.js', True),
        ('no_py_tree/test_clean.js', True),
        ('no_py_tree/__init__.py', False),
        ('pytree/test_buried.py', True),
        ('pytree/__init__.py', True),
        # __init__.py should not be created in the base output directory.
        ('__init__.py', False),
    )
    for relative_path, should_exist in expectations:
        check = self.assertTrue if should_exist else self.assertFalse
        check(self._output_path_exists(relative_path))

@unittest.skip
def test_glob(self):
raise NotImplementedError
Expand Down