Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

initial import

  • Loading branch information...
commit 7a083c5daec58164ab95316a53013dd2521225cc 0 parents
@knieriem authored
14 .hgignore
@@ -0,0 +1,14 @@
+# use glob syntax.
+syntax: glob
+
+parser.leg.go
+cmd/markdown
+orig-c-src
+core
++*
+*.orig
+[58].*
+*.[58]
+,*
+*~
+[#]*
78 LICENSE
@@ -0,0 +1,78 @@
+markdown in Go, implemented using PEG grammar
+
+Copyright (c) 2010 Michael Teichgräber
+
+This is a translation of peg-markdown, written
+by John MacFarlane, into Go:
+
+Copyright (c) 2008 John MacFarlane
+
+peg-markdown is released under both the GPL and MIT licenses.
+You may pick the license that best fits your needs.
+
+The GPL
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+The MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+peg (http://github.com/pointlander/peg),
+ based on http://piumarta.com/software/peg/,
+written by Andrew J Snodgrass.
+
+Modifications to support LE grammars by Michael Teichgräber
+
+Copyright (c) 2010, Go Authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation and/or
+ other materials provided with the distribution.
+ * Neither the name of the Go Authors nor the names of its contributors may be used to
+ endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
69 Makefile
@@ -0,0 +1,69 @@
+include $(GOROOT)/src/Make.inc
+
+TARG=markdown
+GOFILES=\
+ markdown.go\
+ output.go\
+ parser.leg.go\
+
+package:
+
+include $(GOROOT)/src/Make.pkg
+
+all: cmd
+
+#
+# mdtest runs the MarkdownTest_1.0.3 suite that comes with the original C sources
+#
+mdtest: package cmd orig-c-src
+ make -C cmd test
+
+cmd: package
+ make -C cmd
+
+
+CLEANFILES=\
+ parser.leg.go\
+ _obj\
+ ,,c\
+ ,,fmt\
+
+distclean: clean clean-sub
+ rm -rf orig-c-src
+
+clean-sub:
+ for dir in cmd peg peg/leg; do make -C $$dir clean; done
+
+
+#
+# LEG parser generator stuff
+#
+LEG = ./peg/leg/leg
+%.leg.go: %.leg $(LEG)
+ $(LEG) $<
+
+$(LEG):
+ make -C peg all
+ make -C peg/leg all
+
+peg:
+
+
+#
+# access to original C source files
+#
+VCS = git
+# also, if hggit extension is available:
+# VCS = hg
+
+orig-c-src:
+ $(VCS) clone git://github.com/jgm/peg-markdown.git $@
+
+
+
+include misc/devel.mk
+
+.PHONY: \
+ cmd\
+ distclean\
+ mdtest\
214 README.peg-markdown
@@ -0,0 +1,214 @@
+What is this?
+=============
+
+This is an implementation of John Gruber's [markdown][] in C. It uses a
+[parsing expression grammar (PEG)][] to define the syntax. This should
+allow easy modification and extension. It currently supports output in
+HTML, LaTeX, or groff_mm formats, and adding new formats is relatively
+easy.
+
+[parsing expression grammar (PEG)]: http://en.wikipedia.org/wiki/Parsing_expression_grammar
+[markdown]: http://daringfireball.net/projects/markdown/
+
+It is pretty fast. A 179K text file that takes 5.7 seconds for
+Markdown.pl (v. 1.0.1) to parse takes less than 0.2 seconds for this
+markdown. It does, however, use a lot of memory (up to 4M of heap space
+while parsing the 179K file, and up to 80K for a 4K file). (Note that
+the memory leaks in earlier versions of this program have now been
+plugged.)
+
+Both a library and a standalone program are provided.
+
+peg-markdown is written and maintained by John MacFarlane (jgm on
+github), with significant contributions by Ryan Tomayko (rtomayko).
+It is released under both the GPL and the MIT license; see LICENSE for
+details.
+
+Installing
+==========
+
+On a linux or unix-based system
+-------------------------------
+
+This program is written in portable ANSI C. It requires
+[glib2](http://www.gtk.org/download.html). Most *nix systems will have
+this installed already. The build system requires GNU make.
+
+The other required dependency, [Ian Piumarta's peg/leg PEG parser
+generator](http://piumarta.com/software/peg/), is included in the source
+directory. It will be built automatically. (However, it is not as portable
+as peg-markdown itself, and seems to require gcc.)
+
+To make the 'markdown' executable:
+
+ make
+
+(Or, on some systems, `gmake`.) Then, for usage instructions:
+
+ ./markdown --help
+
+To run John Gruber's Markdown 1.0.3 test suite:
+
+ make test
+
+The test suite will fail on one of the list tests. Here's why.
+Markdown.pl encloses "item one" in the following list in `<p>` tags:
+
+ 1. item one
+ * subitem
+ * subitem
+
+ 2. item two
+
+ 3. item three
+
+peg-markdown does not enclose "item one" in `<p>` tags unless it has a
+following blank line. This is consistent with the official markdown
+syntax description, and lets the author of the document choose whether
+`<p>` tags are desired.
+
+Cross-compiling for Windows with MinGW on a linux box
+-----------------------------------------------------
+
+Prerequisites:
+
+* Linux system with MinGW cross compiler. For Ubuntu:
+
+ sudo apt-get install mingw32
+
+* [Windows glib-2.0 binary & development files](http://www.gtk.org/download-windows.html).
+ Unzip files into cross-compiler directory tree (e.g., `/usr/i586-mingw32msvc`).
+
+Steps:
+
+1. Create the markdown parser using Linux-compiled `leg` from peg-0.1.4:
+
+ ./peg-0.1.4/leg markdown_parser.leg >markdown_parser.c
+
+ (Note: The same thing could be accomplished by cross-compiling leg,
+ executing it on Windows, and copying the resulting C file to the Linux
+ cross-compiler host.)
+
+2. Run the cross compiler with include flag for the Windows glib-2.0 headers:
+ for example,
+
+ /usr/bin/i586-mingw32msvc-cc -c \
+ -I/usr/i586-mingw32msvc/include/glib-2.0 \
+ -I/usr/i586-mingw32msvc/lib/glib-2.0/include -Wall -O3 -ansi markdown*.c
+
+3. Link against the Windows glib-2.0 libraries: for example,
+
+ /usr/bin/i586-mingw32msvc-cc markdown*.o \
+ -Wl,-L/usr/i586-mingw32msvc/lib/glib,--dy,--warn-unresolved-symbols,-lglib-2.0 \
+ -o markdown.exe
+
+The resulting executable depends on the glib dll file, so be sure to
+load the glib binary on the Windows host.
+
+Compiling with MinGW on Windows
+-------------------------------
+
+These directions assume that MinGW is installed in `c:\MinGW` and glib-2.0
+is installed in the MinGW directory hierarchy (with the mingw bin directory
+in the system path).
+
+Unzip peg-markdown in a temp directory. From the directory with the
+peg-markdown source, execute:
+
+ cd peg-0.1.4
+ for %i in (*.c) do @gcc -g -Wall -O3 -DNDEBUG -c -o %~ni.o %i
+ gcc -o leg.exe leg.o tree.o compile.o
+ cd ..
+ peg-0.1.4\leg.exe markdown_parser.leg >markdown_parser.c
+ @for %i in (markdown*.c) do @gcc -mms-bitfields -Ic:/MinGW/include/glib-2.0 -Ic:/MinGW/lib/glib-2.0/include -c -o %~ni.o %i
+ gcc -O3 -Lc:/MinGW/lib/glib-2.0 -lglib-2.0 -lintl markdown.o markdown_lib.o markdown_output.o markdown_parser.o -o markdown.exe -Wl,--dy,--warn-unresolved-symbols,-lglib-2.0,-Lc:/MinGW/lib/glib-2.0,-lglib-2.0,-lintl
+
+(Windows instructions courtesy of Matt Wolf.)
+
+Extensions
+==========
+
+peg-markdown supports extensions to standard markdown syntax.
+These can be turned on using the command line flag `-x` or
+`--extensions`. `-x` by itself turns on all extensions. Extensions
+can also be turned on selectively, using individual command-line
+options. To see the available extensions:
+
+ ./markdown --help-extensions
+
+The `--smart` extension provides "smart quotes", dashes, and ellipses.
+
+The `--notes` extension provides a footnote syntax like that of
+Pandoc or PHP Markdown Extra.
+
+Using the library
+=================
+
+The library exports two functions:
+
+ GString * markdown_to_g_string(char *text, int extensions, int output_format);
+ char * markdown_to_string(char *text, int extensions, int output_format);
+
+The only difference between these is that `markdown_to_g_string` returns a
+`GString` (glib's automatically resizable string), while `markdown_to_string`
+returns a regular character pointer. The memory allocated for these must be
+freed by the calling program, using `g_string_free()` or `free()`.
+
+`text` is the markdown-formatted text to be converted. Note that tabs will
+be converted to spaces, using a four-space tab stop. Character encodings are
+ignored.
+
+`extensions` is a bit-field specifying which syntax extensions should be used.
+If `extensions` is 0, no extensions will be used. If it is `0xFFFFFF`,
+all extensions will be used. To set extensions selectively, use the
+bitwise `|` operator and the following constants:
+
+ - `EXT_SMART` turns on smart quotes, dashes, and ellipses.
+ - `EXT_NOTES` turns on footnote syntax. [Pandoc's footnote syntax][] is used here.
+ - `EXT_FILTER_HTML` filters out raw HTML (except for styles).
+ - `EXT_FILTER_STYLES` filters out styles in HTML.
+
+ [Pandoc's footnote syntax]: http://johnmacfarlane.net/pandoc/README.html#footnotes
+
+`output_format` is either `HTML_FORMAT`, `LATEX_FORMAT`, or `GROFF_MM_FORMAT`.
+
+To use the library, include `markdown_lib.h`. See `markdown.c` for an example.
+
+Hacking
+=======
+
+It should be pretty easy to modify the program to produce other formats
+than HTML or LaTeX, and to parse syntax extensions. A quick guide:
+
+ * `markdown_parser.leg` contains the grammar itself.
+
+ * `markdown_output.c` contains functions for printing the `Element`
+ structure in various output formats.
+
+ * To add an output format, add the format to `markdown_formats` in
+ `markdown_lib.h`. Then modify `print_element` in `markdown_output.c`,
+ and add functions `print_XXXX_string`, `print_XXXX_element`, and
+ `print_XXXX_element_list`. Also add an option in the main program
+ that selects the new format. Don't forget to add it to the list of
+ formats in the usage message.
+
+ * To add syntax extensions, define them in the PEG grammar
+ (`markdown_parser.leg`), using existing extensions as a guide. New
+ inline elements will need to be added to `Inline =`; new block
+ elements will need to be added to `Block =`. (Note: the order
+ of the alternatives does matter in PEG grammars.)
+
+ * If you need to add new types of elements, modify the `keys`
+ enum in `markdown_peg.h`.
+
+ * By using `&{ }` rules one can selectively disable extensions
+ depending on command-line options. For example,
+ `&{ extension(EXT_SMART) }` succeeds only if the `EXT_SMART` bit
+ of the global `syntax_extensions` is set. Add your option to
+ `markdown_extensions` in `markdown_lib.h`, and add an option in
+ `markdown.c` to turn on your extension.
+
+ * Note: Avoid using `[^abc]` character classes in the grammar, because
+ they cause problems with non-ascii input. Instead, use: `( !'a' !'b'
+ !'c' . )`
+
15 cmd/Makefile
@@ -0,0 +1,15 @@
+include $(GOROOT)/src/Make.inc
+
+TARG=markdown
+GOFILES=\
+ main.go\
+
+R = ..
+PREREQ += $(R)/_obj/markdown.a
+
+include $(GOROOT)/src/Make.cmd
+
+
+test: $(TARG)
+ cd $(R)/orig-c-src/MarkdownTest_1.0.3; \
+ ./MarkdownTest.pl --script=../../cmd/$< --tidy
41 cmd/main.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+ md "../_obj/markdown"
+ "flag"
+ "fmt"
+ "os"
+ "bufio"
+ "io/ioutil"
+)
+
+// main converts a Markdown document to HTML.
+//
+// Input is read from the file named by the first non-flag argument, or
+// from standard input when there is none; the HTML is written to
+// standard output. The -notes and -smart flags switch on the
+// corresponding syntax extensions. A read or write failure is reported
+// on standard error and the program exits with status 1.
+func main() {
+	var b []byte
+
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: %s [FILE]\n", os.Args[0])
+		flag.PrintDefaults()
+	}
+	optNotes := flag.Bool("notes", false, "turn on footnote syntax")
+	optSmart := flag.Bool("smart", false, "turn on smart quotes, dashes, and ellipses")
+	flag.Parse()
+
+	// Read the whole document up front; Parse works on a string.
+	if flag.NArg() > 0 {
+		data, err := ioutil.ReadFile(flag.Arg(0))
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "%s: %v\n", os.Args[0], err)
+			os.Exit(1)
+		}
+		b = data
+	} else {
+		data, err := ioutil.ReadAll(os.Stdin)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "%s: %v\n", os.Args[0], err)
+			os.Exit(1)
+		}
+		b = data
+	}
+
+	e := 0
+	if *optNotes {
+		e |= md.EXT_NOTES
+	}
+	if *optSmart {
+		e |= md.EXT_SMART
+	}
+
+	doc := md.Parse(string(b), e)
+	w := bufio.NewWriter(os.Stdout)
+	doc.WriteHtml(w)
+	// Flush is where a buffered write error surfaces.
+	if err := w.Flush(); err != nil {
+		fmt.Fprintf(os.Stderr, "%s: %v\n", os.Args[0], err)
+		os.Exit(1)
+	}
+}
25 doc.go
@@ -0,0 +1,25 @@
+/*
+A translation of peg-markdown [1] into Go.
+
+Usage example:
+
+ import (
+ md "markdown"
+ "os"
+ "io/ioutil"
+ "bufio"
+ )
+
+ func main() {
+ b, _ := ioutil.ReadAll(os.Stdin)
+
+ doc := md.Parse(string(b), md.EXT_SMART)
+
+ w := bufio.NewWriter(os.Stdout)
+ doc.WriteHtml(w)
+ w.Flush()
+ }
+
+[1]: https://github.com/jgm/peg-markdown/
+*/
+package markdown
138 markdown.go
@@ -0,0 +1,138 @@
+/* Original C version https://github.com/jgm/peg-markdown/
+ * Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
+ *
+ * Modifications and translation from C into Go
+ * based on markdown_lib.c and parsing_functions.c
+ * Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License or the MIT
+ * license. See LICENSE for details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+package markdown
+
+// implements Parse()
+
+import (
+ "strings"
+)
+
+// Markdown Extensions:
+const (
+ EXT_SMART = 1 << iota
+ EXT_NOTES
+ EXT_FILTER_HTML
+ EXT_FILTER_STYLES
+)
+
+// Parse reads the Markdown source in text and returns a Doc holding the
+// element tree used by the output functions. extFlags is a bit mask of
+// the EXT_* constants.
+func Parse(text string, extFlags int) *Doc {
+	doc := new(Doc)
+	doc.syntaxExtensions = extFlags
+
+	src := preformat(text)
+
+	// The references rule (and the notes rule, when EXT_NOTES is set) is
+	// run over the input before the document rule.
+	doc.parseRule(ruleReferences, src)
+	if extFlags&EXT_NOTES != 0 {
+		doc.parseRule(ruleNotes, src)
+	}
+
+	doc.tree = doc.processRawBlocks(doc.parseMarkdown(src))
+	return doc
+}
+
+// parseRule runs a freshly initialised parser over s, starting at the
+// given grammar rule, with d attached to the parser. When the parse
+// fails, the parser's PrintError method is called.
+func (d *Doc) parseRule(rule int, s string) {
+	parser := new(yyParser)
+	parser.Doc = d
+	parser.Init()
+	parser.Buffer = s
+
+	if ok := parser.Parse(rule); ok {
+		return
+	}
+	parser.PrintError()
+}
+
+// parseMarkdown runs a freshly initialised parser over text, starting at
+// ruleDoc, and returns d.tree as it stands afterwards. The result of the
+// parser's Parse call is not inspected.
+func (d *Doc) parseMarkdown(text string) *element {
+	parser := new(yyParser)
+	parser.Doc = d
+	parser.Init()
+	parser.Buffer = text
+
+	parser.Parse(ruleDoc)
+
+	tree := d.tree
+	return tree
+}
+
+
+/* processRawBlocks - traverses an element list, replacing any RAW elements with
+ * the result of parsing them as markdown text, and recursing into the children
+ * of parent elements. The result should be a tree of elements without any RAWs.
+ *
+ * The list is modified in place; the returned value is input itself.
+ */
+func (d *Doc) processRawBlocks(input *element) *element {
+	for current := input; current != nil; current = current.next {
+		if current.key == RAW {
+			/* \001 is used to indicate boundaries between nested lists when there
+			 * is no blank line. We split the string by \001 and parse
+			 * each chunk separately.
+			 */
+			current.key = LIST
+			current.children = nil
+
+			var lastChild *element /* tail of the children collected so far */
+			for _, contents := range strings.Split(current.contents.str, "\001", -1) {
+				list := d.parseMarkdown(contents)
+				if list == nil {
+					/* A chunk that produced no elements adds nothing;
+					 * skipping it also keeps the tail walk below from
+					 * dereferencing a nil pointer.
+					 */
+					continue
+				}
+				if lastChild == nil {
+					current.children = list
+				} else {
+					lastChild.next = list
+				}
+				/* advance to the end of the list just appended */
+				for lastChild = list; lastChild.next != nil; lastChild = lastChild.next {
+				}
+			}
+			current.contents.str = ""
+		}
+		if current.children != nil {
+			current.children = d.processRawBlocks(current.children)
+		}
+	}
+	return input
+}
+
+
+const (
+ TABSTOP = 4
+)
+
+/* preformat - returns a copy of text in which each tab is replaced by
+ * the spaces needed to reach the next tab stop (TABSTOP columns apart),
+ * with two newlines appended at the end.
+ */
+func preformat(text string) (s string) {
+	remaining := TABSTOP /* columns left until the next tab stop */
+	start := 0           /* beginning of the span not yet copied to s */
+
+	for pos := range text {
+		c := text[pos]
+		if c == '\t' {
+			s += text[start:pos]
+			for remaining > 0 {
+				s += " "
+				remaining--
+			}
+			start = pos + 1
+		} else if c == '\n' {
+			s += text[start : pos+1]
+			start = pos + 1
+			remaining = TABSTOP
+		} else {
+			remaining--
+		}
+		if remaining == 0 {
+			remaining = TABSTOP
+		}
+	}
+	return s + text[start:] + "\n\n"
+}
37 misc/c2go.sed
@@ -0,0 +1,37 @@
+# this sed script replaces some bits of the original leg file
+# to make it more similar to the Go version, thus avoiding
+# too many differences
+
+/\$\$/ {
+ s,\$\$->,$$.,g
+ /\$\$[^}]*$/s,\; *$,,g
+}
+
+s,parse_result,p.tree,
+s,references,p.references,
+s,notes,p.notes,
+s,find_reference,p.findReference,g
+
+s,->key,.key,g
+s,->children,.children,g
+s,->contents.str,.contents.str,g
+
+/{ *if (extens/ {
+ s,if (,if ,
+ s,)),),
+}
+/EXT/ s,if extension,if p.extension,
+/EXT/ s,{ *extension,{ p.extension,g
+/EXT/ s,{ *!extension,{ !p.extension,g
+
+s,{ *element \*[a-z]*\; *$,{,
+
+/raw\.key =/ s,;$,,
+/result =/ s,;$,,
+s,result = mk_el,result := mk_el,
+
+s,NULL,nil,g
+
+s, *\; *}, },g
+
+s,strlen(,len(,g
16 misc/devel.mk
@@ -0,0 +1,16 @@
+#
+# development utilities
+#
+gofmt:
+ rc ./misc/gofmt.rc
+
+diff: ,,c
+ tkdiff $< parser.leg
+
+,,c: orig-c-src/markdown_parser.leg
+ sed -f misc/c2go.sed < $< > $@
+
+orig-c-src/markdown_parser.leg: orig-c-src
+
+
+.PHONY: diff gofmt
51 misc/gofmt.rc
@@ -0,0 +1,51 @@
+fmtopts=('-spaces=false' '-tabwidth=4')
+
+fn f{
+ gofmt $fmtopts -s ../$1 > $1
+ diff $1
+}
+
+fn diff{
+ if (! cmp -s ../$1 $1) {
+ echo tkdiff $1 ,,fmt/$1
+ }
+}
+
+if(! test -d ,,fmt)
+ mkdir ,,fmt
+cd ,,fmt
+
+
+# split parser.leg into Go and leg parts, gofmt the Go parts,
+# and combine pieces to form a parser.leg again
+p=../parser.leg
+<$p >,,leg awk '
+ /^%%/ { copy=0 }
+ copy { print }
+ /^%}/ { copy=1 }
+'
+
+<$p >,,p.go awk '
+ /^%%/ { copy=1; $0 = "//" $0 }
+ /^%}/ { copy=0 }
+ copy { print }
+ /^%{/ { copy=1 }
+'
+gofmt $fmtopts -w -s ,,p.go
+
+>parser.leg {
+ echo '%{'
+ cat ,,p.go | sed '/^\/\/%%/,$d'
+ echo '%}'
+ cat ,,leg
+ echo %%
+ cat ,,p.go | sed '1,/^\/\/%%/d'
+}
+
+
+f doc.go
+f markdown.go
+f output.go
+diff parser.leg
+
+~ 0 0
229 output.go
@@ -0,0 +1,229 @@
+/* Original C version https://github.com/jgm/peg-markdown/
+ * Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
+ *
+ * Modifications and translation from C into Go
+ * based on markdown_output.c
+ * Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License or the MIT
+ * license. See LICENSE for details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+package markdown
+
+// HTML output functions
+
+import (
+ "os"
+ "fmt"
+ "log"
+ "rand"
+ "strings"
+)
+
+// Writer is the set of output methods the HTML printer calls on its
+// destination.
+type Writer interface {
+ WriteString(string) (int, os.Error)
+ WriteRune(int) (int, os.Error)
+ WriteByte(byte) os.Error
+}
+
+// htmlOut carries the state of one HTML rendering run.
+type htmlOut struct {
+ Writer // destination for all output
+ padded int // compared against the requested count in pad(); presumably the number of newlines already written - TODO confirm
+
+ notenum int // number of note references emitted so far
+ endNotes []*element /* List of endnotes to print after main content. */
+}
+
+// WriteHtml renders the document tree as HTML through w, followed by
+// any endnotes gathered while rendering. The returned value is always 0.
+//
+func (d *Doc) WriteHtml(w Writer) int {
+	out := &htmlOut{Writer: w, padded: 2}
+	out.elist(d.tree, false).pad(2).printEndnotes()
+	return 0
+}
+
+// pad - write newlines while n exceeds the current padding count,
+// then store what is left of n as the new count
+func (w *htmlOut) pad(n int) *htmlOut {
+	for n > w.padded {
+		w.WriteByte('\n')
+		n--
+	}
+	w.padded = n
+	return w
+}
+
+// pset - set the padding count to n without writing anything
+func (w *htmlOut) pset(n int) *htmlOut {
+	w.padded = n
+	return w
+}
+
+// s - write the string unchanged (no escaping) to the underlying Writer
+func (w *htmlOut) s(s string) *htmlOut {
+	w.Writer.WriteString(s)
+	return w
+}
+
+
+/* print string, escaping for HTML
+ * The characters & < > " are always written as named entities.
+ * If obfuscate selected, convert every other character to a hex or
+ * decimal character reference, chosen at random for each character.
+ */
+func (w *htmlOut) str(hs string, obfuscate bool) *htmlOut {
+	for _, r := range hs {
+		switch r {
+		case '&':
+			w.s("&amp;")
+		case '<':
+			w.s("&lt;")
+		case '>':
+			w.s("&gt;")
+		case '"':
+			w.s("&quot;")
+		default:
+			switch {
+			case !obfuscate:
+				w.WriteRune(r)
+			case rand.Intn(2) == 0:
+				/* Intn(2) yields 0 or 1, so both forms are actually used */
+				w.s(fmt.Sprintf("&#%d;", r))
+			default:
+				/* a hexadecimal reference needs the "x" after "&#" */
+				w.s(fmt.Sprintf("&#x%x;", r))
+			}
+		}
+	}
+	return w
+}
+
+/* print every element of a list, following the next pointers
+ */
+func (w *htmlOut) elist(list *element, obfuscate bool) *htmlOut {
+	for e := list; e != nil; e = e.next {
+		w.elem(e, obfuscate)
+	}
+	return w
+}
+
+/* elem - print a single element as HTML, selected by elt.key
+ * Text collected in s is written after the switch; cases that write
+ * directly through w leave s empty. Returns w so calls can be chained.
+ */
+func (w *htmlOut) elem(elt *element, obfuscate bool) *htmlOut {
+ var s string
+
+ switch elt.key {
+ case SPACE:
+ s = elt.contents.str
+ case LINEBREAK:
+ s = "<br/>\n"
+ case STR:
+ w.str(elt.contents.str, obfuscate)
+ case ELLIPSIS:
+ s = "&hellip;"
+ case EMDASH:
+ s = "&mdash;"
+ case ENDASH:
+ s = "&ndash;"
+ case APOSTROPHE:
+ s = "&rsquo;"
+ case SINGLEQUOTED:
+ w.s("&lsquo;").elist(elt.children, obfuscate).s("&rsquo;")
+ case DOUBLEQUOTED:
+ w.s("&ldquo;").elist(elt.children, obfuscate).s("&rdquo;")
+ case CODE:
+ w.s("<code>").str(elt.contents.str, obfuscate).s("</code>")
+ case HTML:
+ s = elt.contents.str
+ case LINK:
+ if strings.Index(elt.contents.link.url, "mailto:") == 0 {
+ obfuscate = true /* obfuscate mailto: links */
+ }
+ w.s(`<a href="`).str(elt.contents.link.url, obfuscate).s(`"`)
+ if len(elt.contents.link.title) > 0 {
+ w.s(` title="`).str(elt.contents.link.title, obfuscate).s(`"`)
+ }
+ w.s(">").elist(elt.contents.link.label, obfuscate).s("</a>")
+ case IMAGE:
+ w.s(`<img src="`).str(elt.contents.link.url, obfuscate).s(`" alt="`)
+ w.elist(elt.contents.link.label, obfuscate).s(`"`)
+ if len(elt.contents.link.title) > 0 {
+ w.s(` title="`).str(elt.contents.link.title, obfuscate).s(`"`)
+ }
+ w.s(" />")
+ case EMPH:
+ w.s("<em>").elist(elt.children, obfuscate).s("</em>")
+ case STRONG:
+ w.s("<strong>").elist(elt.children, obfuscate).s("</strong>")
+ case LIST:
+ w.elist(elt.children, obfuscate)
+ case RAW:
+ /* Shouldn't occur - these are handled by processRawBlocks() */
+ log.Exitf("RAW")
+ case H1, H2, H3, H4, H5, H6:
+ h := fmt.Sprintf("h%1d>", elt.key-H1+1) /* assumes H1 ... H6 are in order */
+ w.pad(2).s("<").s(h).elist(elt.children, obfuscate).s("</").s(h).pset(0)
+ case PLAIN:
+ w.pad(1).elist(elt.children, obfuscate).pset(0)
+ case PARA:
+ w.pad(2).s("<p>").elist(elt.children, obfuscate).s("</p>").pset(0)
+ case HRULE:
+ w.pad(2).s("<hr />").pset(0)
+ case HTMLBLOCK:
+ w.pad(2).s(elt.contents.str).pset(0)
+ case VERBATIM:
+ w.pad(2).s("<pre><code>").str(elt.contents.str, obfuscate).s("</code></pre>").pset(0)
+ case BULLETLIST:
+ w.pad(2).s("<ul>").pset(0).elist(elt.children, obfuscate).pad(1).s("</ul>").pset(0)
+ case ORDEREDLIST:
+ w.pad(2).s("<ol>").pset(0).elist(elt.children, obfuscate).pad(1).s("</ol>").pset(0)
+ case LISTITEM:
+ w.pad(1).s("<li>").pset(2).elist(elt.children, obfuscate).s("</li>").pset(0)
+ case BLOCKQUOTE:
+ w.pad(2).s("<blockquote>\n").pset(2).elist(elt.children, obfuscate).pad(1).s("</blockquote>").pset(0)
+ case REFERENCE:
+ /* Nonprinting */
+ case NOTE:
+ /* if contents.str == "", then queue the note as an endnote and print
+ * a numbered reference to it; else ignore, since this
+ * is a note block that has been incorporated into the notes list
+ */
+ if elt.contents.str == "" {
+ w.endNotes = append(w.endNotes, elt) /* add an endnote to the endnotes list kept in w */
+ w.notenum++
+ nn := w.notenum
+ s = fmt.Sprintf(`<a class="noteref" id="fnref%d" href="#fn%d" title="Jump to note %d">[%d]</a>`,
+ nn, nn, nn, nn)
+ }
+ default:
+ log.Exitf("htmlOut.elem encountered unknown element key = %d\n", elt.key)
+ }
+ if s != "" {
+ w.s(s)
+ }
+ return w
+}
+
+
+// printEndnotes writes the collected endnotes as an ordered list, each
+// item followed by a link back to its reference. Nothing is written
+// when no endnotes were collected.
+func (w *htmlOut) printEndnotes() {
+	if len(w.endNotes) == 0 {
+		return
+	}
+	w.s("<hr/>\n<ol id=\"notes\">")
+	for i, note := range w.endNotes {
+		num := i + 1 // notes are numbered from 1
+		w.pad(1).s(fmt.Sprintf("<li id=\"fn%d\">\n", num)).pset(2)
+		w.elist(note.children, false)
+		w.s(fmt.Sprintf(" <a href=\"#fnref%d\" title=\"Jump back to reference\">[back]</a>", num))
+		w.pad(1).s("</li>")
+	}
+	w.pad(1).s("</ol>")
+}
952 parser.leg
@@ -0,0 +1,952 @@
+%{
+/* Original C version https://github.com/jgm/peg-markdown/
+ * Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
+ *
+ * Modifications and translation from C into Go
+ * based on markdown_parser.leg and utility_functions.c
+ * Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License or the MIT
+ * license. See LICENSE for details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+package markdown
+
+// PEG grammar and parser actions for markdown syntax.
+
+import (
+ "fmt"
+ "strings"
+ "log"
+)
+
+// Semantic value of a parsing action; elements are chained into lists through next.
+type element struct {
+ key int // one of the semantic value types declared in the const block (LIST, RAW, ...)
+ contents // embedded: string and link data of this element
+ children *element // first element of the child list, or nil
+ next *element // following element in the same list, or nil
+}
+
+// Information (label, URL and title) for a link.
+type link struct {
+ label *element
+ url string
+ title string
+}
+
+// Contents of an element: a string plus an embedded link pointer (a struct here, not a union).
+type contents struct {
+ str string
+ *link
+}
+
+// Types of semantic values returned by parsers.
+const (
+ LIST = iota /* A generic list of values. For ordered and bullet lists, see below. */
+ RAW /* Raw markdown to be processed further */
+ SPACE
+ LINEBREAK
+ ELLIPSIS
+ EMDASH
+ ENDASH
+ APOSTROPHE
+ SINGLEQUOTED
+ DOUBLEQUOTED
+ STR
+ LINK
+ IMAGE
+ CODE
+ HTML
+ EMPH
+ STRONG
+ PLAIN
+ PARA
+ LISTITEM
+ BULLETLIST
+ ORDEREDLIST
+ H1 /* Code assumes that H1..6 are in order. */
+ H2
+ H3
+ H4
+ H5
+ H6
+ BLOCKQUOTE
+ VERBATIM
+ HTMLBLOCK
+ HRULE
+ REFERENCE
+ NOTE
+ numVAL
+)
+
+type Doc struct {
+ tree *element /* Results of parse. */
+ references *element /* List of link references found. */
+ notes *element /* List of footnotes found. */
+ syntaxExtensions int /* Syntax extensions selected. */
+}
+
+%}
+
+%userstate *Doc
+
+%YYSTYPE *element
+
+
+Doc = a:StartList ( Block { a = cons($$, a) } )*
+ { p.tree = reverse(a) }
+ commit
+
+Block = BlankLine*
+ ( BlockQuote
+ | Verbatim
+ | Note
+ | Reference
+ | HorizontalRule
+ | Heading
+ | OrderedList
+ | BulletList
+ | HtmlBlock
+ | StyleBlock
+ | Para
+ | Plain )
+
+Para = NonindentSpace a:Inlines BlankLine+
+ { $$ = a; $$.key = PARA }
+
+Plain = a:Inlines
+ { $$ = a; $$.key = PLAIN }
+
+AtxInline = !Newline !(Sp '#'* Sp Newline) Inline
+
+AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
+ { $$ = mk_element(H1 + (len(yytext) - 1)) }
+
+AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a) } )+ (Sp '#'* Sp)? Newline
+ { $$ = mk_list(s.key, a)
+ s = nil }
+
+SetextHeading = SetextHeading1 | SetextHeading2
+
+SetextHeading1 = a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline "===" '='* Newline
+ { $$ = mk_list(H1, a) }
+
+SetextHeading2 = a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline "---" '-'* Newline
+ { $$ = mk_list(H2, a) }
+
+Heading = AtxHeading | SetextHeading
+
+BlockQuote = a:BlockQuoteRaw
+ { $$ = mk_element(BLOCKQUOTE)
+ $$.children = a
+ }
+
+BlockQuoteRaw = a:StartList
+ (( '>' ' '? Line { a = cons($$, a) } )
+ ( !'>' !BlankLine Line { a = cons($$, a) } )*
+ ( BlankLine { a = cons(mk_str("\n"), a) } )*
+ )+
+ { $$ = mk_str_from_list(a, true)
+ $$.key = RAW
+ }
+
+NonblankIndentedLine = !BlankLine IndentedLine
+
+VerbatimChunk = a:StartList
+ ( BlankLine { a = cons(mk_str("\n"), a) } )*
+ ( NonblankIndentedLine { a = cons($$, a) } )+
+ { $$ = mk_str_from_list(a, false) }
+
+Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a) } )+
+ { $$ = mk_str_from_list(a, false)
+ $$.key = VERBATIM }
+
+HorizontalRule = NonindentSpace
+ ( '*' Sp '*' Sp '*' (Sp '*')*
+ | '-' Sp '-' Sp '-' (Sp '-')*
+ | '_' Sp '_' Sp '_' (Sp '_')*)
+ Sp Newline BlankLine+
+ { $$ = mk_element(HRULE) }
+
+Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+
+
+BulletList = &Bullet (ListTight | ListLoose)
+ { $$.key = BULLETLIST }
+
+ListTight = a:StartList
+ ( ListItem { a = cons($$, a) } )+
+ BlankLine* !(Bullet | Enumerator)
+ { $$ = mk_list(LIST, a) }
+
+ListLoose = a:StartList
+ ( b:ListItem BlankLine*
+ {
+ li := b.children
+ li.contents.str += "\n\n"
+ a = cons(b, a)
+ } )+
+ { $$ = mk_list(LIST, a) }
+
+ListItem = ( Bullet | Enumerator )
+ a:StartList
+ ListBlock { a = cons($$, a) }
+ ( ListContinuationBlock { a = cons($$, a) } )*
+ {
+ raw := mk_str_from_list(a, false)
+ raw.key = RAW
+ $$ = mk_element(LISTITEM)
+ $$.children = raw
+ }
+
+ListBlock = a:StartList
+ Line { a = cons($$, a) }
+ ( ListBlockLine { a = cons($$, a) } )*
+ { $$ = mk_str_from_list(a, false) }
+
+ListContinuationBlock = a:StartList
+ ( < BlankLine* >
+ { if len(yytext) == 0 {
+ a = cons(mk_str("\001"), a) // block separator
+ } else {
+ a = cons(mk_str(yytext), a)
+ }
+ } )
+ ( Indent ListBlock { a = cons($$, a) } )+
+ { $$ = mk_str_from_list(a, false) }
+
+Enumerator = NonindentSpace [0-9]+ '.' Spacechar+
+
+OrderedList = &Enumerator (ListTight | ListLoose)
+ { $$.key = ORDEREDLIST }
+
+ListBlockLine = !( Indent? (Bullet | Enumerator) )
+ !BlankLine
+ !HorizontalRule
+ OptionallyIndentedLine
+
+# Parsers for different kinds of block-level HTML content.
+# This is repetitive due to constraints of PEG grammar.
+#
+# Every block-level tag gets a pair of rules: HtmlBlockOpenX matches
+# the opening tag (with optional attributes) and HtmlBlockCloseX the
+# closing tag. The tag name is accepted in all-lowercase or
+# all-uppercase spelling only.
+
+HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
+HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
+
+HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
+HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
+
+HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
+HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
+
+HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
+HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
+
+HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
+HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'
+
+HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
+HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'
+
+HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
+HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
+
+HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
+HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'
+
+HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
+HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'
+
+HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
+HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'
+
+HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
+HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'
+
+HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
+HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'
+
+HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
+HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'
+
+HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
+HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'
+
+HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
+HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
+
+HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
+HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
+
+HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
+HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
+
+HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
+HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'
+
+HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
+HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'
+
+HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
+HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
+
+HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
+HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
+
+HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
+HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'
+
+HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
+HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'
+
+HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
+HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'
+
+HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
+HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
+
+HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
+HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'
+
+HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
+HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
+
+HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
+HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'
+
+HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
+HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
+
+HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
+HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
+
+HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
+HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
+
+HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
+HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
+
+HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
+HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
+
+# HtmlBlockInTags: a complete block-level HTML element, from its
+# opening tag to the matching closing tag of the same name. The
+# content may contain nested block-level elements (matched
+# recursively) or any other character that does not begin the closing
+# tag. One alternative per supported tag, one alternative per line.
+HtmlBlockInTags = HtmlBlockOpenAddress (HtmlBlockInTags | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
+ | HtmlBlockOpenBlockquote (HtmlBlockInTags | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
+ | HtmlBlockOpenCenter (HtmlBlockInTags | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
+ | HtmlBlockOpenDir (HtmlBlockInTags | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
+ | HtmlBlockOpenDiv (HtmlBlockInTags | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
+ | HtmlBlockOpenDl (HtmlBlockInTags | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
+ | HtmlBlockOpenFieldset (HtmlBlockInTags | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
+ | HtmlBlockOpenForm (HtmlBlockInTags | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
+ | HtmlBlockOpenH1 (HtmlBlockInTags | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
+ | HtmlBlockOpenH2 (HtmlBlockInTags | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
+ | HtmlBlockOpenH3 (HtmlBlockInTags | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
+ | HtmlBlockOpenH4 (HtmlBlockInTags | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
+ | HtmlBlockOpenH5 (HtmlBlockInTags | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
+ | HtmlBlockOpenH6 (HtmlBlockInTags | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
+ | HtmlBlockOpenMenu (HtmlBlockInTags | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
+ | HtmlBlockOpenNoframes (HtmlBlockInTags | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
+ | HtmlBlockOpenNoscript (HtmlBlockInTags | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript
+ | HtmlBlockOpenOl (HtmlBlockInTags | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
+ | HtmlBlockOpenP (HtmlBlockInTags | !HtmlBlockCloseP .)* HtmlBlockCloseP
+ | HtmlBlockOpenPre (HtmlBlockInTags | !HtmlBlockClosePre .)* HtmlBlockClosePre
+ | HtmlBlockOpenTable (HtmlBlockInTags | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
+ | HtmlBlockOpenUl (HtmlBlockInTags | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
+ | HtmlBlockOpenDd (HtmlBlockInTags | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
+ | HtmlBlockOpenDt (HtmlBlockInTags | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
+ | HtmlBlockOpenFrameset (HtmlBlockInTags | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
+ | HtmlBlockOpenLi (HtmlBlockInTags | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
+ | HtmlBlockOpenTbody (HtmlBlockInTags | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
+ | HtmlBlockOpenTd (HtmlBlockInTags | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
+ | HtmlBlockOpenTfoot (HtmlBlockInTags | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
+ | HtmlBlockOpenTh (HtmlBlockInTags | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
+ | HtmlBlockOpenThead (HtmlBlockInTags | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
+ | HtmlBlockOpenTr (HtmlBlockInTags | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
+ | HtmlBlockOpenScript (HtmlBlockInTags | !HtmlBlockCloseScript .)* HtmlBlockCloseScript
+
+# HtmlBlock: a block-level HTML element, an HTML comment or a
+# self-closing block tag, followed by at least one blank line. With
+# EXT_FILTER_HTML the block is replaced by an empty LIST; otherwise
+# the captured text is kept verbatim as an HTMLBLOCK element.
+HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
+ BlankLine+
+ { if p.extension(EXT_FILTER_HTML) {
+ $$ = mk_list(LIST, nil)
+ } else {
+ $$ = mk_str(yytext)
+ $$.key = HTMLBLOCK
+ }
+ }
+
+# HtmlBlockSelfClosing: a block-level tag of the form <name attrs />.
+HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'
+
+# HtmlBlockType: names accepted in a self-closing block tag, in
+# all-lowercase or all-uppercase spelling. Besides the tags that have
+# HtmlBlockOpen/Close rules, this list also contains "hr" and
+# "isindex".
+HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
+ "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
+ "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
+ "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
+ "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
+ "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"
+
+# <style> ... </style> handling. Unlike HtmlBlockInTags, the content
+# is not searched for nested elements: everything up to the first
+# closing style tag is taken.
+StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
+StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>'
+InStyleTags = StyleOpen (!StyleClose .)* StyleClose
+# StyleBlock: a style element followed by optional blank lines. With
+# EXT_FILTER_STYLES it is replaced by an empty LIST; otherwise the
+# captured text is kept verbatim as an HTMLBLOCK element.
+StyleBlock = < InStyleTags >
+ BlankLine*
+ { if p.extension(EXT_FILTER_STYLES) {
+ $$ = mk_list(LIST, nil)
+ } else {
+ $$ = mk_str(yytext)
+ $$.key = HTMLBLOCK
+ }
+ }
+
+# Inlines: a non-empty run of inline elements. An Endline is added to
+# the list only when another Inline follows it; a final Endline is
+# consumed but not stored.
+Inlines = a:StartList ( !Endline Inline { a = cons($$, a) }
+ | c:Endline &Inline { a = cons(c, a) } )+ Endline?
+ { $$ = mk_list(LIST, a) }
+
+# Inline: any single inline element. This is a PEG ordered choice, so
+# the first alternative that matches wins; Symbol comes last as the
+# fallback for a lone special character.
+Inline = Str
+ | Endline
+ | UlOrStarLine
+ | Space
+ | Strong
+ | Emph
+ | Image
+ | Link
+ | NoteReference
+ | InlineNote
+ | Code
+ | RawHtml
+ | Entity
+ | EscapedChar
+ | Smart
+ | Symbol
+
+# Space: a run of spaces/tabs, reduced to a single " " keyed SPACE.
+Space = Spacechar+
+ { $$ = mk_str(" ")
+ $$.key = SPACE }
+
+# Str: a run of normal characters. Underscores are allowed inside the
+# run as long as a normal character follows them.
+Str = < NormalChar (NormalChar | '_'+ &NormalChar)* >
+ { $$ = mk_str(yytext) }
+
+# EscapedChar: a backslash followed by one of the listed punctuation
+# characters. Only the character itself is captured, so the backslash
+# is dropped from the result.
+EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
+ { $$ = mk_str(yytext) }
+
+# Entity: a hexadecimal, decimal or named character entity, passed
+# through unchanged as an HTML element.
+Entity = ( HexEntity | DecEntity | CharEntity )
+ { $$ = mk_str(yytext); $$.key = HTML }
+
+# Endline: a line ending inside a paragraph. LineBreak is tried first
+# because it starts with literal spaces that NormalEndline would
+# otherwise absorb through Sp.
+Endline = LineBreak | TerminalEndline | NormalEndline
+
+# NormalEndline: a newline that continues the paragraph. It must not
+# be followed by a blank line, a '>' (block quote), an ATX header
+# start, or a line that is itself followed by a setext underline.
+# Yields "\n" keyed SPACE.
+NormalEndline = Sp Newline !BlankLine !'>' !AtxStart
+ !(Line ("===" '='* | "---" '-'*) Newline)
+ { $$ = mk_str("\n")
+ $$.key = SPACE }
+
+# TerminalEndline: the newline at the very end of the input; yields no
+# element (nil).
+TerminalEndline = Sp Newline Eof
+ { $$ = nil }
+
+# LineBreak: a hard line break - at least two spaces at the end of a
+# line (further spaces are absorbed by the Sp at the start of
+# NormalEndline). A single trailing space must not force a break.
+LineBreak = "  " NormalEndline
+ { $$ = mk_element(LINEBREAK) }
+
+# Symbol: a single special character taken literally.
+Symbol = < SpecialChar >
+ { $$ = mk_str(yytext) }
+
+# This keeps the parser from getting bogged down on long strings of '*' or '_',
+# or strings of '*' or '_' with space on each side:
+# (four or more of the character in a row, or a run of the character
+# that is preceded and followed by a space or tab, is taken as plain
+# text)
+UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext) }
+StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar >
+UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar >
+
+# Emphasis: *text* or _text_.
+Emph = EmphStar | EmphUl
+
+# An opening '*' must not be part of a StarLine and must not be
+# followed by whitespace or a newline. The closer is the last inline
+# of the emphasized text followed by '*'; that inline must not start
+# at whitespace, and the '*' must not begin a StrongStar.
+OneStarOpen = !StarLine '*' !Spacechar !Newline
+OneStarClose = !Spacechar !Newline a:Inline !StrongStar '*' { $$ = a }
+
+# EmphStar: inlines are collected until OneStarClose matches; the
+# inline returned by the closer becomes the last child.
+EmphStar = OneStarOpen
+ a:StartList
+ ( !OneStarClose Inline { a = cons($$, a) } )*
+ OneStarClose { a = cons($$, a) }
+ { $$ = mk_list(EMPH, a) }
+
+# Underscore variant. In addition, the closing '_' must not be
+# followed by an alphanumeric character.
+OneUlOpen = !UlLine '_' !Spacechar !Newline
+OneUlClose = !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a }
+
+EmphUl = OneUlOpen
+ a:StartList
+ ( !OneUlClose Inline { a = cons($$, a) } )*
+ OneUlClose { a = cons($$, a) }
+ { $$ = mk_list(EMPH, a) }
+
+# Strong emphasis: **text** or __text__. The structure mirrors the
+# Emph rules, with two-character delimiters.
+Strong = StrongStar | StrongUl
+
+TwoStarOpen = !StarLine "**" !Spacechar !Newline
+TwoStarClose = !Spacechar !Newline a:Inline "**" { $$ = a }
+
+# StrongStar: inlines are collected until TwoStarClose matches; the
+# inline returned by the closer becomes the last child.
+StrongStar = TwoStarOpen
+ a:StartList
+ ( !TwoStarClose Inline { a = cons($$, a) } )*
+ TwoStarClose { a = cons($$, a) }
+ { $$ = mk_list(STRONG, a) }
+
+# Underscore variant. The closing "__" must not be followed by an
+# alphanumeric character.
+TwoUlOpen = !UlLine "__" !Spacechar !Newline
+TwoUlClose = !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a }
+
+StrongUl = TwoUlOpen
+ a:StartList
+ ( !TwoUlClose Inline { a = cons($$, a) } )*
+ TwoUlClose { a = cons($$, a) }
+ { $$ = mk_list(STRONG, a) }
+
+# Image: '!' followed by an explicit or reference link; the element
+# produced by the link rule is rekeyed to IMAGE.
+Image = '!' ( ExplicitLink | ReferenceLink )
+ { $$.key = IMAGE }
+
+# Link: [label](url "title"), [label][ref] / [label], or <autolink>.
+Link = ExplicitLink | ReferenceLink | AutoLink
+
+# The two-label form is tried before the single-label form.
+ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle
+
+# ReferenceLinkDouble: [text] [ref] with a non-empty second label.
+# The second label is looked up in the collected references. On a hit
+# a link is built from the first label and the reference's url and
+# title. On a miss the source text is rebuilt as a LIST of
+# "[" text "]" <captured spacing> "[" ref "]".
+ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label
+ {
+ if match, found := p.findReference(b.children); found {
+ $$ = mk_link(a.children, match.url, match.title);
+ a = nil
+ b = nil
+ } else {
+ result := mk_element(LIST)
+ result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
+ cons(mk_str("["), cons(b, mk_str("]")))))))
+ $$ = result
+ }
+ }
+
+# ReferenceLinkSingle: [text] optionally followed by "[]". The label
+# itself is looked up in the collected references. On a miss the
+# source text is rebuilt as a LIST of "[" text "]" followed by the
+# captured optional "[]" part.
+ReferenceLinkSingle = a:Label < (Spnl "[]")? >
+ {
+ if match, found := p.findReference(a.children); found {
+ $$ = mk_link(a.children, match.url, match.title)
+ a = nil
+ } else {
+ result := mk_element(LIST)
+ result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))));
+ $$ = result
+ }
+ }
+
+# ExplicitLink: [label](source "title"); the title may be empty.
+ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')'
+ { $$ = mk_link(l.children, s.contents.str, t.contents.str)
+ s = nil
+ t = nil
+ l = nil }
+
+# Source: the link target, optionally wrapped in <...>; the angle
+# brackets are not part of the captured text.
+Source = ( '<' < SourceContents > '>' | < SourceContents > )
+ { $$ = mk_str(yytext) }
+
+# SourceContents: non-space characters with balanced parentheses.
+# NOTE(review): the starred group already matches the empty string,
+# so the trailing | "" alternative can never be selected.
+SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*
+ | ""
+
+# Title: optional link title in single or double quotes; the quotes
+# are not part of the captured text.
+Title = ( TitleSingle | TitleDouble | < "" > )
+ { $$ = mk_str(yytext) }
+
+# A quote character ends the title only when it is followed (after
+# optional spaces) by ')' or a newline, so quotes inside the title are
+# allowed.
+TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''
+
+TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'
+
+# AutoLink: <scheme://...> or <user@host>.
+AutoLink = AutoLinkUrl | AutoLinkEmail
+
+# AutoLinkUrl: the captured text is used both as label and as url.
+AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
+ { $$ = mk_link(mk_str(yytext), yytext, "") }
+
+# AutoLinkEmail: the captured address is the label; the url is the
+# address prefixed with "mailto:".
+AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>'
+ {
+ $$ = mk_link(mk_str(yytext), "mailto:"+yytext, "")
+ }
+
+# Reference: a link reference definition, [label]: source "title".
+# The label must not be empty ("[]"). The result is a link element
+# rekeyed to REFERENCE.
+Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine*
+ { $$ = mk_link(l.children, s.contents.str, t.contents.str)
+ s = nil
+ t = nil
+ l = nil
+ $$.key = REFERENCE }
+
+# Label: inline content enclosed in [ ... ], returned as a LIST. With
+# EXT_NOTES enabled the label must not start with '^' (that syntax
+# belongs to note references); with it disabled the only requirement
+# is that some character follows the '['.
+Label = '[' ( !'^' &{ p.extension(EXT_NOTES) } | &. &{ !p.extension(EXT_NOTES) } )
+ a:StartList
+ ( !']' Inline { a = cons($$, a) } )*
+ ']'
+ { $$ = mk_list(LIST, a) }
+
+# RefSrc: target of a reference definition - a run of non-space
+# characters, keyed HTML.
+RefSrc = < Nonspacechar+ >
+ { $$ = mk_str(yytext)
+ $$.key = HTML }
+
+# RefTitle: optional title of a reference definition, delimited by
+# single quotes, double quotes or parentheses; the delimiters are not
+# part of the captured text.
+RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
+ { $$ = mk_str(yytext) }
+
+EmptyTitle = < "" >
+
+# In all three forms the title cannot span lines, and the closing
+# delimiter only counts when it is the last thing on its line.
+RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''
+
+RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"'
+
+RefTitleParens = '(' < ( !(')' Sp Newline | Newline) . )* > ')'
+
+# References: scans the input for reference definitions, skipping all
+# other blocks, and stores them in document order in p.references.
+# NOTE(review): 'commit' looks like a directive of the peg tool used
+# to build this grammar - confirm its exact meaning in the tool's
+# documentation.
+References = a:StartList
+ ( b:Reference { a = cons(b, a) } | SkipBlock )*
+ { p.references = reverse(a) }
+ commit
+
+# TicksN: exactly N backticks (a further backtick must not follow).
+Ticks1 = "`" !'`'
+Ticks2 = "``" !'`'
+Ticks3 = "```" !'`'
+Ticks4 = "````" !'`'
+Ticks5 = "`````" !'`'
+
+# Code: an inline code span delimited by 1 to 5 backticks, one
+# alternative per delimiter length. The content may contain backtick
+# runs of a different length, and newlines that are not followed by a
+# blank line. Delimiters and the spaces next to them are not part of
+# the captured text.
+Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
+ | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
+ | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
+ | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
+ | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
+ )
+ { $$ = mk_str(yytext); $$.key = CODE }
+
+# RawHtml: an inline HTML comment or tag. With EXT_FILTER_HTML it is
+# replaced by an empty LIST; otherwise the text is passed through as
+# an HTML element.
+RawHtml = < (HtmlComment | HtmlTag) >
+ { if p.extension(EXT_FILTER_HTML) {
+ $$ = mk_list(LIST, nil)
+ } else {
+ $$ = mk_str(yytext)
+ $$.key = HTML
+ }
+ }
+
+# Lexical building blocks shared by the rules above.
+
+# BlankLine: a line containing only spaces and tabs.
+BlankLine = Sp Newline
+
+# Quoted: a single- or double-quoted attribute value (no escapes).
+Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
+# HtmlAttribute: name, optionally followed by = and a quoted or
+# unquoted value.
+HtmlAttribute = (Alphanumeric | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
+HtmlComment = "<!--" (!"-->" .)* "-->"
+# HtmlTag: any opening, closing or self-closing tag.
+HtmlTag = '<' Spnl '/'? Alphanumeric+ Spnl HtmlAttribute* '/'? Spnl '>'
+Eof = !.
+Spacechar = ' ' | '\t'
+Nonspacechar = !Spacechar !Newline .
+# Newline: LF, CR LF, or a lone CR.
+Newline = '\n' | '\r' '\n'?
+Sp = Spacechar*
+# Spnl: optional spaces containing at most one newline.
+Spnl = Sp (Newline Sp)?
+# SpecialChar: characters that may start inline markup; the set grows
+# with the enabled syntax extensions (see ExtendedSpecialChar).
+SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '\\' | ExtendedSpecialChar
+NormalChar = !( SpecialChar | Spacechar | Newline ) .
+Alphanumeric = [A-Za-z0-9]
+Digit = [0-9]
+
+# Character entities. Each rule captures the complete entity text,
+# from '&' through the terminating ';', for use by Entity.
+HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
+DecEntity = < '&' '#' [0-9]+ ';' >
+CharEntity = < '&' [A-Za-z0-9]+ ';' >
+
+# NonindentSpace: up to three leading spaces, i.e. less than one
+# indentation level. Longest alternative first, because PEG choice is
+# ordered.
+NonindentSpace = "   " | "  " | " " | ""
+# Indent: one indentation level - a tab or four spaces.
+Indent = "\t" | "    "
+IndentedLine = Indent Line
+OptionallyIndentedLine = Indent? Line
+
+# StartList starts a list data structure that can be added to with cons:
+# it consumes no input (but requires that at least one character is
+# left) and yields nil, the empty list.
+StartList = &.
+ { $$ = nil }
+
+# Line: one line of input as a string element. The text captured by
+# RawLine includes the line's newline, if it has one.
+Line = RawLine
+ { $$ = mk_str(yytext) }
+# RawLine: everything up to and including the next newline, or - on
+# the last line - all remaining input up to Eof.
+RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )
+
+# SkipBlock: consumes one block without building anything - either a
+# run of non-blank lines plus trailing blank lines, or a run of blank
+# lines.
+SkipBlock = ( !BlankLine RawLine )+ BlankLine*
+ | BlankLine+
+
+# Syntax extensions
+
+# ExtendedSpecialChar: additional special characters that are only
+# active while the corresponding extension is enabled.
+ExtendedSpecialChar = &{ p.extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
+ | &{ p.extension(EXT_NOTES) } ( '^' )
+
+# Smart: smart punctuation, only with EXT_SMART. Apostrophe is tried
+# last, after SingleQuoted has had its chance at the same character.
+Smart = &{ p.extension(EXT_SMART) }
+ ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )
+
+Apostrophe = '\''
+ { $$ = mk_element(APOSTROPHE) }
+
+Ellipsis = ("..." | ". . .")
+ { $$ = mk_element(ELLIPSIS) }
+
+# Dash: EmDash is tried before EnDash.
+Dash = EmDash | EnDash
+
+# EnDash: a hyphen directly followed by a digit.
+EnDash = '-' &Digit
+ { $$ = mk_element(ENDASH) }
+
+# EmDash: three or two hyphens.
+EmDash = ("---" | "--")
+ { $$ = mk_element(EMDASH) }
+
+# SingleQuoteStart: an opening single quote. It must not be followed
+# by closing punctuation, a hyphen or whitespace, nor by a contraction
+# suffix such as 's or 't. The '-' is placed first in the character
+# class so that it is a literal hyphen rather than a range operator.
+SingleQuoteStart = '\'' ![-)!\],.;:? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric )
+
+# SingleQuoteEnd: a closing single quote - one that is not followed
+# by an alphanumeric character.
+SingleQuoteEnd = '\'' !Alphanumeric
+
+# SingleQuoted: at least one inline between an opening and a closing
+# single quote.
+SingleQuoted = SingleQuoteStart
+ a:StartList
+ ( !SingleQuoteEnd b:Inline { a = cons(b, a) } )+
+ SingleQuoteEnd
+ { $$ = mk_list(SINGLEQUOTED, a) }
+
+DoubleQuoteStart = '"'
+
+DoubleQuoteEnd = '"'
+
+# DoubleQuoted: at least one inline between two double quotes.
+DoubleQuoted = DoubleQuoteStart
+ a:StartList
+ ( !DoubleQuoteEnd b:Inline { a = cons(b, a) } )+
+ DoubleQuoteEnd
+ { $$ = mk_list(DOUBLEQUOTED, a) }
+
+# NoteReference: [^label], only with EXT_NOTES. If a note with that
+# label has been collected, a NOTE element sharing the note's children
+# is produced; otherwise the reference is emitted as literal text.
+NoteReference = &{ p.extension(EXT_NOTES) }
+ ref:RawNoteReference
+ {
+ if match, ok := p.find_note(ref.contents.str); ok {
+ $$ = mk_element(NOTE)
+ $$.children = match.children
+ $$.contents.str = ""
+ } else {
+ $$ = mk_str("[^"+ref.contents.str+"]")
+ }
+ }
+
+# RawNoteReference: captures the label between "[^" and "]"; the
+# label cannot be empty or span lines.
+RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
+ { $$ = mk_str(yytext) }
+
+# Note: a note definition, [^label]: text, only with EXT_NOTES. The
+# first raw block is mandatory; further blocks belong to the note as
+# long as they start with an Indent. The label is stored in
+# contents.str, which is the field find_note compares against.
+Note = &{ p.extension(EXT_NOTES) }
+ NonindentSpace ref:RawNoteReference ':' Sp
+ a:StartList
+ ( RawNoteBlock { a = cons($$, a) } )
+ ( &Indent RawNoteBlock { a = cons($$, a) } )*
+ { $$ = mk_list(NOTE, a)
+ $$.contents.str = ref.contents.str
+ }
+
+# InlineNote: ^[inline content], only with EXT_NOTES. Produces a NOTE
+# element with an empty label.
+InlineNote = &{ p.extension(EXT_NOTES) }
+ "^["
+ a:StartList
+ ( !']' Inline { a = cons($$, a) } )+
+ ']'
+ { $$ = mk_list(NOTE, a)
+ $$.contents.str = "" }
+
+# Notes: scans the input for note definitions, skipping all other
+# blocks, and stores them in document order in p.notes.
+# NOTE(review): 'commit' looks like a directive of the peg tool used
+# to build this grammar - confirm its exact meaning in the tool's
+# documentation.
+Notes = a:StartList
+ ( b:Note { a = cons(b, a) } | SkipBlock )*
+ { p.notes = reverse(a) }
+ commit
+
+# RawNoteBlock: one or more non-blank (optionally indented) lines plus
+# the blank lines that follow them, concatenated with one extra
+# newline appended, as a string element keyed RAW.
+RawNoteBlock = a:StartList
+ ( !BlankLine OptionallyIndentedLine { a = cons($$, a) } )+
+ ( < BlankLine* > { a = cons(mk_str(yytext), a) } )
+ { $$ = mk_str_from_list(a, true)
+ $$.key = RAW
+ }
+
+%%
+
+
+/*
+ * List manipulation functions
+ */
+
+
+/* cons - cons an element onto a list, returning pointer to new head.
+ * elt becomes the first node; its next pointer is overwritten with
+ * list, which may be nil (the empty list). elt itself must not be nil.
+ */
+func cons(elt, list *element) *element {
+	elt.next = list
+	return elt
+}
+
+/* reverse - reverse a list, returning pointer to new list
+ */
+func reverse(list *element) (new *element) {
+ for list != nil {
+ next := list.next
+ new = cons(list, new)
+ list = next
+ }
+ return
+}
+
+/* concat_string_list - concatenates string contents of list of STR elements.
+ * Only contents.str of each node is read. The pieces are gathered
+ * first and joined in one step, so the cost stays linear in the
+ * total length instead of re-copying the result for every node.
+ */
+func concat_string_list(list *element) string {
+	n := 0
+	for e := list; e != nil; e = e.next {
+		n++
+	}
+	parts := make([]string, n)
+	i := 0
+	for e := list; e != nil; e = e.next {
+		parts[i] = e.contents.str
+		i++
+	}
+	return strings.Join(parts, "")
+}
+
+
+/*
+ * Auxiliary functions for parsing actions.
+ * These make it easier to build up data structures (including lists)
+ * in the parsing actions.
+ */
+
+
+/* mk_element - allocates a fresh element carrying the given key;
+ * all other fields keep their zero values.
+ */
+func mk_element(key int) *element {
+	elt := new(element)
+	elt.key = key
+	return elt
+}
+
+/* mk_str - builds a STR element whose string contents are s.
+ */
+func mk_str(s string) (result *element) {
+	result = &element{key: STR}
+	result.contents.str = s
+	return result
+}
+
+/* mk_str_from_list - builds a STR element from a list of strings that
+ * was accumulated with cons (i.e. in reverse order): the list is
+ * reversed, its string contents are concatenated and, if
+ * extra_newline is set, a single "\n" is appended.
+ */
+func mk_str_from_list(list *element, extra_newline bool) (result *element) {
+	text := concat_string_list(reverse(list))
+	if extra_newline {
+		text += "\n"
+	}
+	return mk_str(text)
+}
+
+/* mk_list - builds an element with key 'key' whose children are the
+ * nodes of 'lst' in reversed order. Parser actions collect items with
+ * cons, which prepends; reversing here restores document order.
+ */
+func mk_list(key int, lst *element) *element {
+	return &element{key: key, children: reverse(lst)}
+}
+
+/* mk_link - builds a LINK element whose contents describe a link with
+ * the given label, url and title.
+ */
+func mk_link(label *element, url, title string) *element {
+	target := &link{label: label, url: url, title: title}
+	elt := mk_element(LINK)
+	elt.contents.link = target
+	return elt
+}
+
+
+/* extension - reports whether any bit of ext is set in the
+ * document's syntaxExtensions.
+ */
+func (d *Doc) extension(ext int) bool {
+	selected := d.syntaxExtensions & ext
+	return selected != 0
+}
+
+/* match_inlines - reports whether two inline lists are equal, comparing
+ * text case-insensitively. The lists are walked in parallel; they
+ * match only if every pair of nodes matches and both lists end
+ * together. A list containing a LINK or IMAGE never matches, and an
+ * unexpected key terminates the program via log.Exitf.
+ */
+func match_inlines(l1, l2 *element) bool {
+	for ; l1 != nil && l2 != nil; l1, l2 = l1.next, l2.next {
+		if l1.key != l2.key {
+			return false
+		}
+		switch l1.key {
+		case LINK, IMAGE:
+			return false /* No links or images within links */
+		case CODE, STR, HTML:
+			if strings.ToUpper(l1.contents.str) != strings.ToUpper(l2.contents.str) {
+				return false
+			}
+		case EMPH, STRONG, LIST, SINGLEQUOTED, DOUBLEQUOTED:
+			if !match_inlines(l1.children, l2.children) {
+				return false
+			}
+		case SPACE, LINEBREAK, ELLIPSIS, EMDASH, ENDASH, APOSTROPHE:
+			/* nothing further is compared: equal keys are enough */
+		default:
+			log.Exitf("match_inlines encountered unknown key = %d\n", l1.key)
+		}
+	}
+	/* a match requires that both lists are exhausted */
+	return l1 == nil && l2 == nil
+}
+
+
+/* findReference - searches the document's references for one whose
+ * label matches 'label' (see match_inlines). Returns the link of the
+ * first matching reference and true, or nil and false if none matches.
+ */
+func (d *Doc) findReference(label *element) (*link, bool) {
+	cur := d.references
+	for cur != nil {
+		if candidate := cur.contents.link; match_inlines(label, candidate.label) {
+			return candidate, true
+		}
+		cur = cur.next
+	}
+	return nil, false
+}
+
+
+/* find_note - searches the document's notes for one whose contents.str
+ * equals 'label' exactly. Returns the first such note and true, or
+ * nil and false if there is none.
+ */
+func (d *Doc) find_note(label string) (*element, bool) {
+	note := d.notes
+	for note != nil && note.contents.str != label {
+		note = note.next
+	}
+	return note, note != nil
+}
+
+
+/* print tree of elements, for debugging only.
+ */
+func print_tree(elt *element, indent int) {
+ var key string
+
+ for elt != nil {
+ for i := 0; i < indent; i++ {
+ fmt.Print("\t")
+ }
+ key = keynames[elt.key]
+ if key == "" {
+ key = "?"
+ }
+ if elt.key == STR {
+ fmt.Printf("%p:\t%s\t'%s'\n", elt, key, elt.contents.str)
+ } else {
+ fmt.Printf("%p:\t%s %p\n", elt, key, elt.next)
+ }
+ if elt.children != nil {
+ print_tree(elt.children, indent+1)
+ }
+ elt = elt.next
+ }
+}
+
+/* keynames - printable name for each element key, indexed by the key
+ * value; used by print_tree. Keys without an entry map to the empty
+ * string.
+ */
+var keynames = [numVAL]string{
+ LIST: "LIST",
+ RAW: "RAW",
+ SPACE: "SPACE",
+ LINEBREAK: "LINEBREAK",
+ ELLIPSIS: "ELLIPSIS",
+ EMDASH: "EMDASH",
+ ENDASH: "ENDASH",
+ APOSTROPHE: "APOSTROPHE",
+ SINGLEQUOTED: "SINGLEQUOTED",
+ DOUBLEQUOTED: "DOUBLEQUOTED",
+ STR: "STR",
+ LINK: "LINK",
+ IMAGE: "IMAGE",
+ CODE: "CODE",
+ HTML: "HTML",
+ EMPH: "EMPH",
+ STRONG: "STRONG",
+ PLAIN: "PLAIN",
+ PARA: "PARA",
+ LISTITEM: "LISTITEM",
+ BULLETLIST: "BULLETLIST",
+ ORDEREDLIST: "ORDEREDLIST",
+ H1: "H1",
+ H2: "H2",
+ H3: "H3",
+ H4: "H4",
+ H5: "H5",
+ H6: "H6",
+ BLOCKQUOTE: "BLOCKQUOTE",
+ VERBATIM: "VERBATIM",
+ HTMLBLOCK: "HTMLBLOCK",
+ HRULE: "HRULE",
+ REFERENCE: "REFERENCE",
+ NOTE: "NOTE",
+}
Please sign in to comment.
Something went wrong with that request. Please try again.