Permalink
Browse files

Initial working version with support for some (not all) Markdown blocks.

  • Loading branch information...
0 parents commit 66c17f110998cee9aafbe254757ad65037d4e140 @malcolmsparks committed Jun 14, 2011
@@ -0,0 +1,6 @@
+*jar
+/lib/
+/classes/
+.lein-failures
+.lein-deps-sum
+/target/
661 LICENSE

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,32 @@
+# A Clojure Markdown library.
+
+The purpose of this library is to provide Clojure with native support for
+Markdown and other text-based authoring formats such as Org-mode.
+
+## Rationale
+
+Existing Markdown implementations in other languages are available to Clojure
+developers but these all combine a parser front-end to Markdown with a backend
+emitter (ie. HTML). For these to be used in Clojure the developer has to parse
+the result back into Clojure structures for further processing.
+
+A more efficient and flexible approach is to have the Markdown front-end create
+a native Clojure data structure (maps and vectors) that can be manipulated by
+functions and emitted into HTML using prxml, hiccup or a custom renderer. This
+also makes it easy to support other output formats, for example, DocBook XML as
+part of a professional publishing toolchain.
+
+Another possibility would be to include rich text in a web application based on
+Compojure (hiccup).
+
+## Extensions
+
+The overall aim is to pass all Markdown tests but while making it easier to
+configure and create extensions that can adapt the library to different Markdown
+flavors.
+
+## Status
+
+Currently this objective has not been reached and this library remains in a
+work-in-progress 'alpha' state. Once the library fully passes the entire
+Markdown test suite a version 1.0 will be released.
64 pom.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>clj-markdown</groupId>
+ <artifactId>clj-markdown</artifactId>
+ <version>0.1</version>
+ <name>clj-markdown</name>
+ <packaging>clojure</packaging>
+ <description>A Clojure library to parse the Markdown format.</description>
+ <url>http://github.com/malcolmsparks/clj-markdown</url>
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+ <build>
+ <resources>
+ <resource>
+ <directory>src/test/resources</directory>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.theoryinpractise</groupId>
+ <artifactId>clojure-maven-plugin</artifactId>
+ <version>1.3.6</version>
+ <extensions>true</extensions>
+ </plugin>
+ </plugins>
+ </build>
+ <repositories>
+ <repository>
+ <id>central</id>
+ <url>http://repo1.maven.org/maven2</url>
+ </repository>
+ <repository>
+ <id>clojure</id>
+ <url>http://build.clojure.org/releases</url>
+ </repository>
+ <repository>
+ <id>clojure-snapshots</id>
+ <url>http://build.clojure.org/snapshots</url>
+ </repository>
+ <repository>
+ <id>clojars</id>
+ <url>http://clojars.org/repo/</url>
+ </repository>
+ </repositories>
+ <dependencies>
+ <dependency>
+ <groupId>org.clojure</groupId>
+ <artifactId>clojure</artifactId>
+ <version>1.2.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.clojure</groupId>
+ <artifactId>clojure-contrib</artifactId>
+ <version>1.2.0</version>
+ </dependency>
+ <dependency>
+ <groupId>swank-clojure</groupId>
+ <artifactId>swank-clojure</artifactId>
+ <version>1.2.1</version>
+ </dependency>
+ </dependencies>
+</project>
@@ -0,0 +1,12 @@
+(defproject clj-markdown "1.0.0-SNAPSHOT"
+ :description "A Clojure library to parse the Markdown format."
+
+ :dependencies [[org.clojure/clojure "1.2.0"]
+ [org.clojure/clojure-contrib "1.2.0"]]
+ :dev-dependencies [[swank-clojure "1.2.0"]]
+
+ :source-path "src/main/clojure"
+ :library-path "target/dependency"
+ :test-path "src/test/clojure"
+ :target-dir "target/"
+ :local-repo-classpath true)
@@ -0,0 +1,250 @@
+;; Copyright 2010 Malcolm Sparks.
+;;
+;; This file is part of clj-markdown.
+;;
+;; clj-markdown is free software: you can redistribute it and/or modify it under the
+;; terms of the GNU Affero General Public License as published by the Free
+;; Software Foundation, either version 3 of the License, or (at your option) any
+;; later version.
+;;
+;; clj-markdown is distributed in the hope that it will be useful but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+;; A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+;; details.
+;;
+;; Please see the LICENSE file for a copy of the GNU Affero General Public License.
+
+(ns clj-markdown.core
+ (:use clojure.contrib.pprint)
+ (:require [clojure.java.io :as io]))
+
+(defprotocol LineProcessor
+ (process-line [this state])
+ (process-eof [this state]))
+
+(defn process-lines [^LineProcessor proc lines]
+ "Call a function for each line with a structure that gives the function access
+to both the individual line and the remaining lines. This separates the job of
+iterating over the lines from the processing of lines themselves. The
+LineProcessor function returns the next state, but this defaults to the next
+line, thereby reducing the probability that a mistake in the processor will
+cause the process to run infinitely."
+ (let [[yields eofs]
+ (split-with #(not (contains? % :end))
+ (letfn [(step [{[line & rem] :remaining :as state}]
+ (let [new-state (assoc state :line line :remaining rem)
+ new-state-tidied (dissoc new-state :yield)]
+ (if (nil? line) (assoc (process-eof proc new-state-tidied) :end true)
+ (process-line proc new-state-tidied))))]
+ (iterate step {:remaining lines})))]
+ (concat yields (list (first eofs)))))
+
+(defn filter-yields [states]
+ (filter #(not (nil? %)) (map :yield states)))
+
+(defn read-markdown-lines [reader]
+ (letfn [
+ (space? [x] (= (int x) 32))
+ (get-leading [x] (count (take-while space? x)))
+ (get-trailing [x] (count (take-while space? (reverse x))))
+ (init [line]
+ (assoc {:value line}
+ :leading (get-leading line)
+ :trailing (get-trailing line)
+ :empty (= (get-leading line) (count line))))]
+ (map init (line-seq reader))))
+
+(defn parse-xml-open-token [input]
+ (letfn [(parse-open-tag [tag] (re-seq #"<(\S+)([^>]*)>" tag))
+ (strip-quotes [s] (.substring s 1 (dec (count s))))]
+ (let [[_ tag attrs] (first (parse-open-tag input))
+ attrs (apply hash-map (mapcat #(let [[[ _ n v] & _]
+ (re-seq #"\s+([^=\s]+)\s?=\s?(\'[^\']*\'|\"[^\"]*\")" %)]
+ [(keyword n) (strip-quotes v)])
+ (re-seq #"\s[^=\s]+\s?=\s?(?:\'[^\']*\'|\"[^\"]*\")" attrs)))]
+ (if (empty? attrs)
+ [(keyword tag)]
+ [(keyword tag) attrs]))))
+
+(defn process-markdown-lines [input]
+ (letfn [(fold [stack]
+ (cond (empty? stack) nil
+ (= (count stack) 1) (first stack)
+ :otherwise (conj (pop (pop stack)) (conj (peek (pop stack)) (peek stack)))))
+ (fold-list-temp [temp]
+ (conj (pop (pop temp))
+ (assoc (peek (pop temp)) :content (conj (:content (peek (pop temp)))
+ (:content (peek temp))))))
+ (get-trimmed-value [left-trim line]
+ (let [to (- (count (:value line)) (:trailing line))]
+ (if (<= left-trim to)
+ (.substring (:value line) left-trim to)
+ "")))
+ (parse-xml [stack tok]
+ (cond
+ (.startsWith tok "</") (fold stack)
+ (.startsWith tok "<") (conj stack (parse-xml-open-token tok))
+ (= 0 (count (.trim tok))) stack
+ :otherwise (conj (pop stack) (conj (peek stack) tok))))
+ (pushback [state] (assoc state :remaining (cons (:line state) (:remaining state))))
+ (wrap-xml [v] [::xml v])
+ (wrap-ulist [v] [::ulist v])
+ (wrap-li [v] [::list-item v])]
+
+ (process-lines
+ (reify LineProcessor
+ (process-eof [this state] state) ; TODO
+ (process-line
+ [this {:keys [line remaining temp mode] :or {temp []} :as state}]
+
+ (cond
+
+ ;; Initialization
+ (not (contains? state :temp)) (assoc (pushback state) :case :init :temp [])
+ (not (contains? state :mode)) (assoc (pushback state) :case :init :mode ::text)
+
+ ;; Existing mode is XML?
+ (= mode ::xml)
+ (let [toks (re-seq #"(?:[<>][^<>]+[<>])|(?:[^<>]+)" (:value line))
+ xml (reduce parse-xml temp toks)]
+ (if (vector? xml)
+ (assoc state
+ :case :finish-xml
+ :mode ::text
+ :yield (wrap-xml xml)
+ :temp [])
+ (assoc state
+ :case :continue-xml
+ :temp xml)))
+
+ ;; Continue a list.
+ (= mode ::list)
+ (cond
+ (not (nil? (re-matches #"\s*[\*\-\+]\s.*" (:value line))))
+ (cond
+ (> (:leading line) (:leading (last temp)))
+ (assoc state
+ :case :continue-ulist
+ :temp (conj temp {:leading (:leading line) :content (wrap-ulist (wrap-li (:value line)))}))
+ :otherwise
+ ;; This collapses the list of nested levels to the correct one
+ (let [new-temp (first (drop-while #(> (:leading (last %)) (:leading line)) (iterate fold-list-temp temp)))]
+ (assoc state
+ :temp (conj (pop new-temp) (assoc (peek new-temp) :yield (:content (peek new-temp)))))))
+
+ (true? (:empty line))
+ (assoc state :case ::continue-ulist)
+
+ :otherwise
+ (-> state
+ pushback
+ (assoc
+ :mode ::text
+ :case ::ending-ulist
+ :yield (:content (first
+ (first
+ (drop-while #(> (count %) 1)
+ (iterate fold-list-temp temp))))))))
+
+ ;; Starting a list?
+ (and (empty? temp)
+ (not (nil? (re-matches #"\s{0,3}[\*\-\+]\s.*" (:value line)))))
+ (assoc state
+ :case ::starting-ulist
+ :temp [{:leading (:leading line) :content (wrap-ulist (wrap-li (:value line)))}]
+ :mode ::list)
+
+ ;; Heading1
+ (and (= (count temp) 1)
+ (not (nil? (re-matches #"[\=]+" (:value line)))))
+ (assoc state
+ :case ::finish-heading1
+ :yield [::heading1 (:value (first temp))] :temp [])
+
+ ;; Heading2
+ (and (= (count temp) 1)
+ (not (nil? (re-matches #"[-]+" (:value line)))))
+ (assoc state
+ :case ::finish-heading2
+ :yield [::heading2 (:value (first temp))]
+ :temp [])
+
+ ;; Heading (atx style)
+ (not (nil? (re-matches #"#{1,6}\s*.*" (:value line))))
+ (if (empty? temp)
+ (let [[_ hashes v] (first (re-seq #"(#{1,6})\s*(.*?)\s*#*$" (:value line)))
+ ]
+ (assoc state :yield
+ (case (count hashes)
+ 1 [::heading1 v]
+ 2 [::heading2 v]
+ 3 [::heading3 v]
+ 4 [::heading4 v]
+ 5 [::heading5 v]
+ 6 [::heading6 v])))
+ (throw (Exception. "Pushback a blank line to ensure the existing temp is yielded properly.")))
+
+ ;; Start of an XML block?
+ (and (empty? temp)
+ (not (empty? (re-seq #"^<\S+[^>]*>" (:value line)))))
+ (let [toks (re-seq #"(?:[<>][^<>]+[<>])|(?:[^<>]+)" (:value line))
+ xml (reduce parse-xml '() toks)]
+ (if (vector? xml)
+ (assoc state :case ::xml-line :yield (wrap-xml xml))
+ (assoc state :case ::start-xml :temp xml :mode ::xml)))
+
+ ;; Code block
+ (and
+ (= mode ::code-block)
+ (not (:empty line))
+ (>= (:leading line) (:initial-leading state)))
+ (assoc state
+ :case ::continue-code-block
+ :temp (conj temp line))
+
+ ;; Start code block
+ (and (empty? temp)
+ (not (:empty line))
+ (>= (:leading line) 4))
+ (assoc state
+ :case ::start-code-block
+ :mode ::code-block
+ :initial-leading (:leading line)
+ :temp (conj temp line))
+
+ ;; Line is empty
+ (true? (:empty line))
+ (cond
+ (empty? temp)
+ (assoc state :case ::line-empty :temp [])
+
+ (= mode ::code-block)
+ (assoc state
+ :case ::code-block
+ :yield [::code-block (map #(get-trimmed-value (:initial-leading state) %) temp)]
+ :temp []
+ :mode ::text)
+
+ (= mode ::text)
+ (assoc state
+ :case ::para
+ :yield [::para (reduce str (interpose " " (map :value temp)))]
+ :temp [])
+
+ :otherwise state)
+
+ ;; Default paragraph
+ :otherwise
+ (assoc state :case ::default :temp (conj temp line)))))
+
+ input)))
+
+(defn markdown [input]
+ (filter-yields
+ (process-markdown-lines
+ (map (fn [lineno line] (assoc line :lineno lineno))
+ (map inc (range)) ; 1..infinity
+ (read-markdown-lines (io/reader
+ input))))))
+
+
Oops, something went wrong.

0 comments on commit 66c17f1

Please sign in to comment.