Skip to content

Commit

Permalink
Initial version added to git
Browse files Browse the repository at this point in the history
  • Loading branch information
larsch committed Aug 9, 2008
0 parents commit 0d8c3d2
Show file tree
Hide file tree
Showing 7 changed files with 1,036 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
doc
7 changes: 7 additions & 0 deletions Manifest.txt
@@ -0,0 +1,7 @@
History.txt
Manifest.txt
README.txt
Rakefile
lib/creole.rb
test/test_creole.rb
test/testcases.rb
28 changes: 28 additions & 0 deletions README.txt
@@ -0,0 +1,28 @@
= Creole

* http://creole.rubyforge.org/
* http://rubyforge.org/projects/creole/

== DESCRIPTION:

Creole is a Creole-to-HTML converter for Creole, the lightweight markup
language (http://wikicreole.org/).

== SYNOPSIS:

gem 'creole'
require 'creole'
html = Creole.creolize( ... )

== BUGS:

If you found a bug, please report it at the Creole project's tracker
on RubyForge:

http://rubyforge.org/tracker/?group_id=6344

== LICENSE:

Creole is Copyright (c) 2008 Lars Christensen. It is free software, and
may be redistributed under the terms specified in the README file of
the Ruby distribution.
9 changes: 9 additions & 0 deletions Rakefile
@@ -0,0 +1,9 @@
# Rake build definition for the creole gem; the project is declared via Hoe.
require 'hoe'

# Put lib/ on the load path so that Creole::VERSION can be read from
# the library source itself.
$LOAD_PATH.unshift('lib')
require 'creole'

Hoe.new('creole', Creole::VERSION) { |creole|
  creole.developer('Lars Christensen', 'larsch@belunktum.dk')
}

350 changes: 350 additions & 0 deletions lib/creole.rb
@@ -0,0 +1,350 @@
require 'cgi'
require 'uri'

# :main: Creole

# The Creole module parses and translates Creole formatted text into
# XHTML. Creole is a lightweight markup syntax similar to what many
# WikiWikiWebs use. Example syntax:
#
# = Heading 1 =
# == Heading 2 ==
# === Heading 3 ===
# **Bold text**
# //Italic text//
# [[Links]]
# |=Table|=Heading|
# |Table |Cells |
# {{image.png}}
#
# The simplest interface is Creole.creolize. The default handling of
# links allows explicit local links using the [[link]] syntax. External
# links will only be allowed if specified using http(s) and ftp(s)
# schemes. If special link handling is needed, such as inter-wiki or
# hierarchical local links, you must inherit Creole::CreoleParser and
# override make_link.

module Creole

  VERSION = "0.1"

  # CreoleParseError is raised when the Creole parser encounters
  # something unexpected. This is generally not thrown unless there is
  # a bug in the parser.
  #
  # It inherits from StandardError (rather than Exception) so that a
  # bare +rescue+ clause catches it.
  class CreoleParseError < StandardError; end

  # Convert the argument in Creole format to HTML and return the
  # result. Example:
  #
  #    Creole.creolize("**Hello //World//**")
  #        #=> "<p><strong>Hello <em>World</em></strong></p>"
  #
  # This is an alias for calling CreoleParser#parse:
  #    CreoleParser.new.parse(creole)
  def self.creolize(creole)
    CreoleParser.new.parse(creole)
  end

  # Main Creole parser class. Call CreoleParser#parse to parse Creole
  # formatted text.
  #
  # This class is not reentrant. A separate instance is needed for
  # each thread that needs to convert Creole to HTML.
  #
  # Inherit this to provide custom handling of links. The overrideable
  # methods are: make_link, make_direct_link and make_image_link.
  class CreoleParser

    # Create a new CreoleParser instance.
    def initialize
      @base = nil
      @allowed_schemes = [ 'http', 'https', 'ftp', 'ftps' ]
      @uri_scheme_re = @allowed_schemes.join('|')
      # Matches a CamelCase "WikiWord" at the very start of the
      # remaining input. The \A anchor is essential: parse_inline
      # continues with the text *after* the match, so an unanchored
      # pattern would silently discard everything in front of the word.
      @link_re = /\A[A-Z][a-z]*([A-Z][a-z]*)+\b/
    end

    # Parse and convert the argument in Creole text to HTML and return
    # the result. The resulting HTML does not contain <html> and
    # <body> tags.
    #
    # Example:
    #
    #    parser = CreoleParser.new
    #    parser.parse("**Hello //World//**")
    #       #=> "<p><strong>Hello <em>World</em></strong></p>"
    def parse(string)
      @out = ""       # HTML output buffer
      @strong = false
      @p = false      # true while a paragraph or list item is open
      @stack = []     # names of the currently open HTML tags
      parse_block(string)
      return @out
    end

    private

    # Escape any characters with special meaning in HTML using HTML
    # entities.
    def escape_html(string)
      CGI::escapeHTML(string)
    end

    # Escape any characters with special meaning in URLs using URL
    # encoding.
    def escape_url(string)
      CGI::escape(string)
    end

    # Open +tag+ if it is not currently open, or close it if it is the
    # innermost open tag. If the tag is open but is not the innermost
    # one, closing it would produce badly nested HTML, so the original
    # markup (+match+) is emitted as escaped text instead.
    def toggle_tag(tag, match)
      if @stack.include?(tag)
        if @stack.last == tag
          @stack.pop
          @out << '</' << tag << '>'
        else
          @out << escape_html(match)
        end
      else
        @stack.push(tag)
        @out << '<' << tag << '>'
      end
    end

    # Close every open tag (innermost first) and mark that no
    # paragraph is open.
    def end_paragraph
      while tag = @stack.pop
        @out << "</#{tag}>"
      end
      @p = false
    end

    # Open a new paragraph unless one is already open (a list item
    # also counts, see parse_block). When one is open, the new line is
    # a continuation and is joined to the previous text with a single
    # space.
    def start_paragraph
      if not @p
        end_paragraph
        @out << '<p>'
        @stack.push('p')
        @p = true
      else
        @out << ' ' unless @out[-1,1] == ' '
      end
    end

    # Translate an explicit local link to a desired URL that is
    # properly URL-escaped. The default behaviour is to convert local
    # links directly, escaping any characters that have special
    # meaning in URLs. Relative URLs in local links are not handled.
    #
    # Examples:
    #
    #    make_link("LocalLink") #=> "LocalLink"
    #    make_link("/Foo/Bar") #=> "%2FFoo%2FBar"
    #
    # Must ensure that the result is properly URL-escaped. The caller
    # will handle HTML escaping as necessary. HTML links will not be
    # inserted if the function returns nil.
    #
    # Example custom behaviour:
    #
    #    make_link("LocalLink") #=> "/LocalLink"
    #    make_link("Wikipedia:Bread") #=> "http://en.wikipedia.org/wiki/Bread"
    def make_link(link) #:doc:
      escape_url(link)
    end

    # Sanitize a direct url (e.g. http://wikipedia.org/). The default
    # behaviour returns the original link as-is.
    #
    # Must ensure that the result is properly URL-escaped. The caller
    # will handle HTML escaping as necessary. Links will not be
    # converted to HTML links if the function returns nil.
    #
    # Custom versions of this function in inherited classes can
    # implement specific link handling behaviour, such as redirection
    # to intermediate pages (for example, for notifying the user that
    # he is leaving the site).
    def make_direct_link(url) #:doc:
      return url
    end

    # Sanitize and prefix image URLs. When images are encountered in
    # Creole text, this function is called to obtain the actual URL of
    # the image. The default behaviour is to return the image link
    # as-is. No image tags are inserted if the function returns nil.
    #
    # Custom version of the method can be used to sanitize URLs
    # (e.g. remove query-parts), inhibit off-site images, or add a
    # base URL. The result is passed to escape_html, so return a
    # String (note the +to_s+), for example:
    #
    #    def make_image_link(url)
    #       URI.join("http://mywiki.org/images/", url).to_s
    #    end
    def make_image_link(url) #:doc:
      return url
    end

    # Resolve the target of a [[link]] or WikiWord. Targets that parse
    # as a URI with one of the allowed schemes are used as-is;
    # everything else (including text that is not a valid URI) is
    # treated as a local link and handed to make_link.
    def make_explicit_link(link)
      begin
        uri = URI.parse(link)
        if uri.scheme and @allowed_schemes.include?(uri.scheme)
          return uri.to_s
        end
      rescue URI::InvalidURIError
        # Not a parsable URI; fall through and treat it as a local link.
      end
      return make_link(link)
    end

    # Convert inline markup in +str+ and append the HTML to the output
    # buffer. The input is consumed from the front: every pattern
    # below is anchored at the start of the remaining text, and after
    # each step parsing continues with the text following the match.
    # Parsing stops at the first line break.
    def parse_inline(str)
      until str.empty?
        case str
        when /\A\r?\n/
          return
        when /\A(\~)?((https?|ftps?):\/\/\S+?)(?=([,.?!:;"'])?(\s|$))/
          # Free-standing URL; a leading tilde suppresses the link.
          if $1
            @out << escape_html($2)
          else
            if uri = make_direct_link($2)
              @out << '<a href="' << escape_html(uri) << '">' << escape_html($2) << '</a>'
            else
              @out << escape_html($&)
            end
          end
        when /\A\[\[\s*([^|]*?)\s*(\|\s*(.*?))?\s*\]\]/m
          # [[target]] or [[target|label]]
          link = $1
          if uri = make_explicit_link(link)
            @out << '<a href="' << escape_html(uri) << '">' << escape_html($3 || link) << '</a>'
          else
            @out << escape_html($&)
          end
        when @link_re
          # CamelCase WikiWord at the current position.
          if uri = make_explicit_link($&)
            @out << '<a href="' << escape_html(uri) << '">' << escape_html($&) << '</a>'
          else
            @out << escape_html($&)
          end
        when /\A[^\/\\*\s{}~]+/
          # Run of plain text without any markup characters.
          @out << escape_html($&)
        when /\A\{\{\{(.*?\}*)\}\}\}/
          # Inline nowiki. The span ends at the first "}}}"; any extra
          # closing braces directly before it belong to the content,
          # so "{{{a}}}}" yields "a}". Matching lazily keeps two spans
          # on one line from being merged into one.
          @out << '<tt>' << escape_html($1) << '</tt>'
        when /\A\{\{\s*(.*?)\s*(\|\s*(.*?)\s*)?\}\}/
          # {{image}} or {{image|alternative text}}
          if uri = make_image_link($1)
            if $3
              @out << '<img src="' << escape_html(uri) << '" alt="' << escape_html($3) << '"/>'
            else
              @out << '<img src="' << escape_html(uri) << '"/>'
            end
          else
            @out << escape_html($&)
          end
        when /\A~([^\s])/
          # Tilde escapes the following character.
          @out << escape_html($1)
        when /\A[ \t]+/
          # Collapse runs of whitespace into a single space.
          @out << ' ' unless @out[-1,1] == ' '
        when /\A\*\*/
          toggle_tag 'strong', $&
        when /\A\/\//
          toggle_tag 'em', $&
        when /\A\\\\/
          @out << '<br/>'
        when /\A./
          # Any other single character (a lone markup character).
          @out << escape_html($&)
        else
          raise CreoleParseError, "Parse error at #{str[0,30].inspect}"
        end
        str = $'
      end
    end

    # Emit one table row. +str+ is the complete source line; each
    # "|cell" becomes a <td>, each "|=cell" a <th>. A trailing "|"
    # that closes the row does not produce an empty cell.
    def parse_table_row(str)
      @out << '<tr>'
      str.scan(/\s*\|(=)?\s*(([^|~]|~.)*)(?=\||$)/) {
        heading, cell = $1, $2
        unless cell.empty? and $'.empty?
          @out << (heading ? '<th>' : '<td>')
          parse_inline(cell)
          # Close inline tags that were left open inside the cell.
          until @stack.last == 'table'
            @out << '</' << @stack.pop << '>'
          end
          @out << (heading ? '</th>' : '</td>')
        end
      }
      @out << '</tr>'
    end

    # Inside a nowiki block a line holding "}}}" must be indented by
    # one space to be taken literally; strip that space again.
    def make_nowikiblock(input)
      input.gsub(/^ (?=\}\}\})/, '')
    end

    # True if +x+ is the name of a list tag ('ul' or 'ol').
    def ulol(x); x=='ul'||x=='ol'; end

    # Convert block-level markup (nowiki blocks, horizontal rules,
    # headings, tables, lists and paragraphs) in +str+, appending the
    # HTML to the output buffer, which is also returned. The input is
    # consumed one block element or line at a time.
    def parse_block(str)
      until str.empty?
        case str
        when /\A\{\{\{\r?\n(.*?)\r?\n\}\}\}/m
          end_paragraph
          nowikiblock = make_nowikiblock($1)
          @out << '<pre>' << escape_html(nowikiblock) << '</pre>'
        when /\A\s*-{4,}\s*$/
          end_paragraph
          @out << '<hr/>'
        when /\A\s*(={1,6})\s*(.*?)\s*=*\s*$(\r?\n)?/
          end_paragraph
          level = $1.size
          @out << "<h#{level}>" << escape_html($2) << "</h#{level}>"
        when /\A[ \t]*\|.*$(\r?\n)?/
          unless @stack.include?('table')
            end_paragraph
            @stack.push('table')
            @out << '<table>'
          end
          parse_table_row($&)
        when /\A\s*$(\r?\n)?/
          end_paragraph
        when /\A(\s*([*#]+)\s*(.*?))$(\r?\n)?/
          line, bullet, item = $1, $2, $3
          tag = (bullet[0,1] == '*' ? 'ul' : 'ol')
          # A line starting with exactly "**" outside of any list is
          # bold text, not a second-level list item.
          if bullet[0,1] == '#' or bullet.size != 2 or @stack.any? { |x| ulol(x) }
            ulcount = @stack.count { |x| ulol(x) }

            # Close tags until a list is innermost and the nesting is
            # no deeper than this item's level.
            while ulcount > bullet.size or not (@stack.empty? or ulol(@stack.last))
              @out << '</' << @stack.last << '>'
              ulcount -= 1 if ulol(@stack.pop)
            end

            # Same depth but the other kind of list: close it so the
            # right kind is opened below.
            if ulcount == bullet.size and @stack.last != tag
              @out << '</' << @stack.last << '>'
              @stack.pop
              ulcount -= 1
            end

            # Open lists until the item's nesting level is reached.
            while ulcount < bullet.size
              @out << '<' << tag << '>'
              @stack.push tag
              ulcount += 1
            end

            # Treat the item like an open paragraph so that following
            # text lines are appended to it.
            @p = true
            @out << '<li>'
            @stack.push('li')
            parse_inline(item)
          else
            start_paragraph
            parse_inline(line)
          end
        when /\A([ \t]*\S+.*?)$(\r?\n)?/
          start_paragraph
          parse_inline($1)
        else
          raise CreoleParseError, "Parse error at #{str[0,30].inspect}"
        end
        str = $'
      end
      end_paragraph
      return @out
    end

  end # class CreoleParser

end # module Creole

0 comments on commit 0d8c3d2

Please sign in to comment.