Permalink
Browse files

initial work for readme parsing

  • Loading branch information...
1 parent 1708305 commit 8220249c11edde08f35053260e8afbcb2da0b56b @ddollar ddollar committed Nov 1, 2009
Showing with 772 additions and 3 deletions.
  1. +9 −0 db/migrate/20091101223139_add_readme_to_rubygems.rb
  2. +2 −1 db/schema.rb
  3. +15 −2 lib/gemcutter.rb
  4. +1 −0 vendor/plugins/github_markup/.gitignore
  5. +26 −0 vendor/plugins/github_markup/.kick
  6. +20 −0 vendor/plugins/github_markup/LICENSE
  7. +82 −0 vendor/plugins/github_markup/README.md
  8. +11 −0 vendor/plugins/github_markup/Rakefile
  9. +1 −0 vendor/plugins/github_markup/init.rb
  10. +119 −0 vendor/plugins/github_markup/lib/github/commands/asciidoc2html
  11. +240 −0 vendor/plugins/github_markup/lib/github/commands/asciidocapi.py
  12. +48 −0 vendor/plugins/github_markup/lib/github/commands/rest2html
  13. +62 −0 vendor/plugins/github_markup/lib/github/markup.rb
  14. +19 −0 vendor/plugins/github_markup/lib/github/markup/rdoc.rb
  15. +26 −0 vendor/plugins/github_markup/lib/github/markups.rb
  16. +18 −0 vendor/plugins/github_markup/test/markup_test.rb
  17. +2 −0 vendor/plugins/github_markup/test/markups/README.asciidoc
  18. +13 −0 vendor/plugins/github_markup/test/markups/README.asciidoc.html
  19. +2 −0 vendor/plugins/github_markup/test/markups/README.markdown
  20. +5 −0 vendor/plugins/github_markup/test/markups/README.markdown.html
  21. +2 −0 vendor/plugins/github_markup/test/markups/README.noformat
  22. +2 −0 vendor/plugins/github_markup/test/markups/README.noformat.html
  23. +7 −0 vendor/plugins/github_markup/test/markups/README.pod
  24. +10 −0 vendor/plugins/github_markup/test/markups/README.pod.html
  25. +2 −0 vendor/plugins/github_markup/test/markups/README.rdoc
  26. +8 −0 vendor/plugins/github_markup/test/markups/README.rdoc.html
  27. +3 −0 vendor/plugins/github_markup/test/markups/README.rst
  28. +7 −0 vendor/plugins/github_markup/test/markups/README.rst.html
  29. +2 −0 vendor/plugins/github_markup/test/markups/README.textile
  30. +4 −0 vendor/plugins/github_markup/test/markups/README.textile.html
  31. +2 −0 vendor/plugins/github_markup/test/markups/README.txt
  32. +2 −0 vendor/plugins/github_markup/test/markups/README.txt.html
@@ -0,0 +1,9 @@
+class AddReadmeToRubygems < ActiveRecord::Migration
+ def self.up
+ add_column :rubygems, :readme, :text
+ end
+
+ def self.down
+ remove_column :rubygems, :readme
+ end
+end
View
@@ -9,7 +9,7 @@
#
# It's strongly recommended to check this file into your version control system.
-ActiveRecord::Schema.define(:version => 20091026234707) do
+ActiveRecord::Schema.define(:version => 20091101223139) do
create_table "delayed_jobs", :force => true do |t|
t.integer "priority", :default => 0
@@ -85,6 +85,7 @@
t.integer "downloads", :default => 0
t.string "slug"
t.integer "versions_count", :default => 0
+ t.text "readme"
end
add_index "rubygems", ["name"], :name => "index_rubygems_on_name"
View
@@ -13,7 +13,7 @@ def initialize(user, body)
end
def process
- pull_spec && find && authorize && save
+ pull_spec && find && authorize && extract_readme && save
end
def authorize
@@ -54,9 +54,12 @@ def update
false
end
+ def format
+ @format ||= Gem::Format.from_io(self.body)
+ end
+
def pull_spec
begin
- format = Gem::Format.from_io(self.body)
@spec = format.spec
rescue Exception => e
notify("Gemcutter cannot process this gem.\n" +
@@ -65,6 +68,16 @@ def pull_spec
end
end
+ def extract_readme
+ files = format.file_entries.inject({}) do |hash, (attributes, data)|
+ hash.update(attributes['path'] => data)
+ end
+
+ if readme = files.keys.sort.detect { |filename| filename =~ /^README/ }
+ rubygem.readme = GitHub::Markup.render(readme, files[readme])
+ end
+ end
+
def find
@rubygem = Rubygem.find_or_initialize_by_name(self.spec.name)
@version = @rubygem.find_or_initialize_version_from_spec(spec)
@@ -0,0 +1 @@
+*.pyc
@@ -0,0 +1,26 @@
+# take control of the growl notifications
+module GrowlHacks
+ def growl(type, subject, body, *args, &block)
+ case type
+ when Kicker::GROWL_NOTIFICATIONS[:succeeded]
+ puts subject = "Success"
+ body = body.split("\n").last
+ when Kicker::GROWL_NOTIFICATIONS[:failed]
+ subject = "Failure"
+ puts body
+ body = body.split("\n").last
+ else
+ return nil
+ end
+ super(type, subject, body, *args, &block)
+ end
+end
+
+Kicker.extend(GrowlHacks)
+
+# no logging
+Kicker::Utils.module_eval do
+ def log(message)
+ nil
+ end
+end
@@ -0,0 +1,20 @@
+Copyright (c) 2009 GitHub
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,82 @@
+GitHub Markup
+=============
+
+We use this library on GitHub when rendering your README or any other
+rich text file.
+
+Want to contribute? Great! There are two ways to add markups.
+
+
+### Commands
+
+If your markup is in a language other than Ruby, drop a translator
+script in `lib/github/commands` which accepts input on STDIN and
+returns HTML on STDOUT. See [rest2html][r2h] for an example.
+
+Once your script is in place, edit `lib/github/markups.rb` and tell
+GitHub Markup about it. Again we look to [rest2html][r2hc] for
+guidance:
+
+ command(:rest2html, /rest|rst/)
+
+Here we're telling GitHub Markup of the existence of a `rest2html`
+command which should be used for any file ending in `rest` or
+`rst`. Any regular expression will do.
+
+Finally add your tests. Create a `README.extension` in `test/markups`
+along with a `README.extension.html`. As you may imagine, the
+`README.extension` should be your known input and the
+`README.extension.html` should be the desired output.
+
+Now run the tests: `rake`
+
+If nothing complains, congratulations!
+
+
+### Classes
+
+If your markup can be translated using a Ruby library, that's
+great. Check out `lib/github/markups.rb` for some
+examples. Let's look at Markdown:
+
+ markup(:markdown, /md|mkdn?|markdown/) do |content|
+ Markdown.new(content).to_html
+ end
+
+We give the `markup` method three bits of information: the name of the
+file to `require`, a regular expression for extensions to match, and a
+block to run with unformatted markup which should return HTML.
+
+If you need to monkeypatch a RubyGem or something, check out the
+included RDoc example.
+
+Tests should be added in the same manner as described under the
+`Commands` section.
+
+
+Usage
+-----
+
+ require 'github/markup'
+ GitHub::Markup.render('README.markdown', "* One\n* Two")
+
+Or, more realistically:
+
+ require 'github/markup'
+ GitHub::Markup.render(file, File.read(file))
+
+
+Contributing
+------------
+
+1. Fork it.
+2. Create a branch
+3. Commit your changes
+4. Push to the branch
+5. Create an [Issue][1] with a link to your branch
+6. Enjoy a refreshing Diet Coke and wait
+
+
+[r2h]: http://github.com/defunkt/github_markup/tree/master/lib/github/commands/rest2html
+[r2hc]: http://github.com/defunkt/github_markup/tree/master/lib/github/markups.rb#L13
+[1]: http://github.com/defunkt/github_markup/issues
@@ -0,0 +1,11 @@
+task :default => :test
+
+desc "Run tests"
+task :test do
+ Dir['test/**/*_test.rb'].each { |file| require file }
+end
+
+desc "Kick it"
+task :kick do
+ exec "kicker -e rake test lib"
+end
@@ -0,0 +1 @@
+require 'github/markup'
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+
+"""A small wrapper file for parsing AsciiDoc files at Github."""
+
+__author__ = "Devin Weaver"
+__copyright__ = "Copyright (C) 2009 Devin Weaver"
+__license__ = "Public Domain"
+__version__ = "0.1"
+
+"""
+github_asciidoc.py
+------------------
+
+This is a wrapper file for parsing AsciiDoc files at github. It wraps the
+current AsciiDoc API.
+
+AsciiDoc specifications suggest using the file extension `.txt`; however, this
+causes a conflict because there is no way to determine whether a text file is
+AsciiDoc without pre-processing the file. This gives us two simple
+options:
+
+1. **Parse all text files**. We could have all files ending in `.txt` or
+ ``README.txt`` be parsed through AsciiDoc. It will print pretty text fine
+ even if it isn't formatted as such. However this could be *not what the user
+ expects*.
+2. **Pick a unique extension**. We could pick a unique extension (i.e.
+ `.asciidoc`) to prevent clashing. Although not directly suggested by the
+ author of AsciiDoc there is no standard or practice to the contrary.
+
+I recommend option two.
+
+Requirements
+~~~~~~~~~~~~
+
+The AsciiDoc API comes in two parts. The first is the system installation of
+AsciiDoc which has a simple install_. The second part is the API script. You
+can either copy this to the current directory or the application's lib folder.
+There is more information on the `API page`_
+
+The `re` package is imported here to implement e-mail address
+cloaking. AsciiDoc does not offer its own cloaking algorithm like docutils
+does, so I made a simple one here to do the same. **If the expense of regexes
+is too high it can be easily commented out.**
+
+.. tip::
+ AsciiDoc by default runs in *safe mode* which means it will not include
+ external files that are **not** in the same directory as the `infile`.
+ However since we use a StringIO through the API it should be based on the
+ current working directory.
+
+.. _install: http://www.methods.co.nz/asciidoc/userguide.html
+.. _API page: http://www.methods.co.nz/asciidoc/asciidocapi.html
+"""
+
+# Best effort: switch to the user's default locale settings. If that fails
+# for any reason the error is ignored and the script carries on.
+try:
+ import locale
+ locale.setlocale(locale.LC_ALL, '')
+except:
+ pass
+
+import sys
+import cStringIO # faster than StringIO
+from asciidocapi import AsciiDocAPI
+from asciidocapi import AsciiDocError
+import re # only needed to simulate cloak_email_addresses
+
+def main():
+ """
+ Parses the given AsciiDoc file or the redirected string input and returns
+ the HTML body.
+
+ Usage: asciidoc2html < README.rst
+ asciidoc2html README.rst
+ """
+ try:
+ text = open(sys.argv[1], 'r').read()
+ except IOError: # given filename could not be found
+ return ''
+ except IndexError: # no filename given
+ text = sys.stdin.read()
+
+ infile = cStringIO.StringIO(text)
+ outfile = cStringIO.StringIO()
+ asciidoc = AsciiDocAPI()
+ asciidoc.options('-s')
+
+ try:
+ asciidoc.execute(infile, outfile, 'xhtml11')
+ except AsciiDocError, strerror:
+ str = "%s" % (strerror)
+ str = str.replace("&", "&amp;") # Must be done first
+ str = str.replace("<", "%lt;")
+ str = str.replace(">", "%gt;")
+ outfile.write ("<blockquote><strong>AsciiDoc ERROR: %s</strong></blockquote>" % (str))
+
+ """
+ Cloak email addresses
+
+ AsciiDoc API does not have a `cloak_email_addresses` option. We can do the
+ same with a set of regex but that can be expensive. Keep section commented
+ to disable. So ``abc@mail.example.com`` becomes:
+
+ -----------
+ <a class="reference" href="mailto:abc&#37;&#52;&#48;mail&#46;example&#46;org">
+ abc<span>&#64;</span>mail<span>&#46;</span>example<span>&#46;</span>org</a>
+ -----------
+ """
+ def mangleEmail(matches):
+ email1 = "%s&#37;&#52;&#48;%s" % (matches.group(1), matches.group(2))
+ email1 = email1.replace(".", "&#46;")
+ email2 = "%s<span>&#64;</span>%s" % (matches.group(1), matches.group(2))
+ email2 = email2.replace(".", "<span>&#46;</span>")
+ return "<a class=\"reference\" href=\"mailto:%s\">%s</a>" % (email1, email2)
+
+ return re.sub(r'<a href="mailto:([^@]+)@([^@]+)">([^@]+)@([^@]+)</a>', mangleEmail, outfile.getvalue())
+ #return outfile.getvalue()
+
+if __name__ == '__main__':
+ print main()
Oops, something went wrong.

0 comments on commit 8220249

Please sign in to comment.