Permalink
Browse files

init attempt

  • Loading branch information...
0 parents commit 037d5b5c7f01fb7dddbd18313e0e3bf781402293 @siuying committed Jan 4, 2010
Showing with 200 additions and 0 deletions.
  1. +23 −0 .autotest
  2. +6 −0 History.txt
  3. +8 −0 Manifest.txt
  4. +57 −0 README.txt
  5. +8 −0 Rakefile
  6. +3 −0 bin/table_parser
  7. +71 −0 lib/table_parser.rb
  8. +24 −0 test/test_table_parser.rb
@@ -0,0 +1,23 @@
+# -*- ruby -*-
+
+require 'autotest/restart'
+
+# Autotest.add_hook :initialize do |at|
+# at.extra_files << "../some/external/dependency.rb"
+#
+# at.libs << ":../some/external"
+#
+# at.add_exception 'vendor'
+#
+# at.add_mapping(/dependency.rb/) do |f, _|
+# at.files_matching(/test_.*rb$/)
+# end
+#
+# %w(TestA TestB).each do |klass|
+# at.extra_class_map[klass] = "test/test_misc.rb"
+# end
+# end
+
+# Autotest.add_hook :run_command do |at|
+# system "rake build"
+# end
@@ -0,0 +1,6 @@
+=== 1.0.0 / 2010-01-04
+
+* 1 major enhancement
+
+ * Birthday!
+
@@ -0,0 +1,8 @@
+.autotest
+History.txt
+Manifest.txt
+README.txt
+Rakefile
+bin/table_parser
+lib/table_parser.rb
+test/test_table_parser.rb
@@ -0,0 +1,57 @@
+= table_parser
+
+* FIX (url)
+
+== DESCRIPTION:
+
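+table_parser reads an HTML table, selected with an XPath expression, and
+returns its content as an array of rows, each row being an array of cell
+text. Cells that carry a rowspan attribute are repeated in the rows they span.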
+
+== FEATURES/PROBLEMS:
+
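+* Selects the table to parse with an XPath expression.
+* Repeats the text of a cell with a rowspan attribute in the following rows it spans.
+* Only td cells of tr elements directly under the selected table are read.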
+
+== SYNOPSIS:
+
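+ require 'table_parser'
+
+ parser = TableParser::Parser.new
+ rows = parser.parse(html, "/html/body/table")
+ # rows is an array of rows; each row is an array of cell text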
+
+== REQUIREMENTS:
+
+* nokogiri
+
+== INSTALL:
+
+* sudo gem install table_parser
+
+== DEVELOPERS:
+
+After checking out the source, run:
+
+ $ rake newb
+
+This task will install any missing dependencies, run the tests/specs,
+and generate the RDoc.
+
+== LICENSE:
+
+(The MIT License)
+
+Copyright (c) 2010 Francis Chong
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
+require 'rubygems'
+require 'hoe'
+
+Hoe.spec 'table_parser' do
+ developer('Francis Chong', 'francis@ignition.hk')
+ add_dependencies()
+end
+
@@ -0,0 +1,3 @@
+#!/usr/bin/env ruby
+
+# Placeholder executable: no command-line interface is implemented yet, so
+# the script only prints the message below and exits with a failure status.
+abort "you need to write me"
@@ -0,0 +1,71 @@
+require 'rubygems'
+require 'nokogiri'
+require 'open-uri'
+
+module TableParser
+ VERSION = '0.1.0'
+
+ class Parser
+ def parse(input, xpath_to_table="//table[0]")
+ table = extract_table(input, xpath_to_table)
+
+ headers = extract_headers(table)
+ contents = extract_content(table)
+
+ data = []
+ headers.each do |h|
+ data << {:name => h, :data => []}
+ end
+
+ contents.each do |row|
+ puts "row/#{row.length}"
+ end
+
+ contents
+ end
+
+ private
+ # extract_table("http://www.bs4.jp/table/index.html", "/html/body/table/tr/td/table")
+ def extract_table(input, xpath)
+ doc = Nokogiri::HTML(input)
+
+ rows = []
+ table = doc.xpath(xpath)
+ rows = table.xpath("./tr").collect do |row|
+ row.xpath("./td").collect do |col|
+ col
+ end
+ end
+ end
+
+ def extract_headers(rows)
+ headers = []
+ rows.first.collect do |col|
+ headers << col.text
+ end
+ headers
+ end
+
+ def extract_content(rows)
+ data = rows.clone
+ for i in (0..data.length-1)
+ row = data[i]
+ for j in (0..row.length-1)
+ col = row[j]
+ if !col.nil? && col.class != String
+ rowspan = col["rowspan"].to_i rescue 1
+ row[j] = col.text || " "
+ if rowspan > 1
+ rowspan -= 1
+ for addrow in (1..rowspan)
+ data[i + addrow].insert(j+1, col.text)
+ end
+ end
+ end
+ end
+ end
+ data[0..10]
+ end
+
+ end
+end
@@ -0,0 +1,24 @@
+require "test/unit"
+require "lib/table_parser"
+
+class TestTableParser < Test::Unit::TestCase
+ def test_parse_simple
+ parser = TableParser::Parser.new
+ table = parser.parse "<html><body><table><tr><td>A</td><td>B</td></tr>\
+ <tr><td rowspan=\"2\">1</td><td>2</td></tr> \
+ <tr><td>3</td></tr></table></body></html>",
+ "/html/body/table"
+
+ assert(table.size == 2, 'number of row should = 2 ')
+ assert(table[0].size == 2, 'number of col of row 1 = 2 ')
+ assert(table[1].size == 2, 'number of col of row 2 = 2 ')
+ end
+
+ def test_parse_complex
+ parser = TableParser::Parser.new
+ table = parser.parse open("http://www.bs4.jp/table/index.html").read,
+ "/html/body/table/tr/td/table"
+ end
+
+
+end

0 comments on commit 037d5b5

Please sign in to comment.