Initial commit with original code from https://gist.github.com/33900

sikachu · Nov 21, 2011 · 86e83a0 · 86e83a0
commit 86e83a0
Showing 1 changed file with 96 additions and 0 deletions.
diff --git a/comtoon.rb b/comtoon.rb
@@ -0,0 +1,96 @@
+require 'rubygems'
+require 'hpricot'
+require 'iconv'
+require 'time'
+require 'builder'
+require 'active_support/ordered_hash'
+include ActiveSupport
+
+THAI_MONTHS = [nil] + %w(มกราคม กุมภาพันธ์ มีนาคม เมษายน พฤษภาคม มิถุนายน กรกฎาคม สิงหาคม กันยายน ตุลาคม พฤษจิกายน ธันวาคม)
+
+# fetch header and stuff
+cookie = `curl -i http://www.comtoon.com/v3/releaseChk.asp`.match(/Set-Cookie: (.+); path=/)[1]
+
+# trigger stupid page
+`curl -b #{cookie} http://www.comtoon.com/v3/release.asp`
+
+# and then calling the actual data!
+result = `curl -b #{cookie} http://www.comtoon.com/database/w/hl/ct_index.asp`.match(/function showrelease\(\)\{x= "(.+)"; return \(x\);\}/)[1].gsub(/(&nbsp;| )+/, ' ')
+doc = Hpricot.parse(Iconv.conv('utf8', 'tis620', result))
+date = Time.parse("0:00")
+publisher = ""
+data = OrderedHash.new
+
+(doc/"tr").each do |tr|
+  if tr[:bgcolor] == "#FFFF99"
+    # encouter a new date row
+    date_match = tr.innerText.match /ที่ ([0-9]{1,2}) (.+) .+ ([0-9]{1,4})/
+    date = Time.mktime((date_match[3].to_i - 543), THAI_MONTHS.index(date_match[2]), date_match[1].to_i, 8).xmlschema[0...10]
+  elsif tr[:bgcolor] == "#99CCFF"
+    # encouter a new publisher row
+    publisher = tr.innerText
+  else
+    # comic row, add a new object to hash!
+    data[date] ||= OrderedHash.new
+    data[date][publisher] ||= []
+    data[date][publisher] += [tr.innerText.strip]
+  end
+end
+
+# YAML type, deprecated
+# data.each do |i| (date, publishers = i)
+#   puts ">> #{date}"
+#   output = ""
+#   publishers.each do |j| (publisher, comics = j)
+#     output += "\"#{publisher}\":\n"
+#     comics.each do |comic|
+#       output += "  - \"#{comic}\"\n"
+#     end
+#   end
+#   puts output
+# end
+
+data.each do |i| (date, publishers = i)
+  output = ""
+  publishers.each do |j| (publisher, comics = j)
+    output += "<strong>#{publisher}</strong><ul>"
+    comics.each do |comic|
+      output += "<li>#{comic}</li>"
+    end
+    output += "</ul>"
+  end
+  output += "<p>ที่มา: <a target='_blank' href='http://www.comtoon.com'>Comtoon.com</a></p>"
+
+  # write to file
+  File.open("data/#{date}.html", 'w+') {|f| f.write(output) }
+end
+
+# generate XML output
+xml = Builder::XmlMarkup.new
+xml.instruct!
+xml.rss :version => "2.0", "xmlns:atom" => "http://www.w3.org/2005/Atom" do
+  xml.channel do
+    xml.title "Thai comic update"
+    xml.link "http://feedproxy.google.com/ThaiComicUpdate"
+    xml.description "Daily Thailand's comic release update. However, please note that this feed might be broken anytime. In case that happend, contact me at http://sikachu.com :)"
+    xml.generator "RubyXMLBuilder"
+    xml.language "th"
+    xml.atom :link, :type => "application/rss+xml", :rel => "self", :href => "http://comic.dev.7republic.com/rss.xml"
+
+    # load files
+    Dir["data/*.html"].sort{|x,y| y <=> x }.each do |filename|
+      d = filename.match /([0-9]{4})-([0-9]{2})-([0-9]{2})\.html/
+      time = Time.mktime(d[1].to_i, d[2].to_i, d[3].to_i, 1)
+      xml.item do
+        xml.title "หนังสือการ์ตูนออกใหม่วันที่ #{time.strftime("%d/%m/%Y")}"
+        xml.description do
+          xml << "<![CDATA[" << File.read(filename) << "]]>"
+        end
+        xml.guid "comic##{d[1]}-#{d[2]}-#{d[3]}", :isPermaLink => "false"
+        xml.pubDate time.utc.rfc822
+      end
+    end
+  end
+end
+
+File.open("rss.xml", 'w+') {|f| f.write(xml.target!) }