Skip to content
Browse files

normalize haikus and skip the suspicious ones

  • Loading branch information...
1 parent 948b3ac commit 935333ff52d73d9201251c7a6e970b48de126f39 @mfilej committed Feb 12, 2010
Showing with 52 additions and 15 deletions.
  1. +18 −2 Rakefile
  2. +1 −1 haiku.lua
  3. +4 −3 haiku.tex
  4. +29 −9 lib/haiku.rb
View
20 Rakefile
@@ -1,3 +1,11 @@
+require 'rake/testtask'
+
+Rake::TestTask.new do |t|
+ t.libs << "test"
+ t.test_files = FileList['test/*_test.rb']
+ t.verbose = true
+end
+
task :default => :build
task :build => :clean do
@@ -9,10 +17,18 @@ task :clean do
end
task :fetch do
- rm_if_exist "data/haiku.atom"
- system "curl -o data/haiku.atom http://search.twitter.com/search.atom?q=%23haiku"
+ rm_if_exist "data/source.atom"
+ system %x{curl -o data/source.atom "http://search.twitter.com/search.atom?q=%23haiku&rpp=50"}
end
+task :normalize do
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
+ require "haiku"
+ source = open "data/source.atom"
+ File.open("data/haiku.atom", "w") do |f|
+ f << Haiku.rewrite(source)
+ end
+end
def rm_if_exist(paths)
Array(paths).each do |path|
View
2 haiku.lua
@@ -2,7 +2,7 @@ haiku = haiku or {}
local p = tex.sprint
function haiku.parse(tweet)
- p("MWA* " .. tweet .. " *MWA")
+ p(tweet)
end
function haiku.author(str)
View
7 haiku.tex
@@ -24,9 +24,10 @@
\startxmlsetups xml:atom:entry
\def\AUTHOR{\xmlfirst{#1}{/author/name}}
\def\HAIKU{\xmlfirst{#1}{/title}}
- \startframedtext[corner=round,align=middle,frame=none,background=screen,backgroundscreen=.9,width=\textwidth]
- From: \authorname{\AUTHOR}\par
- \parsehaiku{ /MAA }
+ \startframedtext[align=middle,frame=none,background=none,width=\textwidth]
+ \parsehaiku{\HAIKU}\par
+ ---\authorname{\AUTHOR}\par
+
\stopframedtext
\stopxmlsetups
View
38 lib/haiku.rb
@@ -3,19 +3,39 @@
class Haiku
def self.rewrite(data)
- doc = Nokogiri::XML(data)
+ Rewriter.new(data).rewrite
+ end
+
+ class Rewriter
+
+ attr_reader :data, :hashes
+
+ def initialize(data)
+ @data = data
+ @hashes = []
+ end
+
+ def rewrite
+ doc = Nokogiri::XML(data)
- entries = doc.css "entry"
+ entries = doc.css "entry"
- entries.each do |entry|
- title = entry.at("title")
- raw = title.inner_html
- h = Haiku::Normalizer.normalize(raw)
- next unless h.haiku?
- title.content = h
+ entries.each do |entry|
+ title = entry.at("title")
+ raw = title.inner_html
+ h = Haiku::Normalizer.normalize(raw)
+ hash = h.hash
+ if h.haiku? and !hashes.include?(hash)
+ title.content = h
+ hashes << hash
+ else
+ entry.remove
+ end
+ end
+
+ doc
end
- doc
end
end

0 comments on commit 935333f

Please sign in to comment.
Something went wrong with that request. Please try again.