Permalink
Browse files

Replacing Calais with term-extract. Adding binstubs.

  • Loading branch information...
1 parent f5adb2e commit 42578fcf547a868d51b4c9f5a577fbc8616bddbf @sshingler committed May 31, 2011
Showing with 144 additions and 46 deletions.
  1. +5 −8 Gemfile.lock
  2. +14 −0 bin/convert_to_should_syntax
  3. +14 −0 bin/edit_json.rb
  4. +14 −0 bin/minitar
  5. +14 −0 bin/nokogiri
  6. +14 −0 bin/prettify_json.rb
  7. +14 −0 bin/rake
  8. +14 −0 bin/rdebug
  9. +14 −0 bin/term-extract
  10. +14 −0 bin/testrb
  11. +1 −1 jkl.gemspec
  12. +5 −1 lib/jkl.rb
  13. +0 −28 lib/jkl/calais_client.rb
  14. +7 −8 test/unit/jkl_test.rb
View
@@ -2,30 +2,25 @@ PATH
remote: .
specs:
jakal (0.2.0)
- calais (>= 0.0.11)
mechanize (>= 1.0.0)
nokogiri (~> 1.4.4)
rake (>= 0.8.7)
+ term-extract (~> 0.5.1)
GEM
remote: http://rubygems.org/
specs:
addressable (2.2.6)
archive-tar-minitar (0.5.2)
- calais (0.0.11)
- curb (>= 0.1.4)
- json (>= 1.1.3)
- nokogiri (>= 1.3.3)
columnize (0.3.2)
crack (0.1.8)
- curb (0.7.15)
- json (1.5.1)
linecache19 (0.5.12)
ruby_core_source (>= 0.1.4)
mechanize (1.0.0)
nokogiri (>= 1.2.1)
nokogiri (1.4.4)
rake (0.8.7)
+ rbtagger (0.4.7)
ruby-debug-base19 (0.11.25)
columnize (>= 0.3.1)
linecache19 (>= 0.5.11)
@@ -37,6 +32,8 @@ GEM
ruby_core_source (0.1.5)
archive-tar-minitar (>= 0.5.2)
shoulda (2.11.3)
+ term-extract (0.5.1)
+ rbtagger
test-unit (2.3.0)
webmock (1.6.2)
addressable (>= 2.2.2)
@@ -46,12 +43,12 @@ PLATFORMS
ruby
DEPENDENCIES
- calais (>= 0.0.11)
jakal!
mechanize (>= 1.0.0)
nokogiri (~> 1.4.4)
rake (>= 0.8.7)
ruby-debug19 (= 0.11.6)
shoulda (= 2.11.3)
+ term-extract (~> 0.5.1)
test-unit (= 2.3.0)
webmock (= 1.6.2)
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'convert_to_should_syntax' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('shoulda', 'convert_to_should_syntax') # run the 'convert_to_should_syntax' executable located via the 'shoulda' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'edit_json.rb' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('json', 'edit_json.rb') # run the 'edit_json.rb' executable located via the 'json' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'minitar' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('archive-tar-minitar', 'minitar') # run the 'minitar' executable located via the 'archive-tar-minitar' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'nokogiri' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('nokogiri', 'nokogiri') # run the 'nokogiri' executable located via the 'nokogiri' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'prettify_json.rb' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('json', 'prettify_json.rb') # run the 'prettify_json.rb' executable located via the 'json' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'rake' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('rake', 'rake') # run the 'rake' executable located via the 'rake' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'rdebug' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('ruby-debug19', 'rdebug') # run the 'rdebug' executable located via the 'ruby-debug19' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'term-extract' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('term-extract', 'term-extract') # run the 'term-extract' executable located via the 'term-extract' gem
View
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'testrb' is installed as part of a gem, and
+# this file is here to facilitate running it.
+# BUNDLE_GEMFILE is only defaulted (||=) below, so a value already set by the caller wins.
+
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", __FILE__)
+
+require 'rubygems'
+require 'bundler/setup'
+
+load Gem.bin_path('test-unit', 'testrb') # run the 'testrb' executable located via the 'test-unit' gem
View
@@ -15,5 +15,5 @@ Gem::Specification.new do |s|
s.add_dependency(%q<rake>, [">= 0.8.7"])
s.add_dependency(%q<mechanize>, [">= 1.0.0"])
s.add_dependency(%q<nokogiri>, ["~> 1.4.4"])
- s.add_dependency(%q<calais>, [">= 0.0.11"])
+ s.add_dependency(%q<term-extract>, ["~> 0.5.1"])
end
View
@@ -1,8 +1,8 @@
require_relative "jkl/rss_client"
-require_relative "jkl/calais_client"
require_relative "jkl/text_client"
require "mechanize"
+require "term-extract"
module Jkl
class << self
@@ -18,5 +18,9 @@ def links(feed)
yield link if block_given?
end
end
+
+ def tags(text)
+ TermExtract.extract(text.force_encoding("UTF-8")).keys
+ end
end
end
View
@@ -1,28 +0,0 @@
-require "calais"
-
-module Jkl
- module Extraction # entity and tag extraction built on Calais.process_document
- class << self
-
- def calais_response(key, text) # submits text as :content with key as :license_id and returns the Calais response
- Calais.process_document(
- :content => text,
- :license_id => key
- )
- end
-
- def entities(key,text) # one single-pair hash per response entity: {type => [name attribute]}
- calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
- end
-
- def tags(key, text) # folds the per-entity hashes into one hash of type => list of names
- nested_list = {}
- entities(key,text).each do |a|
- nested_list = nested_list.merge!(a){ |key,v1,v2| v1+v2 } # repeated type: concatenate the name lists; the block's key shadows the method's key
- end
- nested_list
- end
-
- end
- end
-end
View
@@ -2,13 +2,12 @@
require "shoulda"
require "webmock/test_unit"
require "yaml"
-
require_relative "../../lib/jkl"
class JklTest < Test::Unit::TestCase
include WebMock::API
- context "for documents, plain text and tags" do
+ context "Jkl: When handling documents, plain text and tags" do
setup do
@url = "http://www.bbc.co.uk"
response = File.read('test/fixtures/bbc_story.html')
@@ -18,27 +17,27 @@ class JklTest < Test::Unit::TestCase
:headers => {'Content-Type' => 'text/html'})
end
- should "Get a document from a URL" do
+ should "get a document from a URL" do
doc = Jkl::get(@url)
assert_not_nil doc
end
- should "Get the plain text version of a document" do
+ should "get the plain text version of a document" do
document = Jkl::get(@url)
text = Jkl::Text::plain_text(document,2)
assert_equal 8884, text.length
end
- should "Get the keywords from a document" do
+ should "get the keywords from a document" do
document = Jkl::get(@url)
text = Jkl::Text::plain_text(document,2)
- tags = Jkl::Extraction::tags(calais_key, text)
+ tags = Jkl::tags(text)
assert ! tags.empty?
end
end
- context "for RSS" do
- should "Get links from a feed" do
+ context "Jkl: When handling RSS" do
+ should "get links from a feed" do
feed = "http://feeds.bbci.co.uk/news/rss.xml"
response = File.read('test/fixtures/topix_rss.xml')
stub_request(:get, "http://feeds.bbci.co.uk/news/rss.xml").

0 comments on commit 42578fc

Please sign in to comment.