timrogers · timrogers · Aug 22, 2013 · Aug 19, 2013 · Aug 19, 2013 · Aug 19, 2013
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+spec/support/cassettes
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -8,6 +8,9 @@ PATH
 GEM
   remote: https://rubygems.org/
   specs:
+    addressable (2.3.5)
+    crack (0.4.1)
+      safe_yaml (~> 0.9.0)
     diff-lcs (1.2.4)
     httparty (0.11.0)
       multi_json (~> 1.0)
@@ -28,6 +31,11 @@ GEM
     rspec-expectations (2.14.2)
       diff-lcs (>= 1.1.3, < 2.0)
     rspec-mocks (2.14.3)
+    safe_yaml (0.9.5)
+    vcr (2.5.0)
+    webmock (1.11.0)
+      addressable (>= 2.2.7)
+      crack (>= 0.3.2)
 
 PLATFORMS
   ruby
@@ -36,3 +44,5 @@ DEPENDENCIES
   mocha (~> 0.14.0)
   rapgenius!
   rspec (~> 2.14.1)
+  vcr (~> 2.5.0)
+  webmock (~> 1.11.0)
diff --git a/lib/rapgenius/annotation.rb b/lib/rapgenius/annotation.rb
@@ -26,11 +26,12 @@ def explanation
     end
 
     def song
-      entry_path = document.css('meta[property="rap_genius:song"]').
-        attr('content').to_s
-
-      @song ||= Song.new(entry_path)
+      @song ||= Song.new(song_url)
     end
 
+    def song_url
+      @song_url ||= document.css('meta[property="rap_genius:song"]').
+        attr('content').to_s
+    end
   end
-end
+end
diff --git a/lib/rapgenius/scraper.rb b/lib/rapgenius/scraper.rb
@@ -3,33 +3,55 @@
 
 module RapGenius
   module Scraper
-    BASE_URL = "http://rapgenius.com/".freeze
+    # Custom HTTParty parser that parses the returned body with Nokogiri
+    class NokogiriParser < HTTParty::Parser
+      SupportedFormats.merge!('text/html' => :html)
 
-    attr_reader :url
+      def html
+        Nokogiri::HTML(body)
+      end
+    end
+
+    # HTTParty client
+    #
+    # Sets some useful defaults for all of our requests.
+    #
+    # See Scraper#fetch
+    class Client
+      include HTTParty
+
+      format   :html
+      parser   NokogiriParser
+      base_uri 'http://rapgenius.com'
+      headers  'User-Agent' => "rapgenius.rb v#{RapGenius::VERSION}"
+    end
 
+    BASE_URL = "#{Client.base_uri}/".freeze # freeze applies to the whole joined URL, so the constant is immutable
+
+    attr_reader :url
 
     def url=(url)
-      if !(url =~ /^https?:\/\//)
-        @url = "#{BASE_URL}#{url}" 
+      if url =~ %r{\Ahttps?://} # \A anchors to the start of the string, not of any line
+        @url = url # already absolute: keep verbatim
       else
-        @url = url
+        @url = BASE_URL + url # relative path: resolve against the site root
       end
     end
 
     def document
-      @document ||= Nokogiri::HTML(fetch(@url))
+      @document ||= fetch(@url)
     end
 
     private
+
     def fetch(url)
-      response = HTTParty.get(url)
+      response = Client.get(url)
 
       if response.code != 200
         raise ScraperError, "Received a #{response.code} HTTP response"
       end
 
-      response.body
+      response.parsed_response
     end
-
   end
-end
+end
diff --git a/lib/rapgenius/song.rb b/lib/rapgenius/song.rb
@@ -11,7 +11,6 @@ def initialize(path)
       self.url = path
     end
 
-
     def artist
       document.css('.song_title a').text
     end
@@ -43,7 +42,5 @@ def annotations
         )
       end
     end
-
-
   end
-end
+end
diff --git a/rapgenius.gemspec b/rapgenius.gemspec
@@ -14,13 +14,15 @@ Gem::Specification.new do |s|
     "working at Rap Genius is the API". With this magical screen-scraping gem,
     you can access the wealth of data on the internet Talmud in Ruby.}
 
-  s.add_runtime_dependency "nokogiri", "~>1.6.0"
-  s.add_runtime_dependency "httparty", "~>0.11.0"
-  s.add_development_dependency "rspec", "~>2.14.1"
-  s.add_development_dependency "mocha", "~>0.14.0"
+  s.add_runtime_dependency "nokogiri",    "~>1.6.0"
+  s.add_runtime_dependency "httparty",    "~>0.11.0"
+  s.add_development_dependency "rspec",   "~>2.14.1"
+  s.add_development_dependency "mocha",   "~>0.14.0"
+  s.add_development_dependency "webmock", "~>1.11.0"
+  s.add_development_dependency "vcr",     "~>2.5.0"
 
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = ["lib"]
-end
+end
diff --git a/spec/annotation_spec.rb b/spec/annotation_spec.rb
diff --git a/spec/rapgenius/annotation_spec.rb b/spec/rapgenius/annotation_spec.rb
@@ -0,0 +1,41 @@
+require 'spec_helper'
+
+module RapGenius
+  describe Annotation, vcr: {cassette_name: "big-sean-annotation"} do
+
+    let(:annotation) { described_class.new(id: "2092393") }
+    subject { annotation }
+
+    its(:id)       { should eq "2092393" }
+    its(:url)      { should eq "http://rapgenius.com/2092393" }
+    its(:song)     { should be_a Song }
+    its(:song_url) { should eq "http://rapgenius.com/Big-sean-control-lyrics" }
+
+    describe "#lyric" do
+      it "should have the correct lyric" do
+        annotation.lyric.should eq "You gon' get this rain like it's May weather,"
+      end
+    end
+
+    describe "#explanation" do
+      it "should have the correct explanation" do
+        annotation.explanation.should include "making it rain"
+      end
+    end
+
+    describe '.find' do
+      it "returns a new instance at the specified path" do
+        i = described_class.find("foobar")
+        i.should be_an Annotation
+        i.id.should eq "foobar"
+      end
+    end
+
+    context "with additional parameters passed into the constructor" do
+      let(:annotation) { described_class.new(id: "5678", lyric: "foo") }
+
+      its(:id)    { should eq "5678" }
+      its(:lyric) { should eq "foo" }
+    end
+  end
+end
diff --git a/spec/rapgenius/scraper_spec.rb b/spec/rapgenius/scraper_spec.rb
@@ -0,0 +1,54 @@
+require 'spec_helper'
+
+class ScraperTester
+  include RapGenius::Scraper
+end
+
+module RapGenius
+  describe Scraper do
+
+    let(:scraper) { ScraperTester.new }
+
+    describe "#url=" do
+      it "forms the URL with the base URL, if the current path is relative" do
+        scraper.url = "foobar"
+        scraper.url.should eq "#{RapGenius::Scraper::BASE_URL}foobar" # exact match: base URL immediately followed by the path
+      end
+
+      it "leaves the URL as it is if already complete" do
+        scraper.url = "http://foobar.com/baz"
+        scraper.url.should eq "http://foobar.com/baz"
+      end
+    end
+
+    describe "#document" do
+      before do
+        scraper.url = "http://foo.bar/"
+      end
+
+      context "with a successful request" do
+        before do
+          stub_request(:get, "http://foo.bar").to_return({body: 'ok', status: 200})
+        end
+
+        it "returns a Nokogiri document object" do
+          scraper.document.should be_a Nokogiri::HTML::Document
+        end
+
+        it "contains the tags in page received back from the HTTP request" do
+          scraper.document.css('body').length.should eq 1
+        end
+      end
+
+      context "with a failed request" do
+        before do
+          stub_request(:get, "http://foo.bar").to_return({body: '', status: 404})
+        end
+
+        it "raises a ScraperError" do
+          expect { scraper.document }.to raise_error(RapGenius::ScraperError)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/rapgenius/song_spec.rb b/spec/rapgenius/song_spec.rb
@@ -0,0 +1,46 @@
+require 'spec_helper'
+
+module RapGenius
+  describe Song do
+    context "given Big Sean's Control", vcr: {cassette_name: "big-sean-control-lyrics"} do
+      subject { described_class.new("Big-sean-control-lyrics") }
+
+      its(:url)         { should eq "http://rapgenius.com/Big-sean-control-lyrics" }
+      its(:title)       { should eq "Control" }
+      its(:artist)      { should eq "Big Sean" }
+      its(:description) { should include "blew up the Internet" }
+      its(:full_artist) { should include "(Ft. Jay Electronica & Kendrick Lamar)"}
+
+      describe "#images" do
+        it "should be an Array" do
+          subject.images.should be_an Array
+        end
+
+        it "should include Big Sean's picture" do
+          subject.images.should include "http://s3.amazonaws.com/rapgenius/1375029260_Big%20Sean.png"
+        end
+      end
+
+      describe "#annotations" do
+        it "should be an Array of Annotation objects" do
+          subject.annotations.should be_an Array
+          subject.annotations.first.should be_a Annotation
+        end
+
+        it "should be of a valid length" do
+          # Annotations get added and removed from the live site; we want our
+          # count to be somewhat accurate, within reason.
+          subject.annotations.length.should be_within(15).of(130)
+        end
+      end
+    end
+
+    describe '.find' do
+      it "returns a new instance at the specified path" do
+        i = described_class.find("foobar")
+        i.should be_a Song
+        i.url.should eq 'http://rapgenius.com/foobar'
+      end
+    end
+  end
+end