From 4621b31b62d11676caa48069b67aaed22c67338e Mon Sep 17 00:00:00 2001 From: Dmitry Gutov Date: Mon, 13 May 2013 02:42:46 +0400 Subject: [PATCH 1/2] Convert relative URLs in feed contents into absolute ones --- app/repositories/story_repository.rb | 23 ++++++++++++++-- spec/repositories/story_repository_spec.rb | 32 ++++++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 spec/repositories/story_repository_spec.rb diff --git a/app/repositories/story_repository.rb b/app/repositories/story_repository.rb index d94e4edc2..7c027d211 100644 --- a/app/repositories/story_repository.rb +++ b/app/repositories/story_repository.rb @@ -3,10 +3,12 @@ class StoryRepository def self.add(entry, feed) + url = entry.url + content = extract_content(entry) Story.create(feed: feed, title: entry.title, - permalink: entry.url, - body: StoryRepository.extract_content(entry), + permalink: url, + body: urls_to_absolute(content, url), is_read: false, published: entry.published || Time.now) end @@ -45,6 +47,21 @@ def self.extract_content(entry) end end + def self.urls_to_absolute(content, base_url) + doc = Nokogiri::HTML.fragment(content) + abs_re = URI::DEFAULT_PARSER.regexp[:ABS_URI] + [["a", "href"], ["img", "src"], ["video", "src"]].each do |tag, attr| + doc.css(tag).each do |node| + url = node.get_attribute(attr) + unless url =~ abs_re + node.set_attribute(attr, URI.join(base_url, url).to_s) + URI.parse(url) + end + end + end + doc.to_html + end + def self.samples [ SampleStory.new("Darin' Fireballs", "Why you should trade your firstborn for a Retina iPad"), @@ -52,4 +69,4 @@ def self.samples SampleStory.new("Lambda Da Ultimate", "Flimsy types are the new hotness") ] end -end \ No newline at end of file +end diff --git a/spec/repositories/story_repository_spec.rb b/spec/repositories/story_repository_spec.rb new file mode 100644 index 000000000..6d86ba70e --- /dev/null +++ b/spec/repositories/story_repository_spec.rb @@ -0,0 +1,32 @@ +require "spec_helper" +app_require "repositories/story_repository" + +describe StoryRepository do + klass = described_class + + describe ".urls_to_absolute" do + it "preserves existing absolute urls" do + content = 'bar' + expect(klass.urls_to_absolute(content, nil)).to eq(content) + end + + it "replaces relative urls in a, img and video tags" do + content = <<-EOS +
+ +tee + + +
+ EOS + expect(klass.urls_to_absolute(content, "http://oodl.io/d/")).to eq(<<-EOS) +
+ +tee + + +
+ EOS + end + end +end From 3ffeefab919ab7973fccc1f83d7b8e10248436d0 Mon Sep 17 00:00:00 2001 From: Dmitry Gutov Date: Mon, 13 May 2013 03:17:33 +0400 Subject: [PATCH 2/2] urls_to_absolute spec: remove newlines before comparing --- spec/repositories/story_repository_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/repositories/story_repository_spec.rb b/spec/repositories/story_repository_spec.rb index 6d86ba70e..ddfe82123 100644 --- a/spec/repositories/story_repository_spec.rb +++ b/spec/repositories/story_repository_spec.rb @@ -14,12 +14,12 @@ content = <<-EOS
-tee - +tee
EOS - expect(klass.urls_to_absolute(content, "http://oodl.io/d/")).to eq(<<-EOS) + expect(klass.urls_to_absolute(content, "http://oodl.io/d/").gsub(/\n/, "")) + .to eq((<<-EOS).gsub(/\n/, ""))
tee