Skip to content

Commit

Permalink
remove trailing slash on paths
Browse files Browse the repository at this point in the history
  • Loading branch information
igrigorik committed Mar 31, 2011
1 parent 555e7ae commit a92af06
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 10 deletions.
1 change: 1 addition & 0 deletions lib/postrank-uri.rb
Expand Up @@ -125,6 +125,7 @@ def hash(uri)
def normalize(uri)
u = parse(uri)
u.path = u.path.squeeze('/')
u.path = u.path.chomp('/') if u.path.size != 1
u.query = nil if u.query && u.query.empty?
u.fragment = nil
u
Expand Down
2 changes: 1 addition & 1 deletion lib/postrank-uri/version.rb
@@ -1,5 +1,5 @@
# Version constant for the postrank-uri library (lib/postrank-uri/version.rb).
# NOTE(review): this text is a rendered diff whose +/- markers were lost. The
# file header reports "1 addition & 1 deletion", so the two VERSION lines below
# are presumably the removed ("1.0.7") and added ("1.0.8") sides of the change,
# not a deliberate double assignment.
module PostRank
module URI
VERSION = "1.0.7"
VERSION = "1.0.8"
end
end
6 changes: 3 additions & 3 deletions spec/c18n_hosts.yml
Expand Up @@ -12,7 +12,7 @@
- http://www.nytimes.com/2011/03/15/business/media/15adco.html

- - http://networkeffect.allthingsd.com/20110308/googles-approach-to-social/?mod=tweet
- http://networkeffect.allthingsd.com/20110308/googles-approach-to-social/
- http://networkeffect.allthingsd.com/20110308/googles-approach-to-social

- - http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html?mod=WSJ_hp_LEFTWhatsNewsCollection
- http://online.wsj.com/article/SB10001424052748704657704576150191661959856.html
Expand All @@ -21,7 +21,7 @@
- http://diepresse.com/home/wirtschaft/636448/Griechenland_Drachme-als-letzte-Rettung

- - http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/?partner=rss&emc=rss
- http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science/
- http://dotearth.blogs.nytimes.com/2010/12/14/beyond-political-science

- - http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html?nav=rss_email/components
- http://www.washingtonpost.com/wp-dyn/content/article/2010/12/14/AR2010121406045.html
Expand All @@ -48,7 +48,7 @@
- http://www.dw-world.de/dw/article/0,,6330472,00.html

- - http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/?rss
- http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565/
- http://www.repubblica.it/rubriche/il-caso-del-giorno/2010/12/13/news/riscossa_aeffe-10153565

- - http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html?wtmc=RSS.Sport.Fussball
- http://www.welt.de/sport/Der-Hoellenritt-des-Fussball-Profis-Jean-Marc-Bosman.html
Expand Down
23 changes: 17 additions & 6 deletions spec/postrank-uri_spec.rb
Expand Up @@ -86,6 +86,16 @@ def n(uri)
n('IGVITA.COM/ABC').should == (igvita + "ABC")
end

# Pins the trailing-slash rule: a trailing "/" is dropped from non-root paths,
# while a path that is only "/" is expected to stay as it is.
# `n` is the spec helper named in the hunk header (def n(uri)); its body is not
# shown here -- presumably it wraps normalize and returns a string; confirm.
it "should remove trailing slash on paths" do
# root path: the lone "/" is expected to be kept
n('http://igvita.com/').should == 'http://igvita.com/'

# single-segment path, without and with a trailing slash
n('http://igvita.com/a').should == 'http://igvita.com/a'
n('http://igvita.com/a/').should == 'http://igvita.com/a'

# two-segment path, without and with a trailing slash
n('http://igvita.com/a/b').should == 'http://igvita.com/a/b'
n('http://igvita.com/a/b/').should == 'http://igvita.com/a/b'
end

end

context "canonicalization" do
Expand Down Expand Up @@ -125,7 +135,7 @@ def c(uri)
context "embedded links" do
# The destination link is carried in the `url` query parameter of the
# news.google.com redirect URL.
# `c` is the spec helper named in the hunk header (def c(uri)); its body is not
# shown here.
it "should extract embedded redirects from Google News" do
u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
# NOTE(review): two expectations appear because the diff's +/- markers were
# lost. Presumably the first (with trailing slash) is the removed line and the
# second (without) is its replacement -- confirm against the commit.
u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111/'
u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111'
end

it "should extract embedded redirects from xfruits.com" do
Expand All @@ -135,7 +145,7 @@ def c(uri)

# The destination link is percent-encoded in the `u` query parameter of the
# myspace.com PostTo URL; the expectation is the decoded link.
it "should extract embedded redirects from MySpace" do
u = c('http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fghanaian-chronicle.com%2Fnews%2Fother-news%2Fcanadian-high-commissioner-urges-media%2F&t=Canadian%20High%20Commissioner%20urges%20media')
# NOTE(review): two expectations appear because the diff's +/- markers were
# lost. Presumably the first (with trailing slash) is the removed line and the
# second (without) is its replacement -- confirm against the commit.
u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media/'
u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media'
end
end
end
Expand Down Expand Up @@ -172,11 +182,12 @@ def h(uri)
end

# Different spellings of one URI (mixed-case host, missing scheme, trailing
# slash) are all expected to produce the same digest from the `h` helper
# (def h(uri) in the hunk header; body not shown here).
# NOTE(review): this is a rendered diff with the +/- markers lost. Presumably
# the first `hash =` line and the three "EverBurnign" lines are the removed
# side, and the second `hash =` line and the four "everburning" lines are the
# added side -- confirm against the commit.
it "should compute MD5 hash of the normalized URI" do
hash = '021a1032b1ea631a7c33d1a0ccc562bf'
hash = '55fae8910d312b7878a3201ed653b881'

h('http://EverBurnign.Com/feed/post/1').should == hash
h('Everburnign.com/feed/post/1').should == hash
h('everburnign.com/feed/post/1').should == hash
h('http://EverBurning.Com/feed/post/1').should == hash
h('Everburning.com/feed/post/1').should == hash
h('everburning.com/feed/post/1').should == hash
# trailing-slash variant: expected to hash the same as the slash-less form
h('everburning.com/feed/post/1/').should == hash
end
end

Expand Down

0 comments on commit a92af06

Please sign in to comment.