Skip to content

Commit

Permalink
Merge branch 'master' of github.com:articlemetrics/lagotto
Browse files Browse the repository at this point in the history
* 'master' of github.com:articlemetrics/lagotto:
  simplify checks for normalized URL
  adjusted validation of work url. #130
  change api routes for new default param in source, publisher
  updated API documentation for minor changes in v5 API
  • Loading branch information
Martin Fenner committed Dec 18, 2014
2 parents 6f6f909 + 327560a commit bacb103
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 21 deletions.
19 changes: 6 additions & 13 deletions app/models/concerns/resolvable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,17 @@ def get_canonical_url(url, options = { timeout: 120 })
end

if body_url
# remove percent encoding
body_url = CGI.unescape(body_url)

# make URL lowercase
body_url = body_url.downcase
# normalize URL, e.g. remove percent encoding and make URL lowercase
body_url = PostRank::URI.clean(body_url)

# remove parameter used by IEEE
body_url = body_url.sub("reload=true&", "")
end

url = response.env[:url].to_s
if url
# remove percent encoding
url = CGI.unescape(url)

# make URL lowercase
url = url.downcase

# remove jsessionid used by J2EE servers
url = url.gsub(/(.*);jsessionid=.*/, '\1')
# normalize URL, e.g. remove percent encoding and make URL lowercase
url = PostRank::URI.clean(url)

# remove parameter used by IEEE
url = url.sub("reload=true&", "")
Expand Down Expand Up @@ -70,6 +61,8 @@ def get_canonical_url(url, options = { timeout: 120 })

# Normalize +url+ with PostRank::URI.clean.
#
# Returns the cleaned URL on success. When the input is rejected with
# Addressable::URI::InvalidURIError, the exception is not propagated;
# a hash of the form { error: <exception message> } is returned instead,
# so callers have to check the type of the result.
def get_normalized_url(url)
  begin
    cleaned = PostRank::URI.clean(url)
  rescue Addressable::URI::InvalidURIError => invalid_uri
    return { error: invalid_uri.message }
  end

  cleaned
end

def get_url_from_doi(doi)
Expand Down
8 changes: 7 additions & 1 deletion app/models/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,13 @@ def sanitize_title
# Replace canonical_url with its normalized form.
#
# Returns nil without doing anything when canonical_url is blank.
# get_normalized_url returns either the normalized URL or a hash with an
# :error key; in the error case canonical_url is left untouched and the
# message is recorded as an error on :canonical_url.
def normalize_url
  return nil if canonical_url.blank?

  # normalize exactly once and inspect the result before assigning, so an
  # error hash never ends up stored in canonical_url
  normalized = get_normalized_url(canonical_url)
  if normalized.is_a?(Hash)
    errors.add :canonical_url, normalized.fetch(:error)
  else
    self.canonical_url = normalized
  end
end

# pid is required, use doi, pmid, pmcid, or canonical url in that order
Expand Down
4 changes: 2 additions & 2 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@

# Routes for version 5 of the API.
# Each resource is declared exactly once: sources use :name and publishers
# use :member_id as the route param (via the `param:` option). A second
# declaration without `param:` would be registered first and take precedence.
namespace :v5 do
  resources :works, path: "articles", constraints: { :id => /.+?/ }, only: [:index]
  resources :sources, only: [:index, :show], param: :name
  get "status", to: "status#show"
  resources :api_requests, only: [:index]
  resources :publishers, only: [:index], param: :member_id
end
end

Expand Down
8 changes: 4 additions & 4 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,22 +94,22 @@ With the **detail** parameter all raw data sent by the source are provided. The
/api/v5/articles?api_key=API_KEY&ids=10.1371%2Fjournal.pone.0036240,10.1371%2Fjournal.pbio.0020413&info=detail
```

### source=x
### source_id=x
Only provide metrics for a given source. The response format is the same as the default response.

```sh
/api/v5/articles?api_key=API_KEY&ids=10.1371%2Fjournal.pone.0036240,10.1371%2Fjournal.pbio.0020413&source=mendeley
/api/v5/articles?api_key=API_KEY&ids=10.1371%2Fjournal.pone.0036240,10.1371%2Fjournal.pbio.0020413&source_id=mendeley
```

### order=x

Results are sorted by descending event count when given the source name, e.g. `&order=wikipedia`. Otherwise (the default) results are sorted by date descending. When using `&source_id=x`, we can only sort by date or that source, not a different source.

### publisher=x
### publisher_id=x
Only provide metrics for articles by a given publisher, using the `member_id`. The response format is the same as the default response.

```sh
/api/v5/articles?api_key=API_KEY&publisher=340
/api/v5/articles?api_key=API_KEY&publisher_id=340
```

### page|per_page
Expand Down
12 changes: 12 additions & 0 deletions spec/concerns/resolvable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@
expect(stub).to have_been_requested
end

# The resolved URL carries a trailing slash; the canonical URL returned by
# get_canonical_url is expected to come back without it.
it "get_canonical_url with trailing slash" do
  work = FactoryGirl.create(:work_with_events, doi: "10.1371/journal.pone.0000030")
  clean_url = "http://www.plosone.org/article/info:doi/10.1371/journal.pone.0000030"
  url = "#{clean_url}/"

  # DOI resolver answers with a redirect to the URL with the trailing slash
  stub_request(:get, "http://dx.doi.org/#{work.doi}")
    .to_return(status: 302, headers: { 'Location' => url })
  # only this second stub is verified at the end of the example
  landing_page = stub_request(:get, url)
    .to_return(status: 200, headers: { 'Location' => url })

  canonical_url = subject.get_canonical_url(work.doi_as_url, work_id: work.id)

  expect(canonical_url).to eq(clean_url)
  expect(Alert.count).to eq(0)
  expect(landing_page).to have_been_requested
end

it "get_canonical_url with jsessionid" do
work = FactoryGirl.create(:work_with_events, :doi => "10.1371/journal.pone.0000030")
url = "http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0000030;jsessionid=5362E4D61F1953ADA2CB3F746E58AAC2.f01t03"
Expand Down
3 changes: 2 additions & 1 deletion spec/models/work_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@

# A canonical_url of "http://" is expected to make the work invalid.
# The former raise_error expectation is dropped: its block only read the
# already-built local `work`, so it could never observe an exception.
it "http://" do
  work = FactoryGirl.build(:work, :canonical_url => "http://")
  expect(work).not_to be_valid
end

it "asdfasdfasdf" do
Expand Down

0 comments on commit bacb103

Please sign in to comment.