Skip to content

Commit

Permalink
Merge pull request #355 from mfenner/master
Browse files Browse the repository at this point in the history
ALM-793 error parsing pmc feed. Closes #354.
  • Loading branch information
Martin Fenner committed Jul 6, 2015
2 parents 4f90ac6 + 6dc7863 commit 86eb35a
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 55 deletions.
95 changes: 54 additions & 41 deletions app/models/sources/pmc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,52 +81,65 @@ def parse_feed(month, year, _options = {})

journals_array.each do |journal|
filename = "pmcstat_#{journal}_#{month}_#{year}.xml"
file = File.open("#{Rails.root}/data/#{filename}", 'r') { |f| f.read }
document = Nokogiri::XML(file)

status = document.at_xpath("//pmc-web-stat/response/@status").value
if status != "0"
error_message = document.at_xpath("//pmc-web-stat/response/error").content
message = "PMC Usage stats for journal #{journal}, month #{month} and year #{year}: #{error_message}"
Alert.where(message: message).where(unresolved: true).first_or_create(
:exception => "",
:class_name => "Net::HTTPInternalServerError",
:status => 500,
:source_id => id)
journals_with_errors << journal
else
# go through all the works in the xml document
document.xpath("//work").each do |work|
work = work.to_hash
work = work["work"]

doi = work["meta-data"]["doi"]
# sometimes doi metadata are missing
break unless doi

view = work["usage"]
view['year'] = year.to_s
view['month'] = month.to_s

# try to get the existing information about the given work
data = get_result(url_db + CGI.escape(doi))

if data['views'].nil?
data = { 'views' => [view] }
else
# update existing entry
data['views'].delete_if { |view| view['month'] == month.to_s && view['year'] == year.to_s }
data['views'] << view
end

put_lagotto_data(url_db + CGI.escape(doi), data: data)
end
end
status = parse_file(filename, month, year)
journals_with_errors << journal if status != "0"
end
end
journals_with_errors
end

# Parse one PMC usage-stats XML file from "#{Rails.root}/data" and store the
# monthly usage of every article in it.
#
# When the response status is not "0", an unresolved Alert with the error
# message from the file is created (or reused) and no article is processed.
#
# @param filename [String] file name inside the data directory; parse_feed
#   builds it as "pmcstat_<journal>_<month>_<year>.xml"
# @param month [#to_s] month the statistics belong to
# @param year [#to_s] year the statistics belong to
# @return [String] the status attribute of the response element ("0" means no error)
def parse_file(filename, month, year)
  file = File.open("#{Rails.root}/data/#{filename}", 'r') { |f| f.read }
  document = Nokogiri::XML(file)

  status = document.at_xpath("//pmc-web-stat/response/@status").value

  if status != "0"
    # The journal is not passed in, so recover it from the file name; the
    # greedy match keeps journal names that themselves contain underscores.
    journal = filename.to_s[/\Apmcstat_(.+)_[^_]+_[^_]+\.xml\z/, 1] || filename

    error_message = document.at_xpath("//pmc-web-stat/response/error").content
    message = "PMC Usage stats for journal #{journal}, month #{month} and year #{year}: #{error_message}"
    Alert.where(message: message).where(unresolved: true).first_or_create(
      :exception => "",
      :class_name => "Net::HTTPInternalServerError",
      :status => 500,
      :source_id => id)
  else
    # go through all the works in the xml document
    document.xpath("//article").each do |work|
      data = parse_work(work, month, year)
      next unless data.present?

      # parse_work records the DOI on the view it appends last, so a freshly
      # built document has no top-level "doi" key; fall back to that view.
      doi = data["doi"]
      doi = Array(data["views"]).last.to_h["doi"] unless doi.present?
      next unless doi.present?

      put_lagotto_data(url_db + CGI.escape(doi), data: data)
    end
  end
  status
end

# Build the stored usage document for a single article node.
#
# The article's usage hash is tagged with its DOI and the given month/year,
# then merged into whatever get_result returns for that DOI: an entry for the
# same month and year is replaced, otherwise the new entry is appended.
#
# @param work [#to_hash] article node; its hash form is expected to have an
#   "article" key holding "meta-data" and "usage"
# @param month [#to_s] month of the statistics
# @param year [#to_s] year of the statistics
# @return [Hash, nil] document with a "views" array, or nil when the DOI is missing
def parse_work(work, month, year)
  article = work.to_hash["article"]

  doi = article.fetch("meta-data", {}).fetch("doi", nil)
  # sometimes doi metadata are missing
  return nil unless doi.present?

  month_key = month.to_s
  year_key = year.to_s

  usage = article["usage"]
  usage['doi'] = doi
  usage['year'] = year_key
  usage['month'] = month_key

  # look up what is already stored for this work
  stored = get_result(url_db + CGI.escape(doi))
  return { 'views' => [usage] } if stored['views'].nil?

  # replace the entry for this month and year, if there is one
  stored['views'].delete_if { |entry| entry['month'] == month_key && entry['year'] == year_key }
  stored['views'] << usage
  stored
end

# Send a PUT request without a payload to the database URL (url_db).
# NOTE(review): presumably this creates the usage-stats database — confirm
# against put_lagotto_data.
#
# @return whatever put_lagotto_data returns
def put_database
  database_url = url_db
  put_lagotto_data(database_url)
end
Expand Down
7 changes: 7 additions & 0 deletions spec/factories/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@
FactoryGirl.create(:retrieval_status, :with_twitter_search, work: work)
end
end

# Work that, once created, also gets a retrieval_status for the PMC source
# (:with_pmc trait) with preset counts: readers 0, pdf 10, html 50, total 60.
factory :work_with_pmc_usage_stats do
after :create do |work|
FactoryGirl.create(:retrieval_status, :with_pmc, work: work, readers: 0, pdf: 10, html: 50, total: 60)
end
end
end

factory :retrieval_status do
Expand All @@ -159,6 +165,7 @@
trait(:with_mendeley) { association :source, factory: :mendeley }
trait(:with_pubmed) { association :source, factory: :pub_med }
trait(:with_nature) { association :source, factory: :nature }
trait(:with_pmc) { association :source, factory: :pmc }
trait(:with_wos) { association :source, factory: :wos }
trait(:with_researchblogging) { association :source, factory: :researchblogging }
trait(:with_scienceseeker) { association :source, factory: :scienceseeker }
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 25 additions & 14 deletions spec/models/pmc_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,39 @@
end

context "parse PMC data" do
let(:a_month_ago) { Time.zone.now - 1.month }
let(:month) { a_month_ago.month }
let(:year) { a_month_ago.year }
let(:month) { 1 }
let(:year) { 2014 }
let(:journal) { "ajrccm" }
let(:filename) { "pmcstat_#{journal}_#{month}_#{year}.xml" }
let(:file) { "#{Rails.root}/data/" + filename }

before(:each) do
subject.put_lagotto_data(subject.url_db)
FileUtils.cp(fixture_path + 'pmc_alt.xml', file)
end

after(:each) do
subject.delete_lagotto_data(subject.url_db)
FileUtils.rm file
end

it "should parse PMC data" do
config = subject.publisher_configs.first
publisher_id = config[0]
journal = config[1].journals.split(" ").first
stub = stub_request(:get, subject.get_feed_url(publisher_id, month, year, journal)).to_return(:body => File.read(fixture_path + 'pmc_alt.xml'))
expect(subject.get_feed(month, year)).to be_empty
expect(subject.parse_feed(month, year)).to be_empty
expect(stub).to have_been_requested
expect(Alert.count).to eq(0)
end

it "should parse file" do
expect(subject.parse_file(filename, month, year)).to eq("0")
end

it "should parse work" do
document = Nokogiri::XML(File.read(file))
work = document.xpath("//article").first
expect(subject.parse_work(work, month, year)).to eq("views"=>[{"unique-ip"=>"17", "full-text"=>"20", "pdf"=>"3", "abstract"=>"0", "scanned-summary"=>"0", "scanned-page-browse"=>"0", "figure"=>"0", "supp-data"=>"0", "cited-by"=>"0", "doi"=>"10.1164/rccm.200612-1772OC", "year"=>"2014", "month"=>"1"}])
end
end

context "get_data" do
Expand All @@ -62,12 +73,12 @@
end

it "should report that there are no events if the doi is missing" do
work = FactoryGirl.create(:work, :doi => nil)
work = FactoryGirl.create(:work, doi: nil)
expect(subject.get_data(work)).to eq({})
end

it "should report if there are no events returned by the PMC API" do
work = FactoryGirl.create(:work, :doi => "10.1371/journal.pone.0044294")
work = FactoryGirl.create(:work, doi: "10.1371/journal.pone.0044294")
body = File.read(fixture_path + 'pmc_nil.json')
stub = stub_request(:get, subject.get_query_url(work)).to_return(:body => body)
response = subject.get_data(work)
Expand All @@ -76,7 +87,7 @@
end

it "should report if there are events returned by the PMC API" do
work = FactoryGirl.create(:work, :doi => "10.1371/journal.pbio.1001420")
work = FactoryGirl.create(:work, doi: "10.1371/journal.pbio.1001420")
body = File.read(fixture_path + 'pmc.json')
stub = stub_request(:get, subject.get_query_url(work)).to_return(:body => body)
response = subject.get_data(work)
Expand All @@ -85,7 +96,7 @@
end

it "should catch errors with the PMC API" do
work = FactoryGirl.create(:work, :doi => "10.1371/journal.pone.0000001")
work = FactoryGirl.create(:work, doi: "10.1371/journal.pone.0000001")
stub = stub_request(:get, subject.get_query_url(work)).to_return(:status => [408])
response = subject.get_data(work, options = { :source_id => subject.id })
expect(response).to eq(error: "the server responded with status 408 for http://127.0.0.1:5984/pmc_usage_stats_test/#{work.doi_escaped}", :status=>408)
Expand All @@ -100,14 +111,14 @@

context "parse_data" do
it "should report that there are no events if the doi is missing" do
work = FactoryGirl.create(:work, :doi => nil)
work = FactoryGirl.create(:work, doi: nil)
result = {}
result.extend Hashie::Extensions::DeepFetch
expect(subject.parse_data(result, work)).to eq(events: { source: "pmc", work: work.pid, pdf: 0, html: 0, total: 0, events_url: nil, extra: [], months: [] })
end

it "should report if there are no events returned by the PMC API" do
work = FactoryGirl.create(:work, :doi => "10.1371/journal.pone.0044294")
work = FactoryGirl.create(:work, doi: "10.1371/journal.pone.0044294")
body = File.read(fixture_path + 'pmc_nil.json')
result = JSON.parse(body)
result.extend Hashie::Extensions::DeepFetch
Expand All @@ -116,7 +127,7 @@
end

it "should report if there are events returned by the PMC API" do
work = FactoryGirl.create(:work, :doi => "10.1371/journal.pbio.1001420")
work = FactoryGirl.create(:work, doi: "10.1371/journal.pbio.1001420")
body = File.read(fixture_path + 'pmc.json')
result = JSON.parse(body)
result.extend Hashie::Extensions::DeepFetch
Expand All @@ -130,7 +141,7 @@
end

it "should catch timeout errors with the PMC API" do
work = FactoryGirl.create(:work, :doi => "10.2307/683422")
work = FactoryGirl.create(:work, doi: "10.2307/683422")
result = { error: "the server responded with status 408 for http://127.0.0.1:5984/pmc_usage_stats_test/", status: 408 }
response = subject.parse_data(result, work)
expect(response).to eq(result)
Expand Down

0 comments on commit 86eb35a

Please sign in to comment.