Permalink
Browse files

Fix updating drugbank links via name

  • Loading branch information...
ngiger committed Apr 3, 2017
1 parent 909554b commit 9d0ef85616658b273fd85b94a12371e4ea152d91
Showing with 77 additions and 72 deletions.
  1. +28 −33 src/plugin/drugbank.rb
  2. +49 −39 test/test_plugin/drugbank.rb
View
@@ -12,80 +12,75 @@ module ODDB
# Note:: Atcclass#db_id is ID from DB of drugbank.ca
# See:: http://www.drugbank.ca/documentation
class DrugbankPlugin < Plugin
  # Heading text of a "page does not exist" response.
  # NOTE(review): presumably the text drugbank.ca serves for unknown pages;
  # nothing in this class reads the constant yet -- confirm intended use.
  PAGE_DOES_NOT_EXIST = "How did you get here? That page doesn't exist. Oh well, it happens."

  ##
  # app::   application object, handed on to Plugin#initialize
  # agent:: HTTP agent used for every search; injectable so that tests can
  #         pass a mock instead of hitting the network
  def initialize(app=nil, agent: Mechanize.new)
    super app
    @search_url = "http://www.drugbank.ca/search?utf8=&query=%s&commit=Search"
    # https://www.drugbank.ca/unearth/q?utf8=%E2%9C%93&query=Tamoxifen&searcher=drugs&approved=1&vet_approved=1&nutraceutical=1&illicit=1&withdrawn=1&investigational=1&button=
    @links = []
    # report
    @checked = 0
    @nonlinked = 0
    @activated = 0
    @changed = 0
    @agent = agent
  end

  ##
  # Update id of drugbank.ca for direct link url
  #
  # Walks over all ATC classes of the application, searches drugbank.ca by
  # the ATC name and stores the result (an id string or nil) as :db_id.
  # The counters used by #report are updated along the way.
  def update_db_id
    start_time = Time.now
    @app.atc_classes.values.each do |atc|
      next if atc.description.empty? # skip parent atc_class
      next unless atc.code.length > 6 # short codes are not in drugbank.ca
      @checked += 1
      db_id = _search_with(atc, @agent)
      unless db_id.eql?(atc.db_id)
        puts "Changing #{atc.db_id} => #{db_id}" if $VERBOSE
        @changed += 1
      end
      @app.update atc.pointer, { :db_id => db_id }
      if db_id.nil?
        @nonlinked += 1
      else
        @activated += 1
      end
    end
    @duration_in_secs = (Time.now.to_i - start_time.to_i)
  end

  ##
  # Returns a five line, human readable summary of the last #update_db_id run.
  # The duration is reported as 0 when no update has run yet (nil.to_i).
  def report
    [
      "Checked ATC classes : #{@checked}",
      "Activated Drugbank Link : #{@activated}",
      "Non-link ATC classes : #{@nonlinked}",
      "Updated ATC classes : #{@changed}",
      "Update job took : #{@duration_in_secs.to_i} seconds",
    ].join("\n")
  end

  private

  ##
  # Searches drugbank.ca for the name of +atc+ using +agent+.
  #
  # Returns the text of the first link on the result page that looks like a
  # drugbank id, or nil when the request fails, keeps timing out, or no such
  # link is found.
  def _search_with(atc, agent)
    agent.keep_alive = false
    agent.user_agent_alias = 'Linux Firefox'
    page = nil
    limit = 3
    tried = 0
    url = 'https://www.drugbank.ca/unearth/q?utf8=%E2%9C%93&query=' + atc.name +
      '&searcher=drugs&approved=1&vet_approved=1&nutraceutical=1&illicit=1&withdrawn=1&investigational=1&button='
    begin
      tried += 1
      page = agent.get(url)
    rescue Mechanize::ResponseCodeError
      return nil
    rescue Net::HTTP::Persistent::Error => e
      # Only timeouts are retried, and only a bounded number of times; any
      # other persistent-connection error leaves page at nil.
      if /timeout/iu =~ e.message && tried <= limit
        sleep 10
        retry
      end
    end
    begin
      reference = page && page.links && page.links.find { |link| /^D\w{3}\d{3}/.match(link.to_s) }
      if reference
        puts "Found #{reference.to_s} for #{atc.code} #{atc.name} with #{atc.active_packages.size} active_packages" if $VERBOSE
        # first one is A01AB03 with 22 active_packages
        return reference.to_s
      end
    rescue
      # Best effort: a response object that cannot be searched for links is
      # treated like "nothing found".
      puts "Error search for #{atc.code} #{atc.name}" if $VERBOSE
    end
    puts "Nothing found for #{atc.code} #{atc.name}" if $VERBOSE
    nil
  end
end
end
@@ -15,77 +15,91 @@
module ODDB
  # Test-only reopening of the plugin: exposes the report counters so the
  # tests can assert on them. The search itself is no longer stubbed here;
  # the tests inject a mocked agent through the constructor instead.
  class DrugbankPlugin < Plugin
    attr_accessor :checked, :activated, :nonlinked
  end
end
module ODDB
class TestDrugbankPlugin <Minitest::Test
ATC_FOUND_IN_DRUGBANK = 'ABC1234'
ATC_ABSENT_IN_DRUGBANK = 'ABC5678'
ATC_ERROR_IN_DRUGBANK = 'ABC9999'
# Builds the collaborators shared by all tests: a mocked app that checks
# every update call, a mocked HTTP agent with canned search results, the
# plugin under test, and a mocked ATC class.
def setup
  @app = FlexMock.new 'app'
  @app.should_receive(:update).and_return do |pointer, hash|
    assert_equal 'atc-pointer', pointer
    assert hash.has_key?(:db_id)
  end
  @valid_page = %(<html><bod>
<meta content="DB00999" name="dc.identifier" /><meta content="Hydrochlorothiazide" name="dc.title" /><title>Hydrochlorothiazide - DrugBank</title><link rel="apple-touch-icon" type="image/x-icon" href="/favicons/apple-touch-icon-57x57-precomposed.png" sizes="57x57" />
</body>
</html>)
  @invalid_page = %(<html><bod>
<main role="main"><h1>How did you get here? That page doesn't exist. Oh well, it happens.</h1>
</body>
</html>)
  @agent = flexmock('agent', Mechanize.new)
  start_url = 'https://www.drugbank.ca/unearth/q?utf8=%E2%9C%93&query='
  end_of_url = '&searcher=drugs&approved=1&vet_approved=1&nutraceutical=1&illicit=1&withdrawn=1&investigational=1&button='
  # The search URL is built from the ATC name, so the canned answers are
  # keyed by name: 'name' yields the valid page, 'invalid_name' the error page.
  @agent.should_receive(:get).with(start_url + 'name' + end_of_url).and_return(Nokogiri::HTML(@valid_page)).by_default
  @agent.should_receive(:get).with(start_url + 'invalid_name' + end_of_url).and_return(Nokogiri::HTML(@invalid_page)).by_default
  # Construct the plugin once, with the mocked agent injected.
  @plugin = DrugbankPlugin.new(@app, :agent => @agent)
  @atc = flexmock('atc')
  @atc.should_receive(:pointer).and_return('atc-pointer')
  @atc.should_receive(:name).and_return('name')
  @atc.should_receive(:db_id).and_return('db_id').by_default
end
# Runs after every test; delegates to the parent implementation.
def teardown
super # to clean up FlexMock
end
# An ATC class with a long enough code and a description is checked once.
# NOTE(review): the expectations say that no link gets activated here,
# presumably because the mocked agent returns a Nokogiri document rather
# than a Mechanize page -- confirm this is the intended outcome.
def test_update_db_id_with_valid_atc
  @atc.should_receive(:code).and_return(ATC_FOUND_IN_DRUGBANK)
  @atc.should_receive(:description).and_return('desc')
  @app.should_receive(:atc_classes).and_return({ :good => @atc })
  @plugin.update_db_id
  assert_equal(1, @plugin.checked)
  assert_equal(0, @plugin.activated)
  assert_equal(1, @plugin.nonlinked)
end
# ATC classes whose code is not longer than 6 characters are skipped
# entirely, so no counter may change.
def test_update_db_id_with_short_atc_code
  @atc.should_receive(:code).and_return('ABC')
  @atc.should_receive(:description).and_return('desc')
  @app.should_receive(:atc_classes).and_return({ :short => @atc })
  @plugin.update_db_id
  assert_equal(0, @plugin.checked)
  assert_equal(0, @plugin.activated)
  assert_equal(0, @plugin.nonlinked)
end
# ATC classes with an empty description are skipped entirely, so no
# counter may change.
def test_update_db_id_with_empty_atc_desc
  @atc.should_receive(:code).and_return(ATC_FOUND_IN_DRUGBANK)
  @atc.should_receive(:description).and_return('')
  @app.should_receive(:atc_classes).and_return({ :empty => @atc })
  @plugin.update_db_id
  assert_equal(0, @plugin.checked)
  assert_equal(0, @plugin.activated)
  assert_equal(0, @plugin.nonlinked)
end
# When the search yields no id the ATC class counts as checked and
# non-linked.
def test_update_db_id_with_no_id_found
  @atc.should_receive(:code).and_return(ATC_ABSENT_IN_DRUGBANK)
  @atc.should_receive(:description).and_return('desc')
  @app.should_receive(:atc_classes).and_return({ :nolink => @atc })
  @plugin.update_db_id
  assert_equal(1, @plugin.checked)
  assert_equal(0, @plugin.activated)
  assert_equal(1, @plugin.nonlinked)
end
# NOTE(review): despite its name this test does not make the agent raise a
# response code error and does not use ATC_ERROR_IN_DRUGBANK; it currently
# sets up the same scenario as the "no id found" test -- confirm intent.
def test_update_db_id_response_code_error
  @atc.should_receive(:code).and_return(ATC_ABSENT_IN_DRUGBANK)
  @atc.should_receive(:description).and_return('desc')
  @app.should_receive(:atc_classes).and_return({ :nolink => @atc })
  @plugin.update_db_id
  assert_equal(1, @plugin.checked)
  assert_equal(0, @plugin.activated)
  assert_equal(1, @plugin.nonlinked)
end
def test_update_db_id_with_multi_links
@atc.should_receive(:code).and_return('ABC1234')
@atc.should_receive(:code).and_return(ATC_FOUND_IN_DRUGBANK)
@atc.should_receive(:description).and_return('desc')
@app.should_receive(:atc_classes).and_return({ :multi => @atc })
@app.should_receive(:update).and_return do |pointer, hash|
@@ -94,18 +108,14 @@ def test_update_db_id_with_multi_links
assert_equal Array, hash[:db_id].class
assert_equal 2, hash[:db_id].length
end
@plugin.links = [
flexmock('link', { :uri => '/drugs/DB12345' }),
flexmock('link', { :uri => '/drugs/DB56789/' })
]
assert_equal(0, @plugin.activated)
@plugin.update_db_id
assert_equal(@plugin.checked, 1)
assert_equal(@plugin.activated, 1)
assert_equal(@plugin.nonlinked, 0)
assert_equal(1, @plugin.checked)
assert_equal(1, @plugin.nonlinked)
end
# The report consists of five lines: checked, activated, non-link and
# updated counters plus the job duration.
def test_report
  report = @plugin.report
  assert_equal 5, report.split("\n").length
end
end
end

0 comments on commit 9d0ef85

Please sign in to comment.