Skip to content

Commit

Permalink
Added comments to ncbi adaptor
Browse files Browse the repository at this point in the history
  • Loading branch information
Siavash Mirarab committed Jun 8, 2012
1 parent d4a97a8 commit 0bdd5a2
Showing 1 changed file with 19 additions and 5 deletions.
24 changes: 19 additions & 5 deletions tnrs_handler/bin/tnrs_adapter/ncbi_adapter/ncbi_adaptor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
'''
This script reads taxon names from standard input, one per line, and searches the NCBI taxonomy DB
for matches. If matches are found, the taxonomy is searched again to find the accepted name for each queried name.
Results are returned as a JSON file.
Developer: Siavash Mirarab (smirarab@gmail.com)
'''

import urllib
import time
import sys
Expand All @@ -13,12 +21,15 @@
MP_ID_ERROR = "We expect one ID back from NCBI, but we got more than one ID. Oops!"
HTML_ERROR = "HTML error in accessing NCBI"


'''Search for a given name and find its taxonomic ID.'''
def search_NCBI_for_ids(search_term):

# Search the taxonomy DB of NCBI for a given term
parameters={"db":'taxonomy',"term":search_term,"tool":"tnrastic"}

url = "%s?%s" %(BASE_SEARCH_QUARY,urllib.urlencode(parameters))# URL just for outputting errors
url = "%s?%s" %(BASE_SEARCH_QUARY,urllib.urlencode(parameters)) # URL just for outputting errors

# Search taxonomy for a given term. In case of an error, wait a second and try again.
# Do this at most 20 times.
succ = False
Expand All @@ -43,6 +54,7 @@ def search_NCBI_for_ids(search_term):
# If error in the XML file, try again
succ = dom.find("ERROR") is None or dom.find("ERROR").text is None

# Basic error handling
if f.getcode() != 200:
raise Exception (HTML_ERROR, f.getcode())
if not succ:
Expand All @@ -56,7 +68,7 @@ def search_NCBI_for_ids(search_term):
# Find the retrieved IDs
idList = idListElement[0].findall("Id")

# We expect to get only one id back. Find that one ID and return it.
if len(idList) == 0:
return None
elif len(idList) > 1:
Expand All @@ -65,6 +77,8 @@ def search_NCBI_for_ids(search_term):
id = idList[0].text
return id

''' For a given list of ids this function returns the accepted names.
Results are returned as a dictionary'''
def get_name_for_ids(ids):

idToName = {}
Expand Down Expand Up @@ -101,15 +115,15 @@ def get_name_for_ids(ids):
try:
for t in sys.stdin:
term = t.replace("\n","")
id = search_NCBI_for_ids(term)
id = search_NCBI_for_ids(term) # First search the name to find the IDs
time.sleep(0.01)# a bit of sleep to help NCBI
if id is not None:
id2term[id] = term
#print >>sys.stderr, "%s was mapped to %s " %(term,id)
res[term] = (id,None)
id2names = get_name_for_ids(id2term.keys())
id2names = get_name_for_ids(id2term.keys()) # search with found IDs to grab the taxonomic name.
for id in id2names.keys():
res[id2term[id]] = ("%s/%s" %(TAXON_URL_BASE,res[id2term[id]][0]), id2names[id])
res[id2term[id]] = ("%s/%s" %(TAXON_URL_BASE,res[id2term[id]][0]), id2names[id]) # Build URL from IDs

jres["status"] = "200"
jres["errorMessage"] = ""
Expand Down

0 comments on commit 0bdd5a2

Please sign in to comment.