In [1]:
# install biopython on Jupyter server.
import sys
!python -m pip install biopython



You should consider upgrading via the 'C:\Python38\python.exe -m pip install --upgrade pip' command.


In [2]:
import time
from urllib.error import HTTPError

from Bio import Entrez

In [3]:
# Identify yourself to NCBI; replace this with your own e-mail address.
Entrez.email = "dbsnp-user@nih.gov"
# RECOMMENDED: apply for an NCBI API key
# (https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/).
# A valid key allows 10 queries per second; with None only 3 queries per second are allowed.
Entrez.api_key = None

# dbSNP query terms are documented at https://www.ncbi.nlm.nih.gov/snp/docs/entrez_help/
# and can be built and tested online with the web query builder
# (https://www.ncbi.nlm.nih.gov/snp/advanced).
# Compound query: missense and pathogenic variants in the LPL gene with a
# global MAF between 0 and 0.01.
snp_query = 'LPL[All Fields] AND pathogenic[Clinical_Significance] AND missense variant[Function_Class] AND (00000.0000[GLOBAL_MAF] : 00000.0100[GLOBAL_MAF])'

# esearch handle
eShandle = Entrez.esearch(
    db="snp",        # search dbSNP
    term=snp_query,
    usehistory="y",  # cache the result on the server so it can be downloaded in batches
    retmax=20,       # return at most 20 RSIDs
)


In [4]:

# Parse the esearch response into eSresult, then close the handle: once parsed
# it is fully consumed, and closing it releases the underlying response.
try:
    eSresult = Entrez.read(eShandle)
finally:
    eShandle.close()

In [5]:
# Review every field of the parsed esearch result, one "name : value" line each.
for field, value in eSresult.items():
    print(field, ":", value)

# Output: the web environment (&WebEnv) and query key (&query_key) parameters specify the
# location on the Entrez history server of the list of UIDs matching the Entrez query.
# https://www.ncbi.nlm.nih.gov/books/NBK25500/#chapter1.Storing_Search_Results
    

Count : 5
RetMax : 5
RetStart : 0
QueryKey : 1
WebEnv : MCID_61327ef47c171e289112e960
IdList : ['386571803', '118204057', '52818902', '17850737', '268']
TranslationSet : [{'From': 'LPL[All Fields]', 'To': 'LPL[All Fields]'}]
TranslationStack : [{'Term': 'LPL[All Fields]', 'Field': 'All Fields', 'Count': '12455', 'Explode': 'N'}, {'Term': 'pathogenic[Clinical_Significance]', 'Field': 'Clinical_Significance', 'Count': '98208', 'Explode': 'N'}, 'AND', {'Term': 'missense variant[Function_Class]', 'Field': 'Function_Class', 'Count': '9628782', 'Explode': 'N'}, 'AND', {'Term': '00000.0000[GLOBAL_MAF]', 'Field': 'GLOBAL_MAF', 'Count': '0', 'Explode': 'N'}, {'Term': '00000.0100[GLOBAL_MAF]', 'Field': 'GLOBAL_MAF', 'Count': '0', 'Explode': 'N'}, 'RANGE', 'GROUP', 'AND']
QueryTranslation : LPL[All Fields] AND pathogenic[Clinical_Significance] AND missense variant[Function_Class] AND (00000.0000[GLOBAL_MAF] : 00000.0100[GLOBAL_MAF])


In [6]:
# Pull the list of returned RSIDs ('IdList') out of the esearch result.
rslist = eSresult['IdList']

In [7]:
# The search used retmax = 20, so at most 20 RSIDs are printed here.
# Additional results can be retrieved in batches; see the example at
# http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc139 or the cells below.
for rsid in rslist:
    print(rsid)

386571803
118204057
52818902
17850737
268


In [8]:
# Collect what is needed to page through the search cached on the history
# server: the WebEnv session cookie, the QueryKey, and the total hit count.
webenv = eSresult["WebEnv"]
query_key = eSresult["QueryKey"]
total_count = int(eSresult["Count"])  # "Count" is converted to an integer for use in range()
retmax = 2  # batch size: return 2 RSIDs per request in this example

In [9]:
# Download the cached search results in batches of `retmax` records.
# Sample code adapted, with modifications, from
# http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc139.
MAX_ATTEMPTS = 3  # tries per batch before giving up on that batch
fetch_count = 0   # number of batches downloaded successfully
for start in range(0, total_count, retmax):
    end = min(total_count, start+retmax)
    print("Going to download record %i to %i" % (start+1, end))
    # Reset per batch so a failed batch can never fall through to the
    # previous batch's (already consumed) handle.
    fetch_handle = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            fetch_handle = Entrez.efetch(db="snp",
                                         # rettype options: uilist, or xml (use retmode="xml")
                                         retmode="xml",
                                         retstart=start,
                                         retmax=retmax,
                                         webenv=webenv,
                                         query_key=query_key)
            break  # success: do not request the same batch again
        except HTTPError as err:
            if not 500 <= err.code <= 599:
                raise  # only server-side (5xx) errors are retried
            print("Received error from server %s" % err)
            print("Attempt %i of %i" % (attempt, MAX_ATTEMPTS))
            if attempt < MAX_ATTEMPTS:
                time.sleep(15)  # back off before the next try
    if fetch_handle is None:
        # Every attempt failed with a 5xx error; report it and move on to the next batch.
        print("Giving up on record %i to %i after %i attempts" % (start+1, end, MAX_ATTEMPTS))
        continue
    try:
        data = fetch_handle.read()
    finally:
        fetch_handle.close()  # always release the response, even if read() fails
    fetch_count += 1
    print(data)



Going to download record 1 to 2
b'<?xml version="1.0" ?>\n<ExchangeSet xmlns:xsi="https://www.w3.org/2001/XMLSchema-instance" xmlns="https://www.ncbi.nlm.nih.gov/SNP/docsum" xsi:schemaLocation="https://www.ncbi.nlm.nih.gov/SNP/docsum ftp://ftp.ncbi.nlm.nih.gov/snp/specs/docsum_eutils.xsd" ><DocumentSummary uid="386571803"><SNP_ID>268</SNP_ID><ALLELE_ORIGIN/><GLOBAL_MAFS><MAF><STUDY>1000Genomes</STUDY><FREQ>G=0.005192/26</FREQ></MAF><MAF><STUDY>ALSPAC</STUDY><FREQ>G=0.018682/72</FREQ></MAF><MAF><STUDY>Estonian</STUDY><FREQ>G=0.023884/107</FREQ></MAF><MAF><STUDY>ExAC</STUDY><FREQ>G=0.013363/1622</FREQ></MAF><MAF><STUDY>FINRISK</STUDY><FREQ>G=0.013158/4</FREQ></MAF><MAF><STUDY>GENOME_DK</STUDY><FREQ>G=0.05/2</FREQ></MAF><MAF><STUDY>GnomAD</STUDY><FREQ>G=0.013274/1862</FREQ></MAF><MAF><STUDY>GnomAD_exomes</STUDY><FREQ>G=0.01278/3212</FREQ></MAF><MAF><STUDY>GoNL</STUDY><FREQ>G=0.03006/30</FREQ></MAF><MAF><STUDY>HapMap</STUDY><FREQ>G=0.00565/4</FREQ></MAF><MAF><STUDY>MGP</STUDY><FREQ>G=0.013

b'<?xml version="1.0" ?>\n<ExchangeSet xmlns:xsi="https://www.w3.org/2001/XMLSchema-instance" xmlns="https://www.ncbi.nlm.nih.gov/SNP/docsum" xsi:schemaLocation="https://www.ncbi.nlm.nih.gov/SNP/docsum ftp://ftp.ncbi.nlm.nih.gov/snp/specs/docsum_eutils.xsd" ><DocumentSummary uid="268"><SNP_ID>268</SNP_ID><ALLELE_ORIGIN/><GLOBAL_MAFS><MAF><STUDY>1000Genomes</STUDY><FREQ>G=0.005192/26</FREQ></MAF><MAF><STUDY>ALSPAC</STUDY><FREQ>G=0.018682/72</FREQ></MAF><MAF><STUDY>Estonian</STUDY><FREQ>G=0.023884/107</FREQ></MAF><MAF><STUDY>ExAC</STUDY><FREQ>G=0.013363/1622</FREQ></MAF><MAF><STUDY>FINRISK</STUDY><FREQ>G=0.013158/4</FREQ></MAF><MAF><STUDY>GENOME_DK</STUDY><FREQ>G=0.05/2</FREQ></MAF><MAF><STUDY>GnomAD</STUDY><FREQ>G=0.013274/1862</FREQ></MAF><MAF><STUDY>GnomAD_exomes</STUDY><FREQ>G=0.01278/3212</FREQ></MAF><MAF><STUDY>GoNL</STUDY><FREQ>G=0.03006/30</FREQ></MAF><MAF><STUDY>HapMap</STUDY><FREQ>G=0.00565/4</FREQ></MAF><MAF><STUDY>MGP</STUDY><FREQ>G=0.013109/7</FREQ></MAF><MAF><STUDY>Northern

In [10]:
Entrez.email = "dbsnp-user@nih.gov"  # provide your user email
Entrez.api_key = None

# esummary handle: request document summaries for two IDs in the "structure" database
eShandle = Entrez.esummary(db="structure", id="19923,19922")

# Parse the response, then close the handle so the response is released
# even if parsing fails.
try:
    record = Entrez.read(eShandle)
finally:
    eShandle.close()

# Print each summary as tab-separated "field value" lines, with an empty line between summaries.
for summary in record:
    for field, value in summary.items():
        print(field, '\t', value)
    print('')


Item 	 []
Id 	 19923
PdbAcc 	 1L5J
PdbDescr 	 Crystal Structure Of E. Coli Aconitase B
EC 	 4.2.1.3
Resolution 	 2.4
ExpMethod 	 X-Ray Diffraction
PdbClass 	 Lyase
PdbDepositDate 	 2002/03/07 00:00
MMDBEntryDate 	 2002/07/11 00:00
OrganismList 	 ['Escherichia coli']
LigCode 	 F3S|TRA
LigCount 	 2
ModProteinResCount 	 0
ModDNAResCount 	 0
ModRNAResCount 	 0
ProteinChainCount 	 
DNAChainCount 	 
RNAChainCount 	 

Item 	 []
Id 	 19922
PdbAcc 	 1L5A
PdbDescr 	 Crystal Structure Of Vibh, An Nrps Condensation Enzyme
EC 	 
Resolution 	 2.55
ExpMethod 	 X-Ray Diffraction
PdbClass 	 Biosynthetic Protein
PdbDepositDate 	 2002/03/06 00:00
MMDBEntryDate 	 2002/07/11 00:00
OrganismList 	 ['Vibrio cholerae']
LigCode 	 
LigCount 	 0
ModProteinResCount 	 0
ModDNAResCount 	 0
ModRNAResCount 	 0
ProteinChainCount 	 
DNAChainCount 	 
RNAChainCount 	 



In [11]:
Entrez.email = "dbsnp-user@nih.gov"  # provide your user email
Entrez.api_key = None

# elink handle: follow the "pubmed_pubmed" link from the given PubMed IDs
pmid = '10818692,27832061'
eLhandle = Entrez.elink(dbfrom="pubmed", id=pmid, linkname="pubmed_pubmed")

# Parse the response, then close the handle so the response is released
# even if parsing fails.
try:
    record = Entrez.read(eLhandle)
finally:
    eLhandle.close()

# Print each link set as tab-separated "field value" lines, with an empty line between link sets.
for linkset in record:
    for field, value in linkset.items():
        print(field, '\t', value)
    print()

ERROR 	 []
LinkSetDb 	 [{'Link': [{'Id': '27832061'}, {'Id': '10818692'}, {'Id': '16610138'}, {'Id': '10075804'}, {'Id': '9743125'}, {'Id': '9697222'}, {'Id': '8879582'}, {'Id': '9081313'}, {'Id': '10644173'}, {'Id': '21618392'}, {'Id': '9492623'}, {'Id': '12567619'}, {'Id': '11014159'}, {'Id': '18551413'}, {'Id': '12017726'}, {'Id': '11837228'}, {'Id': '15459713'}, {'Id': '14505206'}, {'Id': '15054333'}, {'Id': '14681453'}, {'Id': '18551414'}, {'Id': '18769385'}, {'Id': '10185160'}, {'Id': '9846866'}, {'Id': '9210599'}, {'Id': '21871970'}, {'Id': '16539535'}, {'Id': '19194660'}, {'Id': '11398542'}, {'Id': '8634905'}, {'Id': '20470293'}, {'Id': '10550026'}, {'Id': '19384717'}, {'Id': '15067171'}, {'Id': '14681349'}, {'Id': '12717820'}, {'Id': '9847131'}, {'Id': '12721567'}, {'Id': '25905639'}, {'Id': '11503515'}, {'Id': '12594481'}, {'Id': '11280929'}, {'Id': '11125037'}, {'Id': '8374583'}, {'Id': '16351750'}, {'Id': '21311007'}, {'Id': '12700734'}, {'Id': '15085139'}, {'Id': '9104512'