# NCBI Databases Information

In [2]:
from Bio import Entrez
# Contact address set on the Entrez module before any request is made below.
# NOTE(review): this is a hard-coded personal address — replace it with your
# own before re-running the notebook.
Entrez.email = 'sbwiecko@free.fr'

## Obtaining information about the Entrez databases

See also [https://biopython.org/DIST/docs/tutorial/Tutorial.html#sec145](https://biopython.org/DIST/docs/tutorial/Tutorial.html#sec145).

In [3]:
# EInfo called without arguments: the parsed reply holds the names of the
# available Entrez databases under the 'DbList' key.
# The handle is closed as soon as the reply has been parsed.
with Entrez.einfo() as handle:
    record = Entrez.read(handle)

print(type(record))
print(record.keys())
print(record['DbList'])

<class 'Bio.Entrez.Parser.DictionaryElement'>
dict_keys(['DbList'])
['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'structure', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'proteinclusters', 'pcassay', 'protfam', 'biosystems', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'gtr']


## Read information from a particular database

In [4]:
# Passing `db` restricts EInfo to a single database; the details of that
# database are then read from the 'DbInfo' key of the parsed reply.
# The handle is closed as soon as the reply has been parsed.
with Entrez.einfo(db='genome') as handle:
    record = Entrez.read(handle)

# Print every attribute of the database, one "name : value" pair per line.
for key, value in record['DbInfo'].items():
    print(key, ':', value)

DbName : genome
MenuName : Genome
Description : Genomic sequences, contigs, and maps
DbBuild : Build211011-0955.1
Count : 81132
LastUpdate : 2021/10/11 10:36
FieldList : [{'Name': 'ALL', 'FullName': 'All Fields', 'Description': 'All terms from all searchable fields', 'TermCount': '13097606', 'IsDate': 'N', 'IsNumerical': 'N', 'SingleToken': 'N', 'Hierarchy': 'N', 'IsHidden': 'N'}, {'Name': 'UID', 'FullName': 'UID', 'Description': 'Unique number assigned to genome', 'TermCount': '0', 'IsDate': 'N', 'IsNumerical': 'Y', 'SingleToken': 'Y', 'Hierarchy': 'N', 'IsHidden': 'Y'}, {'Name': 'FILT', 'FullName': 'Filter', 'Description': 'Limits the records', 'TermCount': '17', 'IsDate': 'N', 'IsNumerical': 'N', 'SingleToken': 'Y', 'Hierarchy': 'N', 'IsHidden': 'N'}, {'Name': 'ORGN', 'FullName': 'Organism', 'Description': 'Organism', 'TermCount': '610398', 'IsDate': 'N', 'IsNumerical': 'N', 'SingleToken': 'Y', 'Hierarchy': 'Y', 'IsHidden': 'N'}, {'Name': 'PID', 'FullName': 'ProjectID', 'Description

In [12]:
# Description of the first entry in the PMC database's 'FieldList'.
pmc_info = Entrez.read(Entrez.einfo(db='pmc'))['DbInfo']
first_field = pmc_info['FieldList'][0]
first_field['Description']

'All terms from all searchable fields'

## Obtaining spelling suggestions

ESpell suggests spelling corrections for a query term. See also [https://biopython.org/DIST/docs/tutorial/Tutorial.html#sec152](https://biopython.org/DIST/docs/tutorial/Tutorial.html#sec152).

In [13]:
term = 'biobython'  # deliberately misspelled query term

# Ask ESpell for a spelling suggestion for `term` in the PMC database.
# The handle is closed as soon as the reply has been parsed.
with Entrez.espell(db='pmc', term=term) as handle:
    record = Entrez.read(handle)

print(type(record))
print(record.keys())

# Print every field of the reply, one "name : value" pair per line.
for key, value in record.items():
    print(key, ':', value)

<class 'Bio.Entrez.Parser.DictionaryElement'>
dict_keys(['Database', 'Query', 'CorrectedQuery', 'SpelledQuery'])
Database : pmc
Query : biobython
CorrectedQuery : biopython
SpelledQuery : ['', 'biopython']


### Exercise: spelling correction

The following list contains 45 scientific names for animals. We want to ensure that these names are correct. For each name, print the query and the correction you received.

In [None]:
# Input for the spelling-correction exercise: 45 scientific names to check.
# The entries are kept verbatim on purpose — the list contains a repeated name
# ('Canis lupus'), entries with a trailing space, and possibly misspelled
# names; do not "clean" it, since checking these names is the exercise.
sciNames = [
    'Bos gaurus',
    'Antelope cervicapra',
    'Gazella bennettii',
    'Boselaphus tragocamelus',
    'Canis lupus',
    'Panthera leo',
    'Elephas maximus',
    'Equus africanus',
    'Panthera pardus',
    'Cervus canadensis',
    'Pavo cristatus',
    'Grus leucogeranus',
    'Vulpes vulpes',
    'Rhinoceros unicornis',
    'Panthera Tigris',
    'Crocodylus palustris',
    'Gavialis gangeticus',
    'Equus caballus',
    'Equus quagga',
    'Babalus bubalis',
    'Sus scrofa',
    'Camelus dromedaries',
    'Giraffa camelopardalis ',
    'Hemidactylus flaviviridis',
    'Hippopotamus amphibius',
    'Macaca mulatta',
    'Canis lupus',
    'Felis domesticus',
    'Acinonyx jubatus',
    'Rattus rattus',
    'Mus musculus',
    'Oryctolagus cuniculus',
    'Bubo virginianus',
    'Passer domesticus',
    'Corvus splendens',
    'Acridotheres tristis',
    'Psittacula eupatria',
    'Molpastes cafer',
    'Eudynamis scolopaccus',
    'Columba livia',
    'Naja naja',
    'Ophiophagus hannah',
    'Hydrophiinae ',
    'Python molurus',
    'Ptyas mucosa'
]