# EInfo: Obtaining information about the Entrez databases

In [1]:
from Bio import Entrez

# Contact address sent along with every E-utilities request made through Entrez.
Entrez.email = "vela.vela.luis@gmail.com"

# EInfo without arguments: the response is the XML list of Entrez databases.
handle = Entrez.einfo()
try:
    result = handle.read()
finally:
    # Close the handle even when reading the response fails, so it is never leaked.
    handle.close()
print(result)

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE eInfoResult PUBLIC "-//NLM//DTD einfo 20130322//EN" "https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20130322/einfo.dtd">
<eInfoResult>
<DbList>

	<DbName>pubmed</DbName>
	<DbName>protein</DbName>
	<DbName>nuccore</DbName>
	<DbName>ipg</DbName>
	<DbName>nucleotide</DbName>
	<DbName>nucgss</DbName>
	<DbName>nucest</DbName>
	<DbName>structure</DbName>
	<DbName>sparcle</DbName>
	<DbName>genome</DbName>
	<DbName>annotinfo</DbName>
	<DbName>assembly</DbName>
	<DbName>bioproject</DbName>
	<DbName>biosample</DbName>
	<DbName>blastdbinfo</DbName>
	<DbName>books</DbName>
	<DbName>cdd</DbName>
	<DbName>clinvar</DbName>
	<DbName>clone</DbName>
	<DbName>gap</DbName>
	<DbName>gapplus</DbName>
	<DbName>grasp</DbName>
	<DbName>dbvar</DbName>
	<DbName>gene</DbName>
	<DbName>gds</DbName>
	<DbName>geoprofiles</DbName>
	<DbName>homologene</DbName>
	<DbName>medgen</DbName>
	<DbName>mesh</DbName>
	<DbName>ncbisearch</DbName>
	<DbName>nlmcatalog</DbName>
	<DbName

In [2]:
from Bio import Entrez

# EInfo again, but parsed into Python objects by Entrez.read instead of raw XML.
handle = Entrez.einfo()
try:
    record = Entrez.read(handle)
finally:
    # Close the handle once parsing is done (or has failed) so it is not leaked.
    handle.close()

# Top-level keys of the parsed response.
record.keys()

dict_keys(['DbList'])

In [3]:
# Names of the Entrez databases listed in the parsed EInfo response.
record["DbList"]

['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'nucgss', 'nucest', 'structure', 'sparcle', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'unigene', 'gencoll', 'gtr']

In [4]:
# Passing db= asks EInfo for the metadata of one database, here "gds".
handle = Entrez.einfo(db="gds")
try:
    record = Entrez.read(handle)
finally:
    # Close the handle once parsing is done (or has failed) so it is not leaked.
    handle.close()

# Human-readable description of the database.
record["DbInfo"]["Description"]

'GEO DataSets'

In [5]:
# Record count reported for this database in the EInfo response.
record["DbInfo"]["Count"]

'3064302'

In [6]:
# Last-update timestamp reported for this database in the EInfo response.
record["DbInfo"]["LastUpdate"]

'2019/03/10 17:09'

In [7]:
# One line per searchable field: short name, full name, description.
field_line = "%(Name)s, %(FullName)s, %(Description)s"
searchable_fields = record["DbInfo"]["FieldList"]
for field_info in searchable_fields:
    print(field_line % field_info)

ALL, All Fields, All terms from all searchable fields
UID, UID, Unique number assigned to publication
FILT, Filter, Limits the records
ORGN, Organism, exploded organism names
ACCN, GEO Accession, accession for GDS (DataSet), GPL (Platform), GSM (Sample), GSE (Series)
TITL, Title, Words in title of record
DESC, Description, Text from description, summary and other similar fields
SFIL, Supplementary Files, Supplementary Files
ETYP, Entry Type, Entry type (DataSet or Series)
STYP, Sample Type, Sample type
VTYP, Sample Value Type, type of values, e.g. log ratio, count
PTYP, Platform Technology Type, Platform technology type
GTYP, DataSet Type, type of dataset
NSAM, Number of Samples, Number of samples
SRC, Sample Source, sample source
AUTH, Author, author of the GEO Sample, Platform or Series
INST, Submitter Institute, institute, or organization affiliatedd with contributers
NPRO, Number of Platform Probes, number of platform probes
SSTP, Subset Variable Type, subset variable type
SSDE, Su

# ESearch: Searching the Entrez databases

In [12]:
from Bio import Entrez

# Contact address sent along with every E-utilities request made through Entrez.
Entrez.email = "vela.vela.luis@gmail.com"

# Search the "gds" database for records whose accession field matches GSE117746.
handle = Entrez.esearch(db="gds", term='GSE117746[ACCN]')
try:
    record = Entrez.read(handle)
finally:
    # Close the handle once parsing is done (or has failed) so it is not leaked.
    handle.close()

# UIDs of the matching records.
print(record['IdList'])

['200117746', '100018573', '303308153', '303308152', '303308151', '303308150', '303308149', '303308148']


In [13]:
# Repeat the search with retmax set to the total hit count reported by the
# previous search, so the returned ID list is not capped below that count.
count = record['Count']
handle = Entrez.esearch(db="gds", term='GSE117746[ACCN]', retmax=count)
try:
    record = Entrez.read(handle)
finally:
    # Close the handle once parsing is done (or has failed) so it is not leaked.
    handle.close()

# Number of UIDs actually returned.
print(len(record['IdList']))

8


In [15]:
# Show at most the first 25 UIDs from the search result.
record['IdList'][:25]

['200117746',
 '100018573',
 '303308153',
 '303308152',
 '303308151',
 '303308150',
 '303308149',
 '303308148']

In [16]:
# Check whether this specific UID is among the search results.
'303308148' in record['IdList']

True

# ESummary: Retrieving summaries from primary IDs

In [17]:
from Bio import Entrez

# Contact address sent along with every E-utilities request made through Entrez.
Entrez.email = "vela.vela.luis@gmail.com"

# Fetch the document summary for a single UID from the "gds" database.
handle = Entrez.esummary(db="gds", id="303308148")
try:
    record = Entrez.read(handle)
finally:
    # Close the handle once parsing is done (or has failed) so it is not leaked.
    handle.close()

In [18]:
# Field names available in the first document summary of the response.
record[0].keys()

dict_keys(['Item', 'Id', 'Accession', 'GDS', 'title', 'summary', 'GPL', 'GSE', 'taxon', 'entryType', 'gdsType', 'ptechType', 'valType', 'SSInfo', 'subsetInfo', 'PDAT', 'suppFile', 'Samples', 'Relations', 'ExtRelations', 'n_samples', 'SeriesTitle', 'PlatformTitle', 'PlatformTaxa', 'SamplesTaxa', 'PubMedIds', 'Projects', 'FTPLink', 'GEO2R'])

In [19]:
# The 'summary' field of the first document summary.
record[0]['summary']

'T47D cells'

In [20]:
# Dump every field of the first document summary, one "name : value" pair per line.
first_summary = record[0]
for field_name, field_value in first_summary.items():
    print(field_name, ' : ', field_value)

Item  :  []
Id  :  303308148
Accession  :  GSM3308148
GDS  :  
title  :  T47D control RNA 1
summary  :  T47D cells
GPL  :  18573
GSE  :  117746;117748
taxon  :  Homo sapiens
entryType  :  GSM
gdsType  :  
ptechType  :  
valType  :  
SSInfo  :  
subsetInfo  :  
PDAT  :  2019/03/04
suppFile  :  
Samples  :  []
Relations  :  []
ExtRelations  :  [DictElement({'RelationType': 'SRA', 'TargetObject': 'SRX4474095', 'TargetFTPLink': 'ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByExp/sra/SRX/SRX447/SRX4474095/'}, attributes={})]
n_samples  :  0
SeriesTitle  :  
PlatformTitle  :  
PlatformTaxa  :  
SamplesTaxa  :  
PubMedIds  :  []
Projects  :  []
FTPLink  :  
GEO2R  :  


# Automating the process: summaries for every record of a GSE series

In [33]:
from Bio import Entrez
Entrez.email = "vela.vela.luis@gmail.com" 

# Accession to look up and the field tag appended to it; the search cell
# concatenates the two to build the ESearch term.
query_dataset = 'GSE117746'
type_of_query = '[ACCN]'

In [34]:
# Search the "gds" database using the term built from the configured
# accession and field tag.
handle = Entrez.esearch(db="gds", term=query_dataset+type_of_query)
try:
    record = Entrez.read(handle)
finally:
    # Close the handle once parsing is done (or has failed) so it is not leaked.
    handle.close()

# UIDs of every record matched by the search.
every_id = record['IdList']
print(every_id)

['200117746', '100018573', '303308153', '303308152', '303308151', '303308150', '303308149', '303308148']


In [36]:
# Fetch and print the summary text of every UID found by the search.
for e_id in every_id:
    handle = Entrez.esummary(db="gds", id=e_id)
    try:
        record = Entrez.read(handle)
    finally:
        # A new handle is opened on each iteration; close it before moving on
        # so that open handles do not accumulate over the loop.
        handle.close()
    print('ID: ', e_id)
    print('Summary: ', record[0]['summary'])
    print()

ID:  200117746
Summary:  CDK4/6 inhibition is now part of the standard armamentarium for patients with estrogen receptor (ER)-positive breast cancer, so that defining mechanisms of resistance is a pressing issue. Here, we identify increased CDK6 expression as a key determinant of acquired resistance after exposure to palbociclib in ER-positive breast cancer cells. Increased CDK6 in resistant cells was dependent on TGF-β pathway suppression via miR-432-5p expression. Exosomal miR-432-5p expression mediated transfer of the resistance phenotype between neighboring cell populations. We confirmed these data in pre-treatment and post-progression biopsies from a parotid cancer patient who had responded to ribociclib, demonstrating clinical relevance of this mechanism. Additionally, the CDK4/6 inhibitor resistance phenotype can be reversed in vitro and in vivo by a prolonged drug holiday.

ID:  100018573
Summary:  

ID:  303308153
Summary:  T47D cells

ID:  303308152
Summary:  T47D cells

ID: 