# Access DIOPT mappings

DIOPT is an integrative orthology database that integrates the output of 18 tools to make orthology predictions between a set of species. We make use of the www.gene2function.org service, which allows us to retrieve results more easily.

In [1]:
import biu
import pandas as pd

## Inspecting the DIOPT object

In [2]:
# Create the DIOPT map object; `diopt` is reused by the query cells below.
diopt = biu.maps.DIOPT()
# Printing the object gives a text summary of the objects and files it tracks.
print(diopt)

DIOPT object
 Objects:
 Files:
  * [X] data : /home/tgehrmann/repos/BIU/docs/diopt/h_sapiens.data.sqlite



## Querying the DIOPT object
The DIOPT object takes as input a single NCBI Entrez Gene ID. This can be either a string or a number.
It returns one record for each orthologous gene found.

In [3]:
# Index the DIOPT object with one Entrez Gene ID (4763 is NF1 in the output);
# the displayed value is a list with one DIOPTResult per species hit.
diopt[4763]

[DIOPTResult(species='h_sapiens', ncbi='4763', symbol='NF1', id='7765', score='NA', confidence='', database='HGNC'),
 DIOPTResult(species='m_musculus', ncbi='18015', symbol='Nf1', id='97306', score='15/16', confidence='high', database='MGI'),
 DIOPTResult(species='r_norvegicus', ncbi='24592', symbol='Nf1', id='3168', score='11/14', confidence='high', database='RGD'),
 DIOPTResult(species='x_tropicalis', ncbi='100493879', symbol='nf1', id='XB-GENE-488024', score='6/12', confidence='high', database='Xenbase'),
 DIOPTResult(species='d_reiro', ncbi='564518', symbol='nf1b', id='ZDB-GENE-091111-4', score='13/15', confidence='high', database='ZFIN'),
 DIOPTResult(species='d_melanogaster', ncbi='43149', symbol='Nf1', id='FBgn0015269', score='12/15', confidence='high', database='FLYBASE'),
 DIOPTResult(species='c_elegans', ncbi='180530', symbol='gap-1', id='WBGene00001515', score='1/15', confidence='low', database='WormBase'),
 DIOPTResult(species='s_cerevisiae', ncbi='852437', symbol='IRA1', i

## Bulk lookup of orthologs

Sometimes it is handy to look up the orthologs of multiple genes at once. This can be done with the `batch` function:

In [4]:
# Let's first get some Entrez Gene IDs for a handful of HGNC gene symbols:
symbols = ['NF1', 'RRAS', 'DUSP3', 'ATF4', 'RAF1', 'MKNK2', 'SHC4', 'IRS1',
       'LAMTOR3', 'SHC2', 'RASGRF2', 'RASGRP3', 'ETS2', 'ETS1', 'EGFR',
       'RPS6KA4', 'PTPRR', 'RASA1']

bm = biu.maps.BioMart()


def ncbiFromSymbol(s):
    """Return a single Entrez Gene ID for the HGNC symbol ``s``.

    The ID is taken from the first record that ``bm.hgnc_symbol[s]`` yields.
    Going through ``list(set(...))[0]`` instead would pick an arbitrary ID
    whenever a symbol maps to more than one, because set iteration order is
    not guaranteed; taking the first record keeps the result reproducible.

    Parameters:
        s: HGNC gene symbol, e.g. ``'NF1'``.

    Returns:
        The ``entrezgene`` attribute of the first matching BioMart record.

    Raises:
        IndexError: if the lookup yields no records for ``s``.
    """
    # NOTE(review): assumes the lookup yields records in a stable order - confirm.
    for record in bm.hgnc_symbol[s]:
        return record.entrezgene
    raise IndexError("No Entrez Gene ID found for HGNC symbol %r" % (s,))


# One Entrez Gene ID per symbol, in the same order as `symbols`.
geneIDs = [ncbiFromSymbol(s) for s in symbols]

In [11]:
# Query all genes in one call, then relabel the rows from Entrez Gene IDs
# to the corresponding gene symbols so the table is easier to read.
(
    diopt
    .batch(geneIDs)
    .rename(index=dict(zip(geneIDs, symbols)))
)

Unnamed: 0,h_sapiens,s_pombe,s_cerevisiae,c_elegans,d_melanogaster,d_reiro,x_tropicalis,m_musculus,r_norvegicus
NF1,"(NF1, 7765)",(),"(IRA1, S000000344)",(),"(Nf1, FBgn0015269)","(nf1b, ZDB-GENE-091111-4)","(nf1, XB-GENE-488024)","(Nf1, 97306)","(Nf1, 3168)"
RRAS,"(RRAS, 10447)","(ras1, SPAC17H9.09c)","(RAS1, S000005627)","(ras-1, WBGene00004310)","(Ras64B, FBgn0003206)","(rras, ZDB-GENE-041010-217)","(rras, XB-GENE-493188)","(Rras, 98179)","(Rras, 1311443)"
DUSP3,"(DUSP3, 3069)","(pmp1, SPBC1685.01)","(YVH1, S000001465)","(C16A3.2, WBGene00015807)","(CG7378, FBgn0030976)","(dusp3a, ZDB-GENE-111207-3)","(dusp3, XB-GENE-961523)","(Dusp3, 1919599)","(Dusp3, 1560049)"
ATF4,"(ATF4, 786)",(),(),"(atf-5, WBGene00000221)","(crc, FBgn0000370)","(atf4a, ZDB-GENE-040426-2340)","(atf4, XB-GENE-983841)","(Atf4, 88096)","(Atf4, 621863)"
RAF1,"(RAF1, 9829)",(),(),"(lin-45, WBGene00003030)","(Raf, FBgn0003079)","(raf1b, ZDB-GENE-090826-2)","(raf1, XB-GENE-6053330)","(Raf1, 97847)","(Raf1, 3531)"
MKNK2,"(MKNK2, 7111)",(),(),"(mnk-1, WBGene00011304)","(Lk6, FBgn0017581)","(mknk2b, ZDB-GENE-030829-2)","(mknk2, XB-GENE-491527)","(Mknk2, 894279)","(Mknk2, 1305728)"
SHC4,"(SHC4, 16743)",(),(),"(shc-1, WBGene00018788)","(Shc, FBgn0015296)","(shc2, ZDB-GENE-050208-666)","(shc4, XB-GENE-490134)","(Shc4, 2655364)","(Shc4, 1583644)"
IRS1,"(IRS1, 6125)",(),(),(),"(chico, FBgn0024248)","(irs1, ZDB-GENE-030131-872)","(irs1, XB-GENE-478672)","(Irs1, 99454)","(Irs1, 2922)"
LAMTOR3,"(LAMTOR3, 15606)",(),(),"(lmtr-3, WBGene00007390)","(CG5110, FBgn0032642)","(lamtor3, ZDB-GENE-050522-345)","(lamtor3-like, XB-GENE-942321)","(Lamtor3, 1929467)","(Lamtor3, 1307133)"
SHC2,"(SHC2, 29869)",(),(),"(shc-1, WBGene00018788)","(Shc, FBgn0015296)","(shc2, ZDB-GENE-050208-666)","(shc2, XB-GENE-6073771)","(Shc2, 106180)","(Shc2, 1307137)"
