# Mapping gene identifiers

In [1]:
import biu

# Set the default data location
# NOTE(review): this is an absolute, user-specific path; the cell outputs further
# down show the downloads and humanMappings files living under it. Change it to a
# directory that exists on your own system before running the notebook.
where = '/exports/molepi/tgehrmann/data/'
# Presumably this must be configured before any mapping object is constructed,
# since the mapping files are resolved relative to it — TODO confirm.
biu.config.settings.setWhere(where)
# NOTE(review): overwritePickle=True presumably forces the pickled indexes to be
# rebuilt instead of reused — confirm against the biu documentation.
hm = biu.maps.Human(overwritePickle=True) # Gene mappings for human genome


def exampleMapping(GMO, symbol="MTOR"):
    """Print a round trip of identifier lookups starting from a gene symbol.

    Four "source -> target" lines are printed, in this order:
    symbol -> GeneID, GeneID -> symbol, symbol -> Ensembl ID,
    Ensembl ID -> symbol.

    Parameters
    ----------
    GMO : Gene Mapping Object
        Object providing getSymbolGeneID, getGeneIDSymbol, getSymbolEnsembl
        and getEnsemblSymbol. Only the first element ([0]) of each lookup
        result is used.
    symbol : str, optional
        Gene symbol to start from. Defaults to "MTOR", which reproduces the
        original hard-coded example.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): a lookup without any match presumably fails at the [0]
    index; the exact error depends on what the mapping object returns.
    """
    geneid = GMO.getSymbolGeneID(symbol)[0]
    print("%s -> %s" % (symbol, geneid))

    # Each lookup is chained on the previous result (not on the input symbol),
    # so the printed lines show a genuine round trip through the mappings.
    symbol_from_geneid = GMO.getGeneIDSymbol(geneid)[0]
    print("%s -> %s" % (geneid, symbol_from_geneid))

    ensembl = GMO.getSymbolEnsembl(symbol_from_geneid)[0]
    print("%s -> %s" % (symbol_from_geneid, ensembl))

    symbol_from_ensembl = GMO.getEnsemblSymbol(ensembl)[0]
    print("%s -> %s" % (ensembl, symbol_from_ensembl))
#edef

def exampleMappingSilent(GMO, symbol="MTOR"):
    """Run the same four lookups as exampleMapping without printing anything.

    Intended for timing: the lookups are performed in the order
    symbol -> GeneID -> symbol -> Ensembl ID -> symbol and the results are
    discarded.

    Parameters
    ----------
    GMO : Gene Mapping Object
        Object providing getSymbolGeneID, getGeneIDSymbol, getSymbolEnsembl
        and getEnsemblSymbol. Only the first element ([0]) of each lookup
        result is used.
    symbol : str, optional
        Gene symbol to start from. Defaults to "MTOR", which reproduces the
        original hard-coded example.

    Returns
    -------
    None
    """
    geneid = GMO.getSymbolGeneID(symbol)[0]
    symbol_from_geneid = GMO.getGeneIDSymbol(geneid)[0]
    ensembl = GMO.getSymbolEnsembl(symbol_from_geneid)[0]
    # The final result is unused on purpose: the call itself is what is timed.
    GMO.getEnsemblSymbol(ensembl)[0]
#edef

In [2]:
# Print the mapping object's summary: its indexes ("Objects") and backing files ("Files").
# NOTE(review): the [X] / [ ] markers presumably indicate what is already loaded / present — confirm.
print(hm)

HumanMapping object
 Objects:
  * [ ] geneid2ensemblgene
  * [ ] geneid2genesymbol
  * [ ] geneid2uniprot
 Files:
  * [X] geneid2ensemblgene : /exports/molepi/tgehrmann/data/humanMappings/geneid2ensembl.tsv
  * [ ] gene2refseq : /exports/molepi/tgehrmann/data/humanMappings/gene2refseq.tsv
  * [X] geneid2genesymbol : /exports/molepi/tgehrmann/data/humanMappings/geneinfo.tsv
  * [ ] uniprotmap : /exports/molepi/tgehrmann/data/humanMappings/uniprotmap.tsv



## Mapping between Ensembl/GeneID/Symbols

There are several functions defined to switch between the different IDs:

 * `getSymbolGeneID`
 * `getSymbolEnsembl`
 * `getGeneIDSymbol`
 * `getGeneIDEnsembl`
 * `getEnsemblSymbol`
 * `getEnsemblGeneID`

In [3]:
# Run the example round trip of lookups against the human mapping object `hm`.
exampleMapping(hm)

/exports/molepi/tgehrmann/data/_downloads/54bb07ff35ab22c2ee291978b19d97d0d3fbf7b6
0
0
0
/exports/molepi/tgehrmann/data/humanMappings/geneinfo.tsv


D: /exports/molepi/tgehrmann/data/_downloads
D: 54bb07ff35ab22c2ee291978b19d97d0d3fbf7b6
D: 
D: Loading the index from pickle


MTOR -> 2475
2475 -> MTOR
/exports/molepi/tgehrmann/data/_downloads/975a9d25db851d17c89d6306f7162e672c2e66c3
0
0
0
/exports/molepi/tgehrmann/data/humanMappings/geneid2ensembl.tsv


D: /exports/molepi/tgehrmann/data/_downloads
D: 975a9d25db851d17c89d6306f7162e672c2e66c3
D: 
D: Loading the index from pickle


MTOR -> ENSG00000198793
ENSG00000198793 -> MTOR


### Mapping with SQLite instead of pickled Maps
The SQLite-backed mapping initializes quickly, but individual lookups are slower than with the pickled maps.
Because initialization is so fast, we can query a larger number of structures, including the gene2refseq index and the uniprotmap, which is prohibitively large for the pickled map.

In [4]:
# Human gene mappings backed by SQLite instead of pickled maps.
hms = biu.maps.HumanS()

D: Initializing the SQLiteResourceManager object NOW


In [5]:
# Same example round trip as before, this time against the SQLite-backed object.
exampleMapping(hms)

MTOR -> 2475
2475 -> MTOR
MTOR -> ENSG00000198793
ENSG00000198793 -> MTOR


In [6]:
# Time the same four lookups (without printing) on both back ends to compare
# the pickled-map object (hm) with the SQLite-backed object (hms).
print("Map Lookup")
%timeit exampleMappingSilent(hm)
print("SQLite lookup")
%timeit exampleMappingSilent(hms)

Map Lookup
16.5 µs ± 475 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
SQLite lookup
714 ms ± 4.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Get all the mappings

If you have one ID and want the others, use one of the following functions:

 * `fromEnsembl`
 * `fromGeneID`
 * `fromSymbol`

Each returns a named tuple containing all of the IDs.

In [7]:
# All identifiers for a gene, starting from its Ensembl gene ID.
hm.fromEnsembl("ENSG00000198793")

GeneIDMapping(geneID='2475', ensemblID='ENSG00000198793', symbol='MTOR')

In [8]:
# All identifiers for a gene, starting from its GeneID (passed as a string).
hm.fromGeneID("2475")

GeneIDMapping(geneID='2475', ensemblID='ENSG00000198793', symbol='MTOR')

In [9]:
# All identifiers for a gene, starting from its gene symbol.
hm.fromSymbol("MTOR")

GeneIDMapping(geneID='2475', ensemblID='ENSG00000198793', symbol='MTOR')