# Accessing the CADD database

In [1]:
import biu as biu
import numpy as np
# Set the default data location that biu is configured with.
# NOTE(review): this is an absolute, machine-specific path — change it to a
# directory that exists on your own system before running the notebook.
where = '/exports/molepi/tgehrmann/data/'
biu.config.settings.setWhere(where)

# Which datasets will we use?
# `cadd` is the handle used by every query in the rest of this notebook.
cadd   = biu.db.CADD() # CADD Scores

## Get the CADD scores at a specific base

In [2]:
# Query one position: 22 is presumably the chromosome and 39917455 the
# position (argument meaning inferred from the outputs in this notebook).
# With no `alt` given, the result is a dict mapping each alternative allele
# to its score.
cadd.query(22, 39917455)

D: Initializing the TabixTSVResourceManager object NOW


{'A': 23.0, 'G': 15.47, 'T': 23.3}

## Get the CADD score for a specific variant at a specific base

In [3]:
# Passing `alt` restricts the query to that one alternative allele, so a
# single score is returned instead of a dict.
cadd.query(22, 39917455, alt='A')

23.0

## Get the CADD scores for a region

In [4]:
# A third positional argument turns this into a range query. The result is
# keyed by (position, alt) tuples; the output contains both 39917455 and
# 39917460, so the range appears to be inclusive at both ends.
cadd.query(22, 39917455, 39917460)

{(39917455, 'A'): 23.0,
 (39917455, 'G'): 15.47,
 (39917455, 'T'): 23.3,
 (39917456, 'A'): 17.69,
 (39917456, 'G'): 11.54,
 (39917456, 'T'): 13.95,
 (39917457, 'A'): 22.2,
 (39917457, 'C'): 25.0,
 (39917457, 'T'): 37.0,
 (39917458, 'C'): 24.5,
 (39917458, 'G'): 26.2,
 (39917458, 'T'): 26.0,
 (39917459, 'C'): 20.7,
 (39917459, 'G'): 0.334,
 (39917459, 'T'): 21.5,
 (39917460, 'C'): 14.27,
 (39917460, 'G'): 11.57,
 (39917460, 'T'): 14.28}

## Get the CADD scores for a specific alternative allele across a region

In [5]:
# Range query restricted to one alternative allele: the result is keyed by
# position only, with one score per position in the range.
cadd.query(22, 39917455, 39917460, alt='T')

{39917455: 23.3,
 39917456: 13.95,
 39917457: 37.0,
 39917458: 26.0,
 39917459: 21.5,
 39917460: 14.28}

## Get the CADD scores for several regions

In [6]:
# Each region is a 3-tuple, presumably (chromosome, start, end). The scores
# of all regions are returned together in one dict keyed by (position, alt).
# NOTE(review): the keys carry no chromosome, so regions on different
# chromosomes that share positions could collide — confirm before relying on it.
cadd.queryRegions([(22, 39917455, 39917457), (22, 39917470, 39917472)])

{(39917455, 'A'): 23.0,
 (39917455, 'G'): 15.47,
 (39917455, 'T'): 23.3,
 (39917456, 'A'): 17.69,
 (39917456, 'G'): 11.54,
 (39917456, 'T'): 13.95,
 (39917457, 'A'): 22.2,
 (39917457, 'C'): 25.0,
 (39917457, 'T'): 37.0,
 (39917470, 'A'): 25.1,
 (39917470, 'C'): 24.5,
 (39917470, 'G'): 25.1,
 (39917471, 'A'): 7.153,
 (39917471, 'C'): 2.946,
 (39917471, 'T'): 5.053,
 (39917472, 'C'): 22.9,
 (39917472, 'G'): 14.56,
 (39917472, 'T'): 24.2}

## Get a CADD score percentile threshold for a region
The threshold is the requested percentile of the CADD scores in the region, which may help you set a region-specific CADD cutoff.

In [7]:
# With no percentile argument, the default (the 95th percentile) is used.
# A single number is returned for the whole region.
cadd.regionThresh(22, 39917455, 39917676)

26.775000000000002

In [8]:
# Arbitrary percentile: the fourth positional argument selects it
# (here the 50th percentile).
cadd.regionThresh(22, 39917455, 39917676, 50)

20.7

## Get a CADD score percentile threshold for several regions

In [9]:
# Takes a list of region 3-tuples (presumably (chromosome, start, end)) and
# returns one threshold for all of them together. No percentile is passed
# here, so the default applies (presumably the 95th, as for regionThresh —
# TODO confirm).
cadd.regionsThresh([ (22, 39917455, 39917676),
                     (22, 39917778, 39918607)] )

28.125000000000004