# Accessing the BBMRI dataset

In [1]:
import biu

## Initializing and examining the structure

In [2]:
# Create the BBMRI dataset handle; the later cells in this notebook query
# through this `bbmri` name.
bbmri = biu.db.BBMRI()
# Printing the handle lists where the data lives, its version, and the
# per-chromosome VCF objects and files it tracks.
print(bbmri)

BBMRI object
 Where: /exports/molepi/BBMRISEQ
 Version: current
 Objects:
  * [ ] vcf[1]
  * [ ] vcf[2]
  * [ ] vcf[3]
  * [ ] vcf[4]
  * [ ] vcf[5]
  * [ ] vcf[6]
  * [ ] vcf[7]
  * [ ] vcf[8]
  * [ ] vcf[9]
  * [ ] vcf[10]
  * [ ] vcf[11]
  * [ ] vcf[12]
  * [ ] vcf[13]
  * [ ] vcf[14]
  * [ ] vcf[15]
  * [ ] vcf[16]
  * [ ] vcf[17]
  * [ ] vcf[18]
  * [ ] vcf[19]
  * [ ] vcf[20]
  * [ ] vcf[21]
  * [ ] vcf[22]
  * [ ] vcf[M]
  * [ ] vcf[X]
  * [ ] vcf[Y]
 Files:
  * [X] vcf_1 : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr1.vcf.bgz
  * [X] vcf_1_tbi : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr1.vcf.bgz.tbi
  * [X] vcf_2 : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr2.vcf.bgz
  * [X] vcf_2_tbi : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr2.vcf.bgz.tbi
  * [X] vcf_3 : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr3.vcf.bgz
  * [X] vcf_3_tbi : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr3.vcf.bgz.tbi
  * [X] vcf_4 : /exports/molepi/BBMRISEQ/tbx/merged.bbmri.chr4.vcf.bgz
  * [X] v

## Querying the structure

The data held in BBMRI are stored as tabix-indexed VCF files, so we can use the `query` and `queryRegions` methods of the VCF structures.


In [7]:
# Look up a single region: chromosome 1, positions 1000000 to 1001000,
# keeping only the variant types listed in `types`. Each returned record is
# printed on its own line.
chr1_snps = bbmri.query(1, 1000000, 1001000, types=['snp'])
for record in chr1_snps:
    print(record)

Record(CHROM=1, POS=1000760, REF=G, ALT=[A])
Record(CHROM=1, POS=1000894, REF=A, ALT=[T])
Record(CHROM=1, POS=1000902, REF=G, ALT=[A])
Record(CHROM=1, POS=1000910, REF=C, ALT=[T])
Record(CHROM=1, POS=1000930, REF=G, ALT=[T])
Record(CHROM=1, POS=1000940, REF=T, ALT=[A])


In [6]:
# Look up several regions in one call. Each tuple appears to be
# (chromosome, start, end), matching the arguments of the single-region query.
snp_regions = [(1, 1000000, 1001000), (2, 1000000, 1001000)]
for record in bbmri.queryRegions(snp_regions, types=['snp']):
    print(record)

Record(CHROM=1, POS=1000760, REF=G, ALT=[A])
Record(CHROM=1, POS=1000894, REF=A, ALT=[T])
Record(CHROM=1, POS=1000902, REF=G, ALT=[A])
Record(CHROM=1, POS=1000910, REF=C, ALT=[T])
Record(CHROM=1, POS=1000930, REF=G, ALT=[T])
Record(CHROM=1, POS=1000940, REF=T, ALT=[A])
Record(CHROM=2, POS=1000024, REF=T, ALT=[C])
Record(CHROM=2, POS=1000029, REF=G, ALT=[A])
Record(CHROM=2, POS=1000127, REF=C, ALT=[T])
Record(CHROM=2, POS=1000245, REF=A, ALT=[G])
Record(CHROM=2, POS=1000283, REF=C, ALT=[T])
Record(CHROM=2, POS=1000370, REF=G, ALT=[A])
Record(CHROM=2, POS=1000738, REF=A, ALT=[G])


In [8]:
# Same two regions as a table instead of individual records: with
# extract="summary" the call yields one row per variant id with the columns
# RR, R, RA, A, AA and O (presumably genotype counts -- TODO confirm).
summary_regions = [(1, 1000000, 1001000), (2, 1000000, 1001000)]
bbmri.queryRegions(summary_regions, types=['snp'], extract="summary")

Unnamed: 0,id,RR,R,RA,A,AA,O
0,1-1000760-G-A,97,0,1,0,0,0
1,1-1000894-A-T,97,0,1,0,0,0
2,1-1000902-G-A,95,0,1,0,0,2
3,1-1000910-C-T,71,0,25,0,1,1
4,1-1000930-G-T,97,0,1,0,0,0
5,1-1000940-T-A,97,0,1,0,0,0
6,2-1000024-T-C,93,0,5,0,0,0
7,2-1000029-G-A,28,0,51,0,19,0
8,2-1000127-C-T,77,0,21,0,0,0
9,2-1000245-A-G,96,0,2,0,0,0
