diff --git a/Makefile b/Makefile index ab12a7c87..d8e5ed880 100644 --- a/Makefile +++ b/Makefile @@ -178,6 +178,7 @@ polysomy.o: polysomy.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(bcftools_ peakfit.o: peakfit.c peakfit.h $(htslib_hts_h) $(HTSDIR)/htslib/kstring.h consensus.o: consensus.c $(htslib_hts_h) $(HTSDIR)/htslib/kseq.h rbuf.h $(bcftools_h) $(HTSDIR)/htslib/regidx.h mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_regidx_h) $(bcftools_h) $(call_h) $(bam2bcf_h) $(sample_h) +sample.o: $(sample_h) $(htslib_hts_h) $(HTSDIR)/htslib/khash_str2int.h version.o: version.h version.c test/test-rbuf.o: test/test-rbuf.c rbuf.h diff --git a/doc/bcftools.txt b/doc/bcftools.txt index c6222cf42..60b0a5b91 100644 --- a/doc/bcftools.txt +++ b/doc/bcftools.txt @@ -1360,6 +1360,12 @@ regions listed in the BED file. *--ff, --excl-flags* 'STR'|'INT':: Filter flags: skip reads with mask bits set [UNMAP,SECONDARY,QCFAIL,DUP] +*-s, --samples* 'LIST':: + list of sample names. See *<>* + +*-S, --samples-file* 'FILE':: + file of sample names. See *<>* + *-x, --ignore-overlaps*:: Disable read-pair overlap detection. diff --git a/mpileup.c b/mpileup.c index 6dfbc35b4..93f0092eb 100644 --- a/mpileup.c +++ b/mpileup.c @@ -60,12 +60,12 @@ DEALINGS IN THE SOFTWARE. */ typedef struct { int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag; - int rflag_require, rflag_filter, output_type; + int rflag_require, rflag_filter, output_type, sample_is_file; int openQ, extQ, tandemQ, min_support; // for indels double min_frac; // for indels - char *reg, *pl_list, *fai_fname, *output_fname; + char *reg, *pl_list, *fai_fname, *output_fname, *samples_list; faidx_t *fai; - void *rghash; + void *rghash_exc; regidx_t *bed; gvcf_t *gvcf; int argc; @@ -86,6 +86,7 @@ typedef struct { bam_hdr_t *h; mplp_ref_t *ref; const mplp_conf_t *conf; + void *rghash_inc; // whitelist for readgroups on per-bam basis } mplp_aux_t; typedef struct { @@ -158,6 +159,7 @@ static int mplp_func(void *data, bam1_t *b) int ret, skip = 0, ref_len; do { int has_ref; + skip = 0; ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b); if (ret < 0) break; // The 'B' cigar operation is not part of the specification, considering as obsolete. @@ -172,9 +174,16 @@ static int mplp_func(void *data, bam1_t *b) skip = regidx_overlap(ma->conf->bed, ma->h->target_name[b->core.tid],b->core.pos, bam_endpos(b)-1, NULL) ? 0 : 1; if (skip) continue; } - if (ma->conf->rghash) { // exclude read groups + if (ma->rghash_inc) + { + // skip unless read group is listed + uint8_t *rg = bam_aux_get(b, "RG"); + if ( !rg ) { skip = 1; continue; } + if ( !khash_str2int_has_key(ma->rghash_inc, (const char*)(rg+1)) ) { skip = 1; continue; } + } + if (ma->conf->rghash_exc) { // exclude read groups uint8_t *rg = bam_aux_get(b, "RG"); - skip = (rg && khash_str2int_get(ma->conf->rghash, (const char*)(rg+1), NULL)==0); + skip = (rg && khash_str2int_get(ma->conf->rghash_exc, (const char*)(rg+1), NULL)==0); if (skip) continue; } if (ma->conf->flag & MPLP_ILLUMINA13) { @@ -278,7 +287,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bam_mplp_t iter; bam_hdr_t *h = NULL; /* header of first file in input list */ char *ref; - void *rghash = NULL; + void *rghash_exc = NULL; bcf_callaux_t *bca = NULL; bcf_callret1_t *bcr = NULL; @@ -303,6 +312,24 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) exit(EXIT_FAILURE); } + void *samples_inc = NULL; + if ( conf->samples_list ) + { + int nsamples = 0; + char **samples = hts_readlist(conf->samples_list, conf->sample_is_file, &nsamples); + if (!samples) { + fprintf(stderr,"[%s] could not read the samples list %s\n", __func__, conf->samples_list); + exit(EXIT_FAILURE); + } + if ( nsamples ) samples_inc = khash_str2int_init(); + for (i=0; iflag&MPLP_IGNORE_RG)? 0 : h_tmp->text); + if ( samples_inc ) data[i]->rghash_inc = khash_str2int_init(); + bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text, samples_inc, data[i]->rghash_inc); // Collect read group IDs with PL (platform) listed in pl_list (note: fragile, strstr search) - rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list); + rghash_exc = bcf_call_add_rg(rghash_exc, h_tmp->text, conf->pl_list); if (conf->reg) { hts_idx_t *idx = sam_index_load(data[i]->fp, fn[i]); if (idx == NULL) { @@ -458,7 +486,6 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) // Initialise the calling algorithm bca = bcf_call_init(-1., conf->min_baseQ); bcr = (bcf_callret1_t*) calloc(sm->n, sizeof(bcf_callret1_t)); - bca->rghash = rghash; bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ; bca->min_frac = conf->min_frac; bca->min_support = conf->min_support; @@ -519,7 +546,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, 0, 0); flush_bcf_records(conf, bcf_fp, bcf_hdr, bcf_rec); // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring? - if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) + // check me: rghash in bcf_call_gap_prep() should have no effect, reads mplp_func already excludes them + if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, NULL) >= 0) { bcf_callaux_clean(bca, &bc); for (i = 0; i < gplp.n; ++i) @@ -552,15 +580,17 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bam_smpl_destroy(sm); free(buf.s); for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]); free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp); - bcf_call_del_rghash(rghash); + bcf_call_del_rghash(rghash_exc); bam_mplp_destroy(iter); bam_hdr_destroy(h); for (i = 0; i < n; ++i) { sam_close(data[i]->fp); if (data[i]->iter) hts_itr_destroy(data[i]->iter); + if ( data[i]->rghash_inc ) khash_str2int_destroy_free(data[i]->rghash_inc); free(data[i]); } free(data); free(plp); free(n_plp); + if ( samples_inc ) khash_str2int_destroy_free(samples_inc); free(mp_ref.ref[0]); free(mp_ref.ref[1]); return ret; @@ -689,6 +719,8 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp) " --ff, --excl-flags STR|INT filter flags: skip reads with mask bits set\n" " [%s]\n", tmp_filter); fprintf(fp, +" -s, --samples LIST comma separated list of samples to include\n" +" -S, --samples-file FILE file of samples to include\n" " -x, --ignore-overlaps disable read-pair overlap detection\n" "\n" "Output options:\n" @@ -774,6 +806,8 @@ int bam_mpileup(int argc, char *argv[]) {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"output-type", required_argument, NULL, 'O'}, + {"samples", required_argument, NULL, 's'}, + {"samples-file", required_argument, NULL, 'S'}, {"output-tags", required_argument, NULL, 't'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, @@ -786,7 +820,7 @@ int bam_mpileup(int argc, char *argv[]) {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; - while ((c = getopt_long(argc, argv, "Ag:f:r:l:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xt:",lopts,NULL)) >= 0) { + while ((c = getopt_long(argc, argv, "Ag:f:r:l:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xt:s:S:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : @@ -824,6 +858,11 @@ int bam_mpileup(int argc, char *argv[]) case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; + case 's': mplp.samples_list = optarg; break; + case 'S': + mplp.samples_list = optarg; + mplp.sample_is_file = 1; + break; case 'O': switch (optarg[0]) { case 'b': mplp.output_type = FT_BCF_GZ; break; @@ -854,11 +893,11 @@ int bam_mpileup(int argc, char *argv[]) case 'G': { FILE *fp_rg; char buf[1024]; - mplp.rghash = khash_str2int_init(); + mplp.rghash_exc = khash_str2int_init(); if ((fp_rg = fopen(optarg, "r")) == NULL) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... - khash_str2int_inc(mplp.rghash, strdup(buf)); + khash_str2int_inc(mplp.rghash_exc, strdup(buf)); fclose(fp_rg); } break; @@ -907,7 +946,7 @@ int bam_mpileup(int argc, char *argv[]) } else ret = mpileup(&mplp, argc - optind, argv + optind); - if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash); + if (mplp.rghash_exc) khash_str2int_destroy_free(mplp.rghash_exc); free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) regidx_destroy(mplp.bed); diff --git a/sample.c b/sample.c index e48cf3a39..06777ee45 100644 --- a/sample.c +++ b/sample.c @@ -1,7 +1,7 @@ /* sample.c -- group data by sample. Copyright (C) 2010, 2011 Broad Institute. - Copyright (C) 2013 Genome Research Ltd. + Copyright (C) 2013, 2016 Genome Research Ltd. Author: Heng Li @@ -23,67 +23,53 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include #include +#include +#include #include "sample.h" -#include "htslib/khash.h" -KHASH_MAP_INIT_STR(sm, int) bam_sample_t *bam_smpl_init(void) { bam_sample_t *s; s = (bam_sample_t*) calloc(1, sizeof(bam_sample_t)); - s->rg2smid = kh_init(sm); - s->sm2id = kh_init(sm); + s->rg2smid = khash_str2int_init(); + s->sm2id = khash_str2int_init(); return s; } void bam_smpl_destroy(bam_sample_t *sm) { - int i; - khint_t k; - khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; - if (sm == 0) return; - for (i = 0; i < sm->n; ++i) free(sm->smpl[i]); + if ( !sm ) return; + if ( sm->rg2smid ) khash_str2int_destroy_free(sm->rg2smid); + if ( sm->sm2id ) khash_str2int_destroy_free(sm->sm2id); free(sm->smpl); - for (k = kh_begin(rg2smid); k != kh_end(rg2smid); ++k) - if (kh_exist(rg2smid, k)) free((char*)kh_key(rg2smid, k)); - kh_destroy(sm, (khash_t(sm)*) sm->rg2smid); - kh_destroy(sm, (khash_t(sm)*) sm->sm2id); free(sm); } -static void add_pair(bam_sample_t *sm, khash_t(sm) *sm2id, const char *key, const char *val) +static void add_pair(bam_sample_t *sm, void *sm2id, const char *readgroup, const char *sample) { - khint_t k_rg, k_sm; - int ret; - khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; - k_rg = kh_get(sm, rg2smid, key); - if (k_rg != kh_end(rg2smid)) return; // duplicated @RG-ID - k_rg = kh_put(sm, rg2smid, strdup(key), &ret); - k_sm = kh_get(sm, sm2id, val); - if (k_sm == kh_end(sm2id)) { // absent - if (sm->n == sm->m) { - sm->m = sm->m? sm->m<<1 : 1; - sm->smpl = (char**) realloc(sm->smpl, sizeof(char*) * sm->m); - } - sm->smpl[sm->n] = strdup(val); - k_sm = kh_put(sm, sm2id, sm->smpl[sm->n], &ret); - kh_val(sm2id, k_sm) = sm->n++; + if ( khash_str2int_has_key(sm->rg2smid,readgroup) ) return; // duplicated @RG-ID + + int ismpl; + if ( khash_str2int_get(sm2id, sample, &ismpl) < 0 ) + { + // the sample encountered for the first time + ismpl = sm->n++; + hts_expand0(char*,sm->n,sm->m,sm->smpl); + sm->smpl[ismpl] = strdup(sample); + khash_str2int_set(sm2id, sm->smpl[ismpl], ismpl); } - kh_val(rg2smid, k_rg) = kh_val(sm2id, k_sm); + khash_str2int_set(sm->rg2smid, strdup(readgroup), ismpl); } -int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) +int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt, void *white_list, void *white_hash) { const char *p = txt, *q, *r; kstring_t buf, first_sm; int n = 0; - khash_t(sm) *sm2id = (khash_t(sm)*)sm->sm2id; if (txt == 0) { - add_pair(sm, sm2id, fn, fn); + add_pair(sm, sm->sm2id, fn, fn); return 0; } memset(&buf, 0, sizeof(kstring_t)); @@ -99,36 +85,41 @@ int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u); for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v); ioq = *u; ior = *v; *u = *v = '\0'; - buf.l = 0; kputs(fn, &buf); kputc('/', &buf); kputs(q, &buf); - add_pair(sm, sm2id, buf.s, r); - if ( !first_sm.s ) - kputs(r,&first_sm); + if ( !white_list || khash_str2int_has_key(white_list,r) ) + { + buf.l = 0; kputs(fn, &buf); kputc('/', &buf); kputs(q, &buf); + add_pair(sm, sm->sm2id, buf.s, r); + if ( !first_sm.s ) + kputs(r,&first_sm); + if ( white_list ) khash_str2int_inc(white_hash,strdup(q)); + } *u = ioq; *v = ior; } else break; p = q > r? q : r; ++n; } - if (n == 0) add_pair(sm, sm2id, fn, fn); + if (n == 0) add_pair(sm, sm->sm2id, fn, fn); // If there is only one RG tag present in the header and reads are not annotated, don't refuse to work but // use the tag instead. else if ( n==1 && first_sm.s ) - add_pair(sm,sm2id,fn,first_sm.s); + add_pair(sm,sm->sm2id,fn,first_sm.s); if ( first_sm.s ) free(first_sm.s); -// add_pair(sm, sm2id, fn, fn); free(buf.s); return 0; } int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str) { - khint_t k; - khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid; - if (rg) { + int ismpl; + if ( rg ) + { str->l = 0; kputs(fn, str); kputc('/', str); kputs(rg, str); - k = kh_get(sm, rg2smid, str->s); - } else k = kh_get(sm, rg2smid, fn); - return k == kh_end(rg2smid)? -1 : kh_val(rg2smid, k); + if ( khash_str2int_get(sm->rg2smid, str->s, &ismpl) < 0 ) return -1; + return ismpl; + } + if ( khash_str2int_get(sm->rg2smid, fn, &ismpl) < 0 ) return -1; + return ismpl; } diff --git a/sample.h b/sample.h index 8e8efa5b7..f777a81f5 100644 --- a/sample.h +++ b/sample.h @@ -1,6 +1,7 @@ /* sample.h -- group data by sample. Copyright (C) 2010 Broad Institute. + Copyright (C) 2016 Genome Research Ltd. Author: Heng Li @@ -34,7 +35,7 @@ typedef struct { } bam_sample_t; bam_sample_t *bam_smpl_init(void); -int bam_smpl_add(bam_sample_t *sm, const char *abs, const char *txt); +int bam_smpl_add(bam_sample_t *sm, const char *abs, const char *txt, void *white_list, void *white_hash); int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str); void bam_smpl_destroy(bam_sample_t *sm); diff --git a/test/mpileup/mpileup.7.out b/test/mpileup/mpileup.7.out new file mode 100644 index 000000000..75e7fc96a --- /dev/null +++ b/test/mpileup/mpileup.7.out @@ -0,0 +1,70 @@ +##fileformat=VCFv4.2 +##FILTER= +##contig= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00101 HG00102 +17 100 . C <*> 0 . DP=9;I16=8,0,0,0,338,15668,0,0,449,26041,0,0,173,4019,0,0;QS=2,0;MQ0F=0 PL 0,9,108 0,15,134 +17 101 . C <*> 0 . DP=9;I16=8,0,0,0,319,14829,0,0,449,26041,0,0,172,3954,0,0;QS=2,0;MQ0F=0 PL 0,9,99 0,15,132 +17 102 . C <*> 0 . DP=9;I16=8,0,0,0,346,16476,0,0,449,26041,0,0,171,3895,0,0;QS=2,0;MQ0F=0 PL 0,9,111 0,15,139 +17 103 . T <*> 0 . DP=9;I16=8,0,0,0,354,17694,0,0,449,26041,0,0,170,3842,0,0;QS=2,0;MQ0F=0 PL 0,9,108 0,15,147 +17 104 . G <*> 0 . DP=9;I16=7,0,0,0,301,14499,0,0,420,25200,0,0,143,3121,0,0;QS=2,0;MQ0F=0 PL 0,6,89 0,15,133 +17 105 . G <*> 0 . DP=9;I16=8,0,0,0,298,12944,0,0,449,26041,0,0,166,3658,0,0;QS=2,0;MQ0F=0 PL 0,9,97 0,15,125 +17 106 . G <*> 0 . DP=9;I16=7,0,0,0,273,12195,0,0,420,25200,0,0,139,2953,0,0;QS=2,0;MQ0F=0 PL 0,6,85 0,15,124 +17 107 . C <*> 0 . DP=9;I16=8,0,0,0,330,15206,0,0,449,26041,0,0,162,3506,0,0;QS=2,0;MQ0F=0 PL 0,9,108 0,15,136 +17 108 . C <*> 0 . DP=9;I16=8,0,0,0,334,15764,0,0,449,26041,0,0,159,3393,0,0;QS=2,0;MQ0F=0 PL 0,9,108 0,15,135 +17 109 . T <*> 0 . DP=9;I16=8,0,0,0,374,19190,0,0,449,26041,0,0,156,3290,0,0;QS=2,0;MQ0F=0 PL 0,9,110 0,15,150 +17 110 . G <*> 0 . DP=9;I16=8,0,0,0,335,16083,0,0,449,26041,0,0,153,3197,0,0;QS=2,0;MQ0F=0 PL 0,9,104 0,15,136 +17 111 . G <*> 0 . DP=9;I16=6,0,0,0,268,13602,0,0,360,21600,0,0,107,2151,0,0;QS=2,0;MQ0F=0 PL 0,6,88 0,12,118 +17 112 . C <*> 0 . DP=9;I16=8,0,0,0,318,15164,0,0,449,26041,0,0,145,2945,0,0;QS=2,0;MQ0F=0 PL 0,9,95 0,15,135 +17 113 . A <*> 0 . DP=9;I16=7,0,0,0,311,15545,0,0,420,25200,0,0,116,2212,0,0;QS=2,0;MQ0F=0 PL 0,6,87 0,15,139 +17 114 . C <*> 0 . DP=9;I16=8,0,0,0,327,15347,0,0,449,26041,0,0,137,2741,0,0;QS=2,0;MQ0F=0 PL 0,9,103 0,15,133 +17 115 . C <*> 0 . DP=11;I16=8,0,0,0,341,16381,0,0,480,28800,0,0,108,2032,0,0;QS=2,0;MQ0F=0 PL 0,6,89 0,18,147 +17 116 . A <*> 0 . DP=11;I16=8,1,0,0,376,18032,0,0,509,29641,0,0,107,1965,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,6,90 0,21,175 +17 117 . G <*> 0 . DP=11;I16=8,1,0,0,370,17312,0,0,509,29641,0,0,105,1913,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,6,85 0,21,177 +17 118 . G <*> 0 . DP=11;I16=7,1,0,0,319,14789,0,0,449,26041,0,0,102,1876,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,3,60 0,21,162 +17 119 . G <*> 0 . DP=10;I16=8,1,0,0,321,14467,0,0,478,26882,0,0,125,2431,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,6,73 0,21,160 +17 120 . A <*> 0 . DP=10;I16=8,1,0,0,361,17053,0,0,478,26882,0,0,123,2373,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,6,83 0,21,171 +17 121 . G <*> 0 . DP=10;I16=8,1,0,0,347,15913,0,0,478,26882,0,0,121,2327,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,6,80 0,21,168 +17 122 . C <*> 0 . DP=11;I16=9,1,0,0,396,18360,0,0,538,30482,0,0,119,2293,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,99 0,21,178 +17 123 . T <*> 0 . DP=10;I16=8,1,0,0,387,19215,0,0,478,26882,0,0,119,2271,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,112 0,18,166 +17 124 . T <*> 0 . DP=10;I16=8,1,0,0,350,15768,0,0,478,26882,0,0,119,2261,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,104 0,18,154 +17 125 . A <*> 0 . DP=10;I16=8,1,0,0,358,16494,0,0,478,26882,0,0,118,2214,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,104 0,18,162 +17 126 . A <*> 0 . DP=10;I16=8,1,0,0,373,17399,0,0,478,26882,0,0,117,2181,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,107 0,18,174 +17 127 . C <*> 0 . DP=10;I16=8,1,0,0,366,16632,0,0,478,26882,0,0,116,2162,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,109 0,18,160 +17 128 . A <*> 0 . DP=10;I16=8,1,0,0,378,17674,0,0,478,26882,0,0,115,2157,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,111 0,18,162 +17 129 . A <*> 0 . DP=9;I16=7,1,0,0,354,17046,0,0,418,23282,0,0,115,2165,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,113 0,15,159 +17 130 . A <*> 0 . DP=9;I16=7,1,0,0,349,16363,0,0,418,23282,0,0,115,2185,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,113 0,15,152 +17 131 . C <*> 0 . DP=9;I16=7,1,0,0,330,14822,0,0,418,23282,0,0,115,2217,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,110 0,15,147 +17 132 . A <*> 0 . DP=9;I16=7,1,0,0,348,16432,0,0,418,23282,0,0,115,2261,0,0;QS=2,0;MQSB=1;MQ0F=0 PL 0,9,110 0,15,151 +17 133 . T <*> 0 . DP=8;I16=6,2,0,0,315,12451,0,0,418,23282,0,0,141,2941,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,105 0,15,150 +17 134 . C <*> 0 . DP=8;I16=6,2,0,0,314,12374,0,0,418,23282,0,0,142,3006,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,105 0,15,152 +17 135 . T <*> 0 . DP=8;I16=6,2,0,0,314,12470,0,0,418,23282,0,0,143,3081,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,106 0,15,156 +17 136 . G <*> 0 . DP=8;I16=6,2,0,0,291,10787,0,0,418,23282,0,0,145,3165,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,105 0,15,134 +17 137 . T <*> 0 . DP=8;I16=6,2,0,0,300,11444,0,0,418,23282,0,0,148,3258,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,104 0,15,139 +17 138 . C <*> 0 . DP=8;I16=6,2,0,0,300,11542,0,0,418,23282,0,0,150,3312,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,108 0,15,142 +17 139 . C <*> 0 . DP=8;I16=6,2,0,0,291,10833,0,0,418,23282,0,0,152,3378,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,106 0,15,143 +17 140 . A <*> 0 . DP=8;I16=6,2,0,0,314,12410,0,0,418,23282,0,0,153,3405,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,107 0,15,153 +17 141 . G <*> 0 . DP=8;I16=6,2,0,0,295,11151,0,0,418,23282,0,0,153,3391,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,108 0,15,142 +17 142 . C <*> 0 . DP=8;I16=6,2,0,0,264,9036,0,0,418,23282,0,0,153,3385,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,97 0,15,129 +17 143 . G <*> 0 . DP=8;I16=5,2,0,0,218,7170,0,0,358,19682,0,0,146,3338,0,0;QS=2,0;MQSB=0.7;MQ0F=0 PL 0,9,95 0,12,97 +17 144 . A <*> 0 . DP=8;I16=6,2,0,0,287,10705,0,0,418,23282,0,0,153,3397,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,105 0,15,129 +17 145 . A <*> 0 . DP=8;I16=6,2,0,0,294,11200,0,0,418,23282,0,0,153,3415,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,106 0,15,138 +17 146 . T <*> 0 . DP=8;I16=6,2,0,0,281,10419,0,0,418,23282,0,0,153,3441,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,103 0,15,128 +17 147 . A <*> 0 . DP=8;I16=6,2,0,0,286,10442,0,0,418,23282,0,0,153,3475,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,99 0,15,140 +17 148 . C <*> 0 . DP=8;I16=6,2,0,0,301,11401,0,0,418,23282,0,0,152,3466,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,106 0,15,146 +17 149 . C <*> 0 . DP=8;I16=6,2,0,0,293,10907,0,0,418,23282,0,0,150,3414,0,0;QS=2,0;MQSB=0.666667;MQ0F=0 PL 0,9,109 0,15,140 +17 150 . T <*> 0 . DP=7;I16=5,2,0,0,277,11093,0,0,389,22441,0,0,149,3369,0,0;QS=2,0;MQSB=0.5;MQ0F=0 PL 0,6,84 0,15,152 diff --git a/test/mplp.samples b/test/mplp.samples new file mode 100644 index 000000000..1a9b396e6 --- /dev/null +++ b/test/mplp.samples @@ -0,0 +1,2 @@ +HG00101 +HG00102 diff --git a/test/test.pl b/test/test.pl index 06cb9b80d..f10d41650 100755 --- a/test/test.pl +++ b/test/test.pl @@ -248,6 +248,8 @@ test_mpileup($opts,in=>[qw(1 2 3)],out=>'mpileup/mpileup.4.out',args=>q[-t DP,DPR,DV,DP4,INFO/DPR,SP -r17:100-600]); #test files from samtools mpileup test suite test_mpileup($opts,in=>[qw(1 2 3)],out=>'mpileup/mpileup.5.out',args=>q[-t DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR -r17:100-600]); test_mpileup($opts,in=>[qw(1 2 3)],out=>'mpileup/mpileup.6.out',args=>q[-t DP,DV -r17:100-600 --gvcf 0,2,5]); +test_mpileup($opts,in=>[qw(1 2 3)],out=>'mpileup/mpileup.7.out',args=>q[-r17:100-150 -s HG00101,HG00102]); +test_mpileup($opts,in=>[qw(1 2 3)],out=>'mpileup/mpileup.7.out',args=>q[-r17:100-150 -S {PATH}/mplp.samples]); print "\nNumber of tests:\n"; printf " total .. %d\n", $$opts{nok}+$$opts{nfailed}; @@ -864,6 +866,7 @@ sub test_mpileup { my ($opts,%args) = @_; + $args{args} =~ s/{PATH}/$$opts{path}/g; for my $fmt ('bam','cram') { my @files = ();