diff --git a/atomic.c b/atomic.c
index 8874da0..3d9f66c 100644
--- a/atomic.c
+++ b/atomic.c
@@ -246,10 +246,11 @@ const bcf_atom_t *bcf_atom_read(bcf_atombuf_t *buf)
 	}
 }
 
-void bcf_atom2bcf(const bcf_atom_t *a, bcf1_t *b, int write_M, int id_GT)
+void bcf_atom2bcf2(const bcf_atom_t *a, bcf1_t *b, int write_M, int id_GT, int use_missing)
 {
 	static uint8_t conv[4] = { 1<<1, 2<<1, 0<<1, 3<<1 };
 	static uint8_t conv_no_M[4] = { 1<<1, 2<<1, 0<<1, 1<<1 };
+	static uint8_t conv_no_M_missing[4] = { 1<<1, 2<<1, 0<<1, 0<<1 };
 	b->rid = a->rid, b->pos = a->pos, b->rlen = a->rlen;
 	b->qual = 0, b->n_info = b->n_fmt = b->n_sample = 0;
 	b->n_allele = write_M && a->has_multi? 3 : 2;
@@ -271,9 +272,19 @@ void bcf_atom2bcf(const bcf_atom_t *a, bcf1_t *b, int write_M, int id_GT)
 			for (i = 0; i < b->n_sample<<1; ++i)
 				b->indiv.s[b->indiv.l++] = conv[a->gt[i]] | a->phased;
 		} else {
-			for (i = 0; i < b->n_sample<<1; ++i)
-				b->indiv.s[b->indiv.l++] = conv_no_M[a->gt[i]] | a->phased;
+			if (use_missing)
+				for (i = 0; i < b->n_sample<<1; ++i)
+					b->indiv.s[b->indiv.l++] = conv_no_M_missing[a->gt[i]] | a->phased;
+			else
+				for (i = 0; i < b->n_sample<<1; ++i)
+					b->indiv.s[b->indiv.l++] = conv_no_M[a->gt[i]] | a->phased;
 		}
 		b->indiv.s[b->indiv.l] = 0;
 	}
 }
+
+/* Legacy entry point: same as bcf_atom2bcf2() with use_missing = 0 (conv_no_M table). */
+void bcf_atom2bcf(const bcf_atom_t *a, bcf1_t *b, int write_M, int id_GT)
+{
+	bcf_atom2bcf2(a, b, write_M, id_GT, 0);
+}
diff --git a/atomic.h b/atomic.h
index 0f65769..062009d 100644
--- a/atomic.h
+++ b/atomic.h
@@ -29,6 +29,7 @@ void bcf_atomize(const bcf_hdr_t *h, bcf1_t *b, bcf_atom_v *a);
 bcf_atombuf_t *bcf_atombuf_init(htsFile *in, int keep_flt);
 void bcf_atombuf_destroy(bcf_atombuf_t *buf);
 const bcf_atom_t *bcf_atom_read(bcf_atombuf_t *buf);
+void bcf_atom2bcf2(const bcf_atom_t *a, bcf1_t *b, int write_M, int id_GT, int use_missing);
 void bcf_atom2bcf(const bcf_atom_t *a, bcf1_t *b, int write_M, int id_GT);
 void bcf_atom_print(const bcf_hdr_t *h, int n, const bcf_atom_t *aa);
 
diff --git a/import.c b/import.c
index 504cfc4..a6bf879 100644
--- a/import.c
+++ b/import.c
@@ -134,18
+134,19 @@ int main_bcfidx(int argc, char *argv[]) int main_atomize(int argc, char *argv[]) { - int c, vcf_in = 0, bcf_out = 0, write_M = 0, id_GT = -1; + int c, vcf_in = 0, bcf_out = 0, write_M = 0, id_GT = -1, use_missing = 0; char moder[8], modew[8], *fn_ref = 0; htsFile *in, *out; bcf_atombuf_t *ab; const bcf_atom_t *a; bcf1_t *b; - while ((c = getopt(argc, argv, "bSMt:")) >= 0) { + while ((c = getopt(argc, argv, "mbSMt:")) >= 0) { if (c == 'S') vcf_in = 1; else if (c == 't') vcf_in = 1, fn_ref = optarg; else if (c == 'b') bcf_out = 1; else if (c == 'M') write_M = 1; + else if (c == 'm') use_missing = 1; } if (optind == argc) { fprintf(stderr, "Usage: bgt atomize [options] |\n"); @@ -153,7 +154,8 @@ int main_atomize(int argc, char *argv[]) fprintf(stderr, " -b BCF output\n"); fprintf(stderr, " -S VCF input\n"); fprintf(stderr, " -t FILE list of contig names and lengths (force -S)\n"); - fprintf(stderr, " -M use symbolic allele\n"); + fprintf(stderr, " -M use at a multi-allelic site (override -m)\n"); + fprintf(stderr, " -m use . at a multi-allelic genotype\n"); return 1; } @@ -176,7 +178,7 @@ int main_atomize(int argc, char *argv[]) b = bcf_init1(); while ((a = bcf_atom_read(ab)) != 0) { - bcf_atom2bcf(a, b, write_M, id_GT); + bcf_atom2bcf2(a, b, write_M, id_GT, use_missing); vcf_write1(out, ab->h, b); } bcf_destroy1(b); diff --git a/main.c b/main.c index 26ed8a7..193b763 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ #include #include -#define BGT_VERSION "1.0-r280-dirty" +#define BGT_VERSION "1.0-r281-dirty" int main_import(int argc, char *argv[]); int main_view(int argc, char *argv[]);