In [1]:
#Added removing multiallelic in PB

In [10]:
import os
import gzip

# --- Paths & setup ---
path = "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/"
vcfpath = os.path.join(path, "0.calls_Illumina")
outpath = os.path.join(path, "1.Illumina_PU20")
chroms = [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY"]

l_mode = {"MT", "STK", "VN", "RF"}

# Ensure output dirs exist
os.makedirs(os.path.join(outpath, "Valid_tools"), exist_ok=True)
os.makedirs(os.path.join(outpath, "Stat"), exist_ok=True)

# --- Dictionaries ---
dic_raw_all, dic_step1, dic_AF, dic_valid, dic_altzero, dic_VAF = {}, {}, {}, {}, {}, {}

# NEW: tumor ALT counts aggregated per (loc, REF) to find major ALT
# Structure: dic_tumor_counts[mode][loc][ref][alt] = alt_count_in_tumor
dic_tumor_counts = {}

# NEW: count multi-allelic exclusions per mode
dic_multiallelic = {}

# --- Helpers ---
def parse_af(ad_field):
    """Extract (AF, alt_count) from AD='ref,alt' style string. Returns (af, alt_count) or (None, None)."""
    try:
        ref, alt = (ad_field or "0,0").split(',')[:2]
        ref, alt = int(ref), int(alt)
        depth = ref + alt
        if depth == 0:
            return 0.0, 0
        return float(alt) / depth, alt
    except Exception:
        return None, None

def write_combined_header_if_needed(path_):
    if not os.path.exists(path_) or os.path.getsize(path_) == 0:
        with open(path_, "w") as fout:
            fout.write(
                "mode\tTotal\tAlt_in_T\tValid\tInvalid_Alt_in_LBL\t"
                "Invalid_NoAlt_in_LT\tInvalid_NoAlt_in_ST\tMultiAllelic\tValid_percent\n"
            )

combined_stat_path = os.path.join(outpath, "Stat", "Stat_snv.Valid.ALL.txt")
write_combined_header_if_needed(combined_stat_path)

# --- Main loop ---
for mode in l_mode:
    tool = mode.split(".")[0]

    dic_valid[mode] = []
    dic_multiallelic[mode] = 0
    if mode not in dic_AF:
        dic_AF[mode] = {}
    if mode not in dic_tumor_counts:
        dic_tumor_counts[mode] = {}

    print(f"\n+++ {mode} started +++")

    out_vcf_path = f"{outpath}/Valid_tools/{mode}.Valid.vcf"
    out_vcf = open(out_vcf_path, "w")

    # per-chrom stat file (kept, as in your original)
    out_stat_path = f"{outpath}/Stat/Stat_snv_{mode}.Valid.txt"
    out_stat = open(out_stat_path, "w")
    out_stat.write(
        "mode\tchrom\tTotal\tAlt_in_T\tValid\tInvalid_Alt_in_LBL\t"
        "Invalid_NoAlt_in_LT\tInvalid_NoAlt_in_ST\tMultiAllelic\tValid_percent\n"
    )

    # Mode-level totals (for combined summary)
    m_total = m_alt_in_t = m_valid = m_alt_in_bl = m_alt_notin_lt = m_alt_notin_st = m_multiallelic = 0

    for chrom in chroms:
        # --- counters per chrom ---
        cnt_total = cnt_alt_in_t = cnt_noalt_in_bl = cnt_alt_in_bl = cnt_alt_notin_lt = cnt_alt_notin_st = 0
        cnt_multiallelic_chr = 0
        header_written = False

        # --- Read PU support (tumor, blood) and keep ALL ALTs (SNVs + INDELs) ---
        pu_file = os.path.join(vcfpath, "PU_bcftools", f"{mode}.snv.{chrom}.PU.norm.vcf.gz")
        dic_step1.setdefault(chrom, {})
        dic_tumor_counts[mode].setdefault(chrom, {})  # not used directly, but keep per chrom key

        # Note: we do NOT filter by len(ref/alt) here, so we can see competing INDEL alts
        if os.path.exists(pu_file):
            with open(pu_file, "r") as fpu:
                for line in fpu:
                    if line.startswith("#"):
                        continue
                    s = line.rstrip("\n").split("\t")
                    if s[0] != chrom:
                        continue
                    pos, ref, alt = s[1], s[3], s[4]
                    if alt == "<*>":
                        continue

                    loc = f"{chrom}:{pos}"
                    var = f"{ref}:{alt}"
                    dic_AF[mode].setdefault(loc, {})[var] = {}

                    # tumor = sample at index -2, blood = sample at index -1 (your convention)
                    ad_t = s[-2].split(':')[-1] if ":" in s[-2] else s[-2]
                    af_t, alt_t = parse_af(ad_t)
                    ad_bl = s[-1].split(':')[-1] if ":" in s[-1] else s[-1]
                    af_bl, alt_bl = parse_af(ad_bl)

                    dic_AF[mode][loc][var]["l_t"] = (af_t, alt_t)
                    dic_AF[mode][loc][var]["l_bl"] = (af_bl, alt_bl)

                    # Track tumor counts per (loc, REF) across all ALTs to decide major ALT
                    ref_map = dic_tumor_counts[mode].setdefault(loc, {}).setdefault(ref, {})
                    # use 0 if missing
                    ref_map[alt] = alt_t if alt_t is not None else 0

                    # Optionally store VAFs (unchanged from your code)
                    variant = f"{chrom}:{pos}:{ref}:{alt}"
                    dic_VAF[variant] = {"VAF_pb": af_t, "VAF_ill": None, "VAF_bl": af_bl}

        # --- Validate against short-read calls (SNVs only as in your original input) ---
        input_vcf = os.path.join(vcfpath, "input", f"{mode}.snv.vcf.gz")
        with gzip.open(input_vcf, 'rt') as fin:
            for line in fin:
                if line.startswith("#"):
                    if not header_written:
                        out_vcf.write(line)
                        header_written = True
                    continue

                s = line.rstrip("\n").split("\t")
                if s[0] != chrom:
                    continue

                cnt_total += 1
                pos, ref, alt = s[1], s[3], s[4]  # SNV here
                loc, var = f"{chrom}:{pos}", f"{ref}:{alt}"
                info = f"{loc}:{var}"
                dic_raw_all.setdefault(info, []).append(tool)

                # Must exist in PU dictionary to be considered
                if (loc in dic_AF[mode]) and (var in dic_AF[mode][loc]):
                    af_t, ad_t = dic_AF[mode][loc][var]["l_t"]
                    af_bl, ad_bl = dic_AF[mode][loc][var]["l_bl"]

                    if (af_t is not None) and (ad_t > 1):  # tumor supports alt
                        cnt_alt_in_t += 1

                        # any blood ALT evidence across ANY competing allele at this loc?
                        blood_has_alt = any(
                            (dic_AF[mode][loc][other]["l_bl"][1] or 0) > 2
                            for other in dic_AF[mode][loc]
                        )

                        if (af_bl == 0.0 or af_bl is None) and not blood_has_alt:
                            # --- Multi-allelic check: compare among ALTs with the SAME REF only ---
                            ref_map = dic_tumor_counts[mode].get(loc, {}).get(ref, {})
                            if ref_map:
                                # counts for this ALT
                                this_cnt = ref_map.get(alt, 0)
                                # find major ALT and its count
                                major_alt, major_cnt = max(ref_map.items(), key=lambda kv: kv[1] if kv[1] is not None else -1)
                                # Mark multi-allelic ONLY if major strictly exceeds the current alt and alt differs
                                if (alt != major_alt) and (major_cnt is not None) and (major_cnt > this_cnt):
                                    dic_multiallelic[mode] += 1
                                    cnt_multiallelic_chr += 1
                                    # do NOT write to Valid.vcf
                                    continue

                            # Passes (not multi-allelic or it's the major ALT)
                            cnt_noalt_in_bl += 1
                            dic_valid[mode].append(loc)
                            out_vcf.write(line)
                            dic_step1[chrom].setdefault(info, []).append(tool)
                        else:
                            cnt_alt_in_bl += 1
                    else:
                        dic_altzero.setdefault(info, []).append(tool)
                else:
                    # no matching entry in PU table
                    cnt_alt_notin_lt += 1

        # --- Write per-chrom stats (as in your original, now with MultiAllelic this-chr) ---
        valid_percent_chr = (cnt_noalt_in_bl / cnt_total * 100) if cnt_total else 0.0
        out_stat.write(
            f"{mode}\t{chrom}\t{cnt_total}\t{cnt_alt_in_t}\t{cnt_noalt_in_bl}\t"
            f"{cnt_alt_in_bl}\t{cnt_alt_notin_lt}\t{cnt_alt_notin_st}\t"
            f"{cnt_multiallelic_chr}\t{valid_percent_chr:.2f}\n"
        )

        # accumulate mode totals
        m_total += cnt_total
        m_alt_in_t += cnt_alt_in_t
        m_valid += cnt_noalt_in_bl
        m_alt_in_bl += cnt_alt_in_bl
        m_alt_notin_lt += cnt_alt_notin_lt
        m_alt_notin_st += cnt_alt_notin_st
        m_multiallelic += cnt_multiallelic_chr

        print(chrom, cnt_total, cnt_alt_in_t, cnt_noalt_in_bl, cnt_alt_in_bl, cnt_alt_notin_lt, cnt_multiallelic_chr)

    out_vcf.close()
    out_stat.close()

    # --- Combined one-line-per-mode stat file ---
    valid_percent_mode = (m_valid / m_total * 100) if m_total else 0.0
    with open(combined_stat_path, "a") as fout:
        fout.write(
            f"{mode}\t{m_total}\t{m_alt_in_t}\t{m_valid}\t{m_alt_in_bl}\t"
            f"{m_alt_notin_lt}\t{m_alt_notin_st}\t{m_multiallelic}\t{valid_percent_mode:.2f}\n"
        )



+++ VN started +++
chr1 2445 2179 2117 62 223 0
chr2 4269 3993 3900 93 237 0
chr3 3686 3484 3410 74 179 0
chr4 3901 3682 3594 88 190 0
chr5 2034 1736 1694 42 266 0
chr6 3134 2920 2835 85 185 0
chr7 2211 2067 2015 52 123 0
chr8 3136 2978 2897 80 139 1
chr9 1746 1599 1552 47 129 0
chr10 1565 1332 1294 38 199 0
chr11 1471 1275 1244 31 168 0
chr12 2419 2251 2183 68 151 0
chr13 1898 1762 1730 32 120 0
chr14 980 894 874 20 76 0
chr15 850 766 736 30 78 0
chr16 1105 996 969 27 97 0
chr17 1126 1024 996 28 92 0
chr18 959 811 785 26 111 0
chr19 1002 914 888 26 72 0
chr20 1135 1076 1039 37 55 0
chr21 703 671 653 18 28 0
chr22 510 482 471 11 26 0
chrX 2003 1720 1690 30 264 0
chrY 0 0 0 0 0 0

+++ RF started +++
chr1 2049 2004 1928 76 43 0
chr2 3797 3767 3671 96 29 0
chr3 3274 3227 3140 87 43 0
chr4 3447 3420 3340 80 27 0
chr5 1591 1583 1549 34 7 0
chr6 2651 2643 2565 78 5 0
chr7 2659 2602 2529 73 50 0
chr8 2794 2781 2702 79 13 0
chr9 1552 1531 1486 45 19 0
chr10 1191 1189 1157 31 1 1
chr11 1174 11

import os
import gzip

# --- Paths & setup ---
path = "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/"
vcfpath = os.path.join(path, "0.calls_Illumina")
outpath = os.path.join(path, "1.Illumina_PU20")
chroms = [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY"]

l_mode = {"MT", "STK", "VN", "RF"}

# --- Dictionaries ---
dic_raw_all, dic_step1, dic_AF, dic_valid, dic_altzero, dic_VAF = {}, {}, {}, {}, {}, {}

# --- Helpers ---
def parse_af(ad_field):
    """Extract (AF, alt_count) from RO,AO string (AD)."""
    try:
        ref, alt = int(ad_field.split(',')[0]), int(ad_field.split(',')[1])
        depth = ref + alt
        if depth == 0:
            return 0, 0
        return float(alt) / depth, alt
    except Exception:
        return None, None

# --- Main loop ---
for mode in l_mode:
    tool = mode.split(".")[0]
    dic_valid[mode] = []
    print(f"\n+++ {mode} started +++")
    out_vcf = open(f"{outpath}/Valid_tools/{mode}.Valid.vcf", "w")
    out_stat = open(f"{outpath}/Stat/Stat_snv_{mode}.Valid.txt", "a")
    out_stat.write("mode\tchrom\tTotal\tAlt_in_T\tValid\tInvalid_Alt_in_LBL\t"
                   "Invalid_NoAlt_in_LT\tInvalid_NoAlt_in_ST\tValid_percent\n")

    for chrom in chroms:
        # --- Read PacBio support (PU file) ---
        pu_file = os.path.join(vcfpath, "PU_bcftools", str(mode)+'.snv.' + chrom + ".PU.norm.vcf.gz")
        if mode not in dic_AF: dic_AF[mode] = {}
        if chrom not in dic_step1: dic_step1[chrom] = {}

        with open(pu_file) as f:
            for line in f:
                if line.startswith("#"): continue
                s = line.strip().split("\t")
                if s[0] != chrom: continue
                pos, ref, alt = s[1], s[3], s[4]
                if alt == "<*>" or len(ref) != 1 or len(alt) != 1: continue
                loc, var = f"{chrom}:{pos}", f"{ref}:{alt}"
                variant = f"{chrom}:{pos}:{ref}:{alt}"
                dic_AF[mode].setdefault(loc, {})[var] = {}
                ad_t = s[-2].split(':')[-1]
                af_t, alt_t = parse_af(ad_t)
                ad_bl = s[-1].split(':')[-1]
                af_bl, alt_bl = parse_af(ad_bl)
                dic_AF[mode][loc][var]["l_t"] = (af_t, alt_t)
                dic_AF[mode][loc][var]["l_bl"] = (af_bl, alt_bl)

                # --- NEW: store PacBio tumor VAF and Illumina blood VAF ---
                dic_VAF[variant] = {
                    "VAF_pb": af_t,
                    "VAF_ill": None,   # will be filled from pileups later
                    "VAF_bl": af_bl
                }

        # --- Validate against short-read calls ---
        cnt_total = cnt_alt_in_t = cnt_noalt_in_bl = cnt_alt_in_bl = cnt_alt_notin_lt = cnt_alt_notin_st = 0
        header_written = False

        with gzip.open(vcfpath + '/input/' + mode + '.snv.vcf.gz', 'rt') as f:
            for line in f:
                if line.startswith("#"):
                    if not header_written:
                        out_vcf.write(line)
                        header_written = True
                    continue
                s = line.strip().split("\t")
                if s[0] != chrom: continue
                cnt_total += 1
                pos, ref, alt = s[1], s[3], s[4]
                loc, var = f"{chrom}:{pos}", f"{ref}:{alt}"
                info = f"{loc}:{var}"
                dic_raw_all.setdefault(info, []).append(tool)
                if loc in dic_AF[mode] and var in dic_AF[mode][loc]:
                    af_t, ad_t = dic_AF[mode][loc][var]["l_t"]
                    af_bl, ad_bl = dic_AF[mode][loc][var]["l_bl"]
                    if af_t and af_t > 0:  # tumor supports alt
                        cnt_alt_in_t += 1
                        blood_has_alt = any(
                            dic_AF[mode][loc][other]["l_bl"][1] > 2
                            for other in dic_AF[mode][loc]
                        )
                        if af_bl == 0 and not blood_has_alt:  # valid
                            cnt_noalt_in_bl += 1
                            dic_valid[mode].append(loc)
                            out_vcf.write(line)
                            dic_step1[chrom].setdefault(info, []).append(tool)
                        else:
                            cnt_alt_in_bl += 1
                    else:
                        dic_altzero.setdefault(info, []).append(tool)
                else:
                    cnt_alt_notin_lt += 1

        # --- Write stats ---
        out_stat.write(
            f"{mode}\t{chrom}\t{cnt_total}\t{cnt_alt_in_t}\t{cnt_noalt_in_bl}\t"
            f"{cnt_alt_in_bl}\t{cnt_alt_notin_lt}\t{cnt_alt_notin_st}\n"
        )
        print(chrom, cnt_total, cnt_alt_in_t, cnt_noalt_in_bl, cnt_alt_in_bl, cnt_alt_notin_lt)

    out_vcf.close()
    out_stat.close()


In [15]:
print (len(dic_raw_all))
l_chrom= ['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22', 'chrX', 'chrY']

cnt = 0
for chrom in dic_step1:
    cnt += len(dic_step1[chrom])
print (cnt)
out_snv = open("/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/1.Illumina_PU20/Step1_Validated_union_snv.vcf", 'w')
vcf_header = "##fileformat=VCFv4.2\n##INFO=<ID=SP,Number=1,Type=String,Description='Variant caller supported this variant'>\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCOLO829T\n"

out_snv.write(vcf_header)

for chr_each in l_chrom:
    dic_bypos = {} #For sorting pos
    l_bypos = []
    for info in dic_step1[chr_each]:
        s = info.split(':')
        chrom, pos, ref, alt = s[0], int(s[1]), s[2], s[3]
        dic_bypos[pos] = info
        l_bypos.append(pos)
    l_bypos.sort()
    
    for pos in l_bypos:
        info = dic_bypos[pos]
        chrom, pos, ref, alt = s[0], s[1], s[2], s[3]
        s = info.split(':')
        chrom, pos, ref, alt = s[0], int(s[1]), s[2], s[3]
        
        l_tool= dic_step1[chrom][info]
        tool_support = ','.join(l_tool)
        out_snv.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %(chrom, str(pos), '.', ref, alt, '.', '.', 'SP='+ tool_support, '.\t.'))

61648
44005


In [8]:
for var in dic_VAF:
    print (var, dic_VAF[var])
    break

chr1:887559:C:T {'VAF_pb': 1.0, 'VAF_ill': None, 'VAF_bl': 0.0}


In [16]:
import pysam
import gzip

# --- Function to parse VAF from pileup FORMAT ---
def get_VAF_from_pileup(ad_field):
    try:
        ref, alt = int(ad_field.split(',')[0]), int(ad_field.split(',')[1])
        depth = ref + alt
        if depth == 0:
            return 0
    except Exception:
        return None
    return float(alt)/depth

# --- Load Illumina tumor VAFs from per-chrom pileups ---
pileup_dir = "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/2.PacBio_Valid/PU_bcftools"
dic_IllT = {}

chroms = [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY"]
for chrom in chroms:
    pileup_file = f"{pileup_dir}/Step1_Validated_union.{chrom}.PU.norm.vcf.gz"
    with open(pileup_file, "r") as f:
        for line in f:
            if line.startswith("#"):
                continue
            s = line.strip().split("\t")
            chrom_v, pos, ref, alt,  sample = s[0], s[1], s[3], s[4], s[-1]
            if alt == "<*>":
                continue
            variant = ":".join([chrom_v, pos, ref, alt])
            vaf_ill = get_VAF_from_pileup(sample.split(':')[-1])
            dic_IllT[variant] = vaf_ill
#print (dic_IllT)
# --- Load Region stratification ---
region_vcfs = {
    "Easy":      "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/2.PacBio_Valid/Region_snv/Step1_Validated_union_snv.SMaHT_easy_v2.bed.gz.vcf",
    "Difficult": "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/2.PacBio_Valid/Region_snv/Step1_Validated_union_snv.SMaHT_difficult_v2.bed.gz.vcf",
    "Extreme":   "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/2.PacBio_Valid/Region_snv/Step1_Validated_union_snv.SMaHT_extreme_v2.bed.gz.vcf",
}

region_map = {}
for region, path in region_vcfs.items():
    with pysam.VariantFile(path) as rv:
        for rec in rv.fetch():
            variant = f"{rec.chrom}:{rec.pos}:{rec.ref}:{rec.alts[0]}"
            region_map[variant] = region

# --- Input and output ---
input_vcf_path  = "/n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/2.PacBio_Valid/Step1_Validated_union_snv.vcf"
output_vcf_path = input_vcf_path[:-4] + "_VAF_RGN.vcf"

vcf_in = pysam.VariantFile(input_vcf_path, "r")

# --- Copy header and add contigs + INFO fields ---
new_header = vcf_in.header.copy()

# Ensure all typical chr contigs exist (prevents ValueError: Invalid chromosome/contig)
for c in chroms:
    if c not in new_header.contigs:
        new_header.contigs.add(c)

# Add new INFO fields if missing
for field, num, ftype, desc in [
    ("VAF_Ill", 1, "Float",  "Variant allele frequency in 200X Illumina COLO829T (pileup-based)"),
    ("VAF_PB",  1, "Float",  "Variant allele frequency in 180X PacBio COLO829T"),
    ("VAF_BL",  1, "Float",  "Variant allele frequency in 230X Illumina COLO829BL"),
    ("RGN",     1, "String", "SMaHT region stratification: Easy, Difficult, or Extreme"),
]:
    if field not in new_header.info:
        new_header.info.add(field, num, ftype, desc)

# --- Open output with modified header ---
vcf_out = pysam.VariantFile(output_vcf_path, "w", header=new_header)

# --- Iterate records ---
n_debug = 0
for rec in vcf_in.fetch():
    # Build variant key
    if not rec.alts:
        continue
    variant = f"{rec.chrom}:{rec.pos}:{rec.ref}:{rec.alts[0]}"

    # Create new record bound to new_header
    new_rec = vcf_out.new_record(
        contig=rec.chrom,
        start=rec.start,
        stop=rec.stop,
        alleles=rec.alleles,
        id=rec.id,
        qual=rec.qual,
        filter=list(rec.filter.keys()),
    )

    # Copy original INFO keys that exist in new_header
    for k, v in rec.info.items():
        if k in new_header.info:
            try:
                new_rec.info[k] = v
            except Exception:
                # Skip incompatible array/scalar mismatches silently
                pass

    # Copy samples/FORMAT (so you keep the original sample column)
    for sample in rec.samples:
        new_rec.samples[sample].update(rec.samples[sample])

    # Fetch VAFs and region
    # NOTE: dic_VAF must be present in the session (built by your first script)
    vaf_pb  = dic_VAF.get(variant, {}).get("VAF_pb")
   # vaf_bl  = dic_VAF.get(variant, {}).get("VAF_bl")
    vaf_ill = dic_IllT[variant]
    #print (variant, vaf_ill)
    region  = region_map.get(variant)

    if vaf_pb is not None:
        new_rec.info["VAF_PB"] = float(round(vaf_pb, 6))
    if vaf_ill is not None:
        new_rec.info["VAF_Ill"] = float(round(vaf_ill, 6))
    if region:
        new_rec.info["RGN"] = region

    vcf_out.write(new_rec)

    # Optional quick debug for first few records
    if n_debug < 3:
        print(f"DEBUG {variant} -> VAF_PB={vaf_pb} VAF_Ill={vaf_ill}  RGN={region}")
        n_debug += 1

vcf_in.close()
vcf_out.close()

print(f"✅ Output written: {output_vcf_path}")


[W::vcf_parse] Contig 'chr1' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr2' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr3' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr4' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr5' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr6' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr7' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr8' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr9' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr10' is not defined i

DEBUG chr1:183180:G:A -> VAF_PB=0.025 VAF_Ill=0.16467780429594273  RGN=Extreme
DEBUG chr1:183198:C:T -> VAF_PB=0.02631578947368421 VAF_Ill=0.14733542319749215  RGN=Extreme
DEBUG chr1:601606:G:T -> VAF_PB=0.3333333333333333 VAF_Ill=0.8648648648648649  RGN=Extreme


[W::vcf_parse] Contig 'chr4' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr5' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr6' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr7' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr8' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr9' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr10' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr11' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr12' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr13' is not define

✅ Output written: /n/data1/hms/dbmi/park/jiny/SMaHT/COLO829/0.truthset/2.PacBio_Valid/Step1_Validated_union_snv_VAF_RGN.vcf


[W::vcf_parse] Contig 'chr20' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr21' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chr22' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chrX' is not defined in the header. (Quick workaround: index the file with tabix.)
[W::vcf_parse] Contig 'chrY' is not defined in the header. (Quick workaround: index the file with tabix.)
