In [1]:
from DeepVCF import pathing
from DeepVCF import pandas as pd
from DeepVCF import alignment
from DeepVCF import dwgsim
from DeepVCF import bcftools_vcf
from DeepVCF.core import DeepVCF

=== No GPU Detected ===




In [2]:
# Species processed by this notebook; each name is handed to `pathing(name)` to
# locate that species' data folder.
# Disabled species stay listed (commented out) so they can be switched back on.
# The trailing "!" markers come from the original author; their meaning is not
# recorded in this notebook.
bacteria = [
    # 'c_difficile',  # !
    # 'e_coli',
    # 'klebsiella',  # !
    # 'listeria',
    # 'neisseria',
    # 'salmonella',  # !
    # 'staphylococcus',
    'streptococcus',
    'tb',
]

In [3]:
# Pileup / genotype thresholds passed to both `DeepVCF.train` and
# `DeepVCF.create_vcf`. Keeping them in one place guarantees that training and
# prediction are always run with the same settings.
# NOTE(review): 'mimimum_alignment_coverage' is spelled exactly as in the original
# calls -- confirm against the DeepVCF API before "correcting" the spelling.
_PILEUP_OPTIONS = {
    'mimimum_alignment_coverage': .75,
    'minimum_coverage': 30,
    'heterozygous_threshold': .25,
    'minimum_variant_radius': 15,
}


def _simulate_and_align(folder, prefix, ref, be_rate):
    """Simulate paired-end reads from ``ref`` with dwgsim, then align them with bwa mem.

    Args:
        folder: Output directory; must support the ``/`` path operator
            (e.g. ``pathlib.Path``).
        prefix: Output prefix used for every file written by both tools.
        ref: Reference FASTA given to dwgsim and to the aligner.
        be_rate: Value passed as dwgsim ``-e`` and ``-E`` (the base-error rate
            used for both reads of a pair).
    """
    dwgsim(
        ref_file=ref,
        output_folder=folder,
        output_prefix=prefix,
        verbose=False,
        **{
            '-r': .005,  # mutation rate :: need to get about 20K mutations for usable training
            '-e': be_rate,
            '-E': be_rate,
            '-C': 50,   # mean coverage
            '-1': 150,  # read length
            '-2': 150,  # read length
        }
    )
    alignment.bwa_mem(
        reference=ref,
        reads=[
            folder / f'{prefix}.bwa.read1.fastq',
            folder / f'{prefix}.bwa.read2.fastq',
        ],
        output_folder=folder,
        output_prefix=prefix,
        verbose=False,
    )


def populate_train(folder, prefix, ref, be_rate):
    """Generate a simulated training set in ``folder`` and train a fresh DeepVCF model.

    Reads are simulated and aligned, then a new ``DeepVCF`` instance is trained on
    ``{prefix}.mapped.bam`` against the truth set ``{prefix}.mutations.vcf``. The
    pileup built during training is saved to ``{prefix}.pileup.npy``.

    Args:
        folder: Directory that receives every generated file (path-like, ``/`` capable).
        prefix: File-name prefix for all generated files.
        ref: Reference FASTA to simulate from and align against.
        be_rate: Base-error rate forwarded to the read simulator.

    Returns:
        The trained ``DeepVCF`` instance.
    """
    _simulate_and_align(folder, prefix, ref, be_rate)
    deepvcf = DeepVCF()
    deepvcf.train(
        reference_file=ref,
        alignment_file=folder / (prefix + '.mapped.bam'),
        vcf_file=folder / f'{prefix}.mutations.vcf',
        save_pileup_to_destination=folder / f'{prefix}.pileup.npy',
        # use_saved_pileup=folder/f'{prefix}.pileup.npy',
        verbose=0,
        **_PILEUP_OPTIONS,
    )
    return deepvcf


def populate_test(folder, prefix, ref, be_rate, deepvcf):
    """Generate a simulated test set in ``folder`` and score ``deepvcf`` on it.

    Reads are simulated and aligned exactly as for training, variants are called
    with ``deepvcf.create_vcf`` and compared against ``{prefix}.mutations.vcf``
    with ``deepvcf.validation``. The metrics are printed with a ``DeepVCF:`` label.

    Args:
        folder: Directory that receives the simulated reads, alignment and pileup.
            The called VCF is written to ``folder.parent``.
        prefix: File-name prefix for all generated files.
        ref: Reference FASTA to simulate from and align against.
        be_rate: Base-error rate forwarded to the read simulator.
        deepvcf: Trained model exposing ``create_vcf`` and ``validation``
            (in this notebook, the object returned by ``populate_train``).

    Returns:
        Whatever ``deepvcf.validation`` returned, so callers can collect the
        metrics instead of re-parsing the printed output.
    """
    _simulate_and_align(folder, prefix, ref, be_rate)
    dcf = deepvcf.create_vcf(
        reference_file=ref,
        alignment_file=folder / (prefix + '.mapped.bam'),
        save_pileup_to_destination=folder / f'{prefix}.pileup.npy',
        # use_saved_pileup=folder/f'{prefix}.pileup.npy',
        output_folder=folder.parent,  # save as an actual .vcf file
        output_prefix=prefix,
        **_PILEUP_OPTIONS,
    )
    metrics = deepvcf.validation(
        predicted_vcf=dcf,
        real_vcf=folder / (prefix + '.mutations.vcf'),
    )
    print('DeepVCF:', metrics)
    # No `del deepvcf` here: it would only unbind this local name; the caller
    # still holds the model, so nothing would be released.
    return metrics

def _first_fasta(folder):
    """Return the first ``*.fasta`` path yielded by ``folder.glob``.

    Raises:
        FileNotFoundError: if ``folder`` contains no ``*.fasta`` file. This replaces
            the context-free ``IndexError`` that indexing an empty list would give.
    """
    fasta = next(folder.glob('*.fasta'), None)
    if fasta is None:
        raise FileNotFoundError(f'no *.fasta reference found in {folder}')
    return fasta


# For every species: train one model per base-error level on the species' train
# reference, then evaluate that model on data simulated from its test reference.
# Top-level variable names are kept as-is because they live in the notebook
# namespace and other cells may read them.
for name in bacteria:
    print(name)
    train_folder = pathing(name) / 'train'
    test_folder = pathing(name) / 'test'
    ref_train = _first_fasta(train_folder)
    ref_test = _first_fasta(test_folder)
    # (label used in file prefixes, base-error rate passed to the simulator)
    for be, be_rate in [('2', .02), ('10', .1)]:
        prefix = f'train-base-error-{be}percent'
        deepvcf = populate_train(train_folder, prefix, ref_train, be_rate)
        prefix = f'test-base-error-{be}percent'
        populate_test(test_folder, prefix, ref_test, be_rate, deepvcf)
            

streptococcus
=== Building Pileup From Scratch ===
=== Pilup Complete ===
=== Tensors Complete ===
=== Building Pileup From Scratch ===
=== Pilup Complete ===
=== Tensors Complete ===
DeepVCF: {'hom_alt': {'Sensitivity': 0.9833785617367707, 'PPV': 0.9948524365133836, 'Accuracy': 0.9929770657302754, 'F1': 0.9890822244967588}, 'het': {'Sensitivity': 0.9873133129918099, 'PPV': 0.9857303190636524, 'Accuracy': 0.9817708333333334, 'F1': 0.9865211810012837}}
=== Building Pileup From Scratch ===
=== Pilup Complete ===
=== Tensors Complete ===
=== Building Pileup From Scratch ===
=== Pilup Complete ===
=== Tensors Complete ===
DeepVCF: {'hom_alt': {'Sensitivity': 0.9682692307692308, 'PPV': 0.9843597262952102, 'Accuracy': 0.9837497236347557, 'F1': 0.9762481822588464}, 'het': {'Sensitivity': 0.964080695423979, 'PPV': 0.9850846321434557, 'Accuracy': 0.966547192353644, 'F1': 0.9744694960212201}}
tb
=== Building Pileup From Scratch ===
=== Pilup Complete ===
=== Tensors Complete ===
=== Building Pil