In [3]:
import os
import subprocess
import glob
import tqdm
import pickle
import pprint
from Bio        import SeqIO
from Bio        import SeqRecord
from Bio.Seq    import Seq
from Bio.Blast import NCBIXML
from Bio import SearchIO
from time import sleep
from pathlib import Path

In [2]:
working_directory = 'plasmid_id_v3' # set the working directory
print("CWD: ",os.getcwd())
assembly_dir = '/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs' # where are the assemblies?
blast_out = f'{working_directory}/blast_out' # where to put the blast output
output_json = f'{working_directory}/json' # just put the jsons somewhere.
ont_blast_out    = f'{blast_out}/ont'

CWD:  /Users/mf019/bioinformatics/longread_GWAS/plasmid_id


In [3]:
asms = glob.glob(f'{assembly_dir}/*.fasta')
ont_dict = {}
for file in asms:
    sample = Path(file).stem
    ont_dict[sample] = {}
    with open(file) as handle:
        for record in SeqIO.parse(handle, "fasta"):
            if record.id not in ont_dict[sample]:
                ont_dict[sample][str(record.id)] = record
                print(file, record.id)

/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000010
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000009
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000002
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000004
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000006
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000003
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000007
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000005
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/HB-19.fasta tig00000008
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/BB-8.fasta tig00000009
/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore

In [4]:
pickle_file = 'blast_parsing_dict.pickle'
parsing_dict = pickle.load(open(pickle_file,'rb'))


In [5]:
ont_blast_results = glob.glob(f'{ont_blast_out}/*.xml')
print(ont_blast_results)

['plasmid_id_v3/blast_out/ont/5A18NP1-JBb08-B_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/BG001_C7_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/5A18NP1-JBb08-E_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/5A18NP1-JBb08-A_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/BB-8_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/HB-19_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/B31-K2_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/BL091_B31E2_erpP_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/B31-5A4_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/N40_HP_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/5A18NP1-JBb08-D_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/5A18NP1-JBb08-C_ont_blast.xml', 'plasmid_id_v3/blast_out/ont/BL082_H1_ont_blast.xml']


In [6]:
all_hits = {}
for file in ont_blast_results:
    path = Path(file)
    assembly_id = path.name.split('/')[-1].strip('_ont_blast.xml')
    print(assembly_id+'\n')
    if assembly_id not in all_hits:
        all_hits[assembly_id] = {}
    current_sample = all_hits[assembly_id]
    res_handle = open(file, 'r')
    blast_records = NCBIXML.parse(res_handle)
    for record in blast_records:
        for alignment in record.alignments:
            for hsp in alignment.hsps:
                if (hsp.expect == 0 or hsp.expect < 1*10^-100):
                    query_length = record.query_length
                    contig_id = record.query.split(" ")[0]
                    hit_id = alignment.title.split(" ")[1]
                    alignment_length = alignment.length
                    bit_score = hsp.score
                    evalue = hsp.expect
                    if contig_id not in current_sample:
                        current_sample[contig_id] = []
                        current_sample[contig_id].append({'ID' : hit_id, 'alignment_length' : alignment_length, 'query_length' : query_length, 'bit_score' : bit_score, 'e-value' : evalue})
                    else:
                        current_sample[contig_id].append({'ID' : hit_id, 'alignment_length' : alignment_length, 'query_length' : query_length, 'bit_score' : bit_score, 'e-value' : evalue})
                    #print("****Alignment****")
                    #print("Sample: ", assembly_id)
                    #print("contig ID: ", contig_id)
                    #print("sequence:", hit_id)
                    #print("length:", alignment.length)
                    #print("bit score:",hsp.score)
                    #print("e value:", hsp.expect)

5A18NP1-JBb08-B

BG001_C7

5A18NP1-JBb08-E

5A18NP1-JBb08-A

BB-8

HB-19

B31-K2

BL091_B31E2_erpP

B31-5A4

N40_HP

5A18NP1-JBb08-D

5A18NP1-JBb08-C

BL082_H1



In [7]:
pprint.pprint(ont_dict.items())


dict_items([('HB-19', {'tig00000010': SeqRecord(seq=Seq('CTTGGCAATTTCGTATCTTAAGGAGTTTGATGATAAAGAAAAATTAAAAAAAAC...TTA'), id='tig00000010', name='tig00000010', description='tig00000010', dbxrefs=[]), 'tig00000009': SeqRecord(seq=Seq('CCAAGGCTCATTTTTTGTTAGTATTTTTAAATCTTTATCTAAATTTTCTAAAAA...TCC'), id='tig00000009', name='tig00000009', description='tig00000009', dbxrefs=[]), 'tig00000002': SeqRecord(seq=Seq('TTGTTGTTTATACATTATTACCTTCTTTGTATAAAATCTATCCAATAGAAAAGT...AGC'), id='tig00000002', name='tig00000002', description='tig00000002', dbxrefs=[]), 'tig00000004': SeqRecord(seq=Seq('AGTACAAAGTGATTAATAAGTTTAATAATCTCGCTACTTGCAAGCTTATAGGCT...TTT'), id='tig00000004', name='tig00000004', description='tig00000004', dbxrefs=[]), 'tig00000006': SeqRecord(seq=Seq('AAATAAGAATGAAACATTTAGTAAGAAGACTTTACTTGAGAAGGGACAAAATTT...CCA'), id='tig00000006', name='tig00000006', description='tig00000006', dbxrefs=[]), 'tig00000003': SeqRecord(seq=Seq('CAAAGATAAAGAATTATTTGCTATTTGCAATGGTAAAGATACTACTAATAGAAA...TAT'),

In [8]:
for sample in all_hits:
    print(sample)
    for contig in all_hits[sample].keys():
        print(contig)
        for hit in all_hits[sample][contig]:
            print(hit)
            print("-- number of HSPs: ",len(all_hits[sample][contig]))
            #for HSP in all_hits[sample][contig]:
            #    print("HSP ID: ", HSP['ID'])
            #    print("HSP Name: ", parsing_dict[HSP['ID']]['name'])

5A18NP1-JBb08-B
tig00000008
{'ID': 'CP019921.1', 'alignment_length': 62238, 'query_length': 45296, 'bit_score': 58578.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'CP019921.1', 'alignment_length': 62238, 'query_length': 45296, 'bit_score': 51681.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'CP019921.1', 'alignment_length': 62238, 'query_length': 45296, 'bit_score': 16994.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'CP019921.1', 'alignment_length': 62238, 'query_length': 45296, 'bit_score': 15512.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'CP019921.1', 'alignment_length': 62238, 'query_length': 45296, 'bit_score': 6543.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'AE001581.1', 'alignment_length': 30651, 'query_length': 45296, 'bit_score': 51899.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'AE001581.1', 'alignment_length': 30651, 'query_length': 45296, 'bit_score': 38676.0, 'e-value': 0.0}
-- number of HSPs:  14
{'ID': 'CP019760.1', 'alignment_length': 30651, '

In [9]:
hits_dict = {}
results = {}

for sample in all_hits:
    current_sample = all_hits[sample]
    results[sample] = {}
    sample_results = results[sample]
    for contig in current_sample:
        #print(sample, contig, current_sample[contig], sample_results)
        if contig not in sample_results:
            sample_results[contig] = {}

        if contig not in ont_dict[sample]:
            print(f"{contig} not in {sample}????????", ont_dict[sample][contig])
            continue
        print(ont_dict[sample][contig])
        curr_contig = sample_results[contig]
        curr_contig = { 'assembly' : ont_dict[sample][contig],
                                  'hits' : current_sample[contig],
                              'RefStats' : [] }
        for hit in enumerate(curr_contig['hits']):
            #print(hit[0])
            print("        "+"hit:",hit[1]['ID'],"lookup table:", parsing_dict[hit[1]['ID']]['name'])
            hit[1]['name'] = parsing_dict[hit[1]['ID']]['name']
            print("            "+"new name:",hit[1]['ID'], hit[1]['name'])
            curr_contig['RefStats'].append(parsing_dict[hit[1]['ID']])
        sample_results[contig] = curr_contig

ID: tig00000008
Name: tig00000008
Description: tig00000008
Number of features: 0
Seq('GAAATTTGCTCATGAAAAAAATTTTGATTTCATAATGGCAGGAACTTTAGGACC...TTT')
        hit: CP019921.1 lookup table: cp32-9-4
            new name: CP019921.1 cp32-9-4
        hit: CP019921.1 lookup table: cp32-9-4
            new name: CP019921.1 cp32-9-4
        hit: CP019921.1 lookup table: cp32-9-4
            new name: CP019921.1 cp32-9-4
        hit: CP019921.1 lookup table: cp32-9-4
            new name: CP019921.1 cp32-9-4
        hit: CP019921.1 lookup table: cp32-9-4
            new name: CP019921.1 cp32-9-4
        hit: AE001581.1 lookup table: cp32-9
            new name: AE001581.1 cp32-9
        hit: AE001581.1 lookup table: cp32-9
            new name: AE001581.1 cp32-9
        hit: CP019760.1 lookup table: cp32-9
            new name: CP019760.1 cp32-9
        hit: CP019760.1 lookup table: cp32-9
            new name: CP019760.1 cp32-9
        hit: CP094605.1 lookup table: cp32-9
            new name:

In [10]:
for sample in results:
    print(sample)
    if sample == 'B31-K2':
        for contig in results[sample]:
            for hit in results[sample][contig]['hits']:
                #print(hit)
                #print("-- number of HSPs: ",len(results[sample][contig]['hits']))
                HSPcount = 0
                for HSP in results[sample][contig]['hits']:
                    #print("HSP ID: ", HSP['ID'])
                    print(f"{sample} {contig} Hit#:{HSPcount} HSP Name: {parsing_dict[HSP['ID']]['name']}")
                    HSPcount += 1

5A18NP1-JBb08-B
BG001_C7
5A18NP1-JBb08-E
5A18NP1-JBb08-A
BB-8
HB-19
B31-K2
B31-K2 tig00000019 Hit#:0 HSP Name: cp32-5+1
B31-K2 tig00000019 Hit#:1 HSP Name: cp32-5+1
B31-K2 tig00000019 Hit#:2 HSP Name: cp32-5+1
B31-K2 tig00000019 Hit#:3 HSP Name: cp32-5-1
B31-K2 tig00000019 Hit#:4 HSP Name: cp32-5-1
B31-K2 tig00000019 Hit#:5 HSP Name: cp32-5-1
B31-K2 tig00000019 Hit#:6 HSP Name: cp32-6
B31-K2 tig00000019 Hit#:7 HSP Name: cp32-6
B31-K2 tig00000019 Hit#:8 HSP Name: cp32-6
B31-K2 tig00000019 Hit#:9 HSP Name: cp32-6
B31-K2 tig00000019 Hit#:10 HSP Name: cp32-9
B31-K2 tig00000019 Hit#:11 HSP Name: cp32-9
B31-K2 tig00000019 Hit#:0 HSP Name: cp32-5+1
B31-K2 tig00000019 Hit#:1 HSP Name: cp32-5+1
B31-K2 tig00000019 Hit#:2 HSP Name: cp32-5+1
B31-K2 tig00000019 Hit#:3 HSP Name: cp32-5-1
B31-K2 tig00000019 Hit#:4 HSP Name: cp32-5-1
B31-K2 tig00000019 Hit#:5 HSP Name: cp32-5-1
B31-K2 tig00000019 Hit#:6 HSP Name: cp32-6
B31-K2 tig00000019 Hit#:7 HSP Name: cp32-6
B31-K2 tig00000019 Hit#:8 HSP Name: cp3

In [11]:
with open('ont_asm_plasmids_v1.csv', 'w') as handle:
    handle.write('sample,contig,name,length\n')
    for sample in results:
        print(sample)
        for contig in results[sample]:
            refname = results[sample][contig]['RefStats'][0]['name']
            length = len(ont_dict[sample][contig].seq)
            handle.write(f'{sample}, {contig}, {refname}, {length}\n')

5A18NP1-JBb08-B
BG001_C7
5A18NP1-JBb08-E
5A18NP1-JBb08-A
BB-8
HB-19
B31-K2
BL091_B31E2_erpP
B31-5A4
N40_HP
5A18NP1-JBb08-D
5A18NP1-JBb08-C
BL082_H1


In [65]:
#pprint.pprint(ont_dict)
for sample in ont_dict:
    print(sample)
    for contig in ont_dict[sample]:
        print(sample,contig,[hit['name'] for hit in results[sample][contig]['RefStats']],len(ont_dict[sample][contig].seq))

HB-19
HB-19 tig00000010 ['chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome'] 91761
HB-19 tig00000009 ['chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome', 'chromosome'] 917230
HB-19 tig00000002 ['cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26', 'cp26'] 49899
HB-19 tig00000004 ['cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-6', 'cp32-2', 'cp32-2', 'cp32-2', 'cp32-2', 'cp32-2'] 51620
HB-19 tig00000006 ['lp28-6', 'lp2

In [82]:
outpath = '/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2'
for sample in ont_dict:
    if sample == 'B31-K2':
        for contig in ont_dict[sample]:
            print(contig)
            print(ont_dict[sample][contig])
            outfile = f'{outpath}/{contig}.fa'
            #with open(outfile, 'w') as handle:
                #SeqIO.write(ont_dict[sample][contig],handle, 'fasta')
                #print("file written")

tig00000019
ID: tig00000019
Name: tig00000019
Description: tig00000019
Number of features: 0
Seq('AAATTATTGAAAGAATTGAGTGATGCTAGGGACGCGCTACGAACTAAATTAAAT...AAT')
tig00000011
ID: tig00000011
Name: tig00000011
Description: tig00000011
Number of features: 0
Seq('AACAACTAAAAAATACGCTATCTAATGATTTAAAAAAGCAAATAGAATCGGCCT...ATA')
tig00000025
ID: tig00000025
Name: tig00000025
Description: tig00000025
Number of features: 0
Seq('TTATTCGTATCGCTAGAATATAACTTTTTATTTTCAATTAAACTTTTGAGAAAA...ACT')
tig00000004
ID: tig00000004
Name: tig00000004
Description: tig00000004
Number of features: 0
Seq('AGCTTTATGAGTTTTTGTTTATTTCTTTCAAGGTTTTTATTTAGTAAATAAAGT...ATA')
tig00000015
ID: tig00000015
Name: tig00000015
Description: tig00000015
Number of features: 0
Seq('AAAAAAAATAGATAGCATAATTTCAAATTTAAAAGAGTTTAATGAAAACTTTAC...TTA')
tig00000023
ID: tig00000023
Name: tig00000023
Description: tig00000023
Number of features: 0
Seq('AGATGATGGAAATGTTCGAGGATTTTAAATCTTTCGGCCTTACAAAAAAAGTCA...AAT')
tig00000002
ID: tig00000002
Name: 

In [79]:

outpath = '/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2'
print(os.getcwd())
k2_contig_files = glob.glob(f'{outpath}/*.fa')
for file in k2_contig_files:
    id = file.split('/')[-1].split('.')[0]
    directory = f'{outpath}/{id}'
    delta = f'{directory}/{id}.delta'

    if not os.path.exists(directory):
        os.makedirs(directory)

    options = f"--threads 8 --maxmatch --nosimplify --delta={delta}"
    command = f"nucmer {options} {file} {file}"
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    stdout, stderr = process.communicate()

    ## now get coords
    newcmd = f"show-coords -r -c -l {delta} > {directory}/{id}.coords"
    process = subprocess.Popen(newcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    stdout, stderr = process.communicate()

    # and lets tile it
    tilecmd = f"show-tiling {delta} > {directory}/{id}.tiling"
    process = subprocess.Popen(tilecmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    stdout, stderr = process.communicate()

    # and lets plot it but for real this time
    plotcmd = f"mummerplot -p {directory}/{id} --png {delta}"
    process = subprocess.Popen(plotcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    stdout, stderr = process.communicate()
    print(stdout, stderr)

/Users/mf019/bioinformatics/longread_GWAS/plasmid_id
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000002/tig00000002.delta\nWriting plot files /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000002/tig00000002.fplot, /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000002/tig00000002.rplot\nWriting gnuplot script /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000002/tig00000002.gp\nRendering plot /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000002/tig00000002.png\n'
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000012/tig00000012.delta\nWriting plot files /Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/k2/tig00000012/tig00000012.fplot, /Users/mf019/bioinformatics/longread_GWAS/as

In [95]:
file = '../ref/GCF_000008685.2_ASM868v2_genomic.fna'
outpath = 'refmums'
id = 'b31ref'
directory = f'{outpath}/{id}'
delta = f'{directory}/{id}.delta'
if not os.path.exists(directory):
    os.makedirs(directory)
options = f"--threads 8 --maxmatch --nosimplify --delta={delta}"
command = f"nucmer {options} {file} {file}"
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

# and lets plot it but for real this time
plotcmd = f"mummerplot -p {directory}/{id} --png {delta}"
process = subprocess.Popen(plotcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file refmums/b31ref/b31ref.delta\nWriting plot files refmums/b31ref/b31ref.fplot, refmums/b31ref/b31ref.rplot\nWriting gnuplot script refmums/b31ref/b31ref.gp\nRendering plot refmums/b31ref/b31ref.png\n'


In [98]:
files = glob.glob('/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/*.fasta')
for file in files:
    id = file.split('/')[-1].split('.')[0]
    outpath = 'mummer_output'
    directory = f'{outpath}/{id}'
    delta = f'{directory}/{id}.delta'
    if not os.path.exists(directory):
        os.makedirs(directory)
    options = f"--threads 8 --maxmatch --nosimplify --delta={delta}"
    command = f"nucmer {options} {file} {file}"
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    stdout, stderr = process.communicate()
    print(stdout, stderr)

    # and lets plot it but for real this time
    plotcmd = f"mummerplot -p {directory}/{id} --png {delta}"
    process = subprocess.Popen(plotcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    stdout, stderr = process.communicate()
    print(stdout, stderr)

b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file mummer_output/HB-19/HB-19.delta\nWriting plot files mummer_output/HB-19/HB-19.fplot, mummer_output/HB-19/HB-19.rplot\nWriting gnuplot script mummer_output/HB-19/HB-19.gp\nRendering plot mummer_output/HB-19/HB-19.png\n'
b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file mummer_output/BB-8/BB-8.delta\nWriting plot files mummer_output/BB-8/BB-8.fplot, mummer_output/BB-8/BB-8.rplot\nWriting gnuplot script mummer_output/BB-8/BB-8.gp\nRendering plot mummer_output/BB-8/BB-8.png\n'
b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file mummer_output/B31-K2/B31-K2.delta\nWriting plot files mummer_output/B31-K2/B31-K2.fplot, mummer_output/B31-K2/B31-K2.rplot\nWriting gnuplot script mummer_output/B31-K2/B31-K2.gp\nRendering plot mummer_output/B31-K2/B31-K2.png\n'
b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file mummer_output/BL091_B31E2_erpP/BL091_B31E2_erpP.delta\nWriting plot files mummer_output/BL091_B31E2_erpP/BL

In [101]:
file = '/Users/mf019/bioinformatics/longread_GWAS/assemblies/illumina/contigs/URI34_200.fasta'
outpath = 'luminamums'
id = 'URI_34'
directory = f'{outpath}/{id}'
delta = f'{directory}/{id}.delta'
if not os.path.exists(directory):
    os.makedirs(directory)
options = f"--threads 8 --maxmatch --nosimplify --delta={delta}"
command = f"nucmer {options} {file} {file}"
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

# and lets plot it but for real this time
plotcmd = f"mummerplot -p {directory}/{id} --png {delta}"
process = subprocess.Popen(plotcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file luminamums/URI_34/URI_34.delta\nWriting plot files luminamums/URI_34/URI_34.fplot, luminamums/URI_34/URI_34.rplot\nWriting gnuplot script luminamums/URI_34/URI_34.gp\nRendering plot luminamums/URI_34/URI_34.png\n'


In [4]:
file = 'all_plasmids.fasta'
outpath = 'meta_mums'
id = 'all_plasmids'
directory = f'{outpath}/{id}'
delta = f'{directory}/{id}.delta'
if not os.path.exists(directory):
    os.makedirs(directory)
options = f"--threads 8 --maxmatch --nosimplify --delta={delta}"
command = f"nucmer {options} {file} {file}"
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

# and lets plot it but for real this time
plotcmd = f"mummerplot -p {directory}/{id} --png {delta}"
process = subprocess.Popen(plotcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file meta_mums/all_plasmids/all_plasmids.delta\nWriting plot files meta_mums/all_plasmids/all_plasmids.fplot, meta_mums/all_plasmids/all_plasmids.rplot\nWriting gnuplot script meta_mums/all_plasmids/all_plasmids.gp\nRendering plot meta_mums/all_plasmids/all_plasmids.png\n'


In [8]:
asm = '/Users/mf019/bioinformatics/longread_GWAS/assemblies/nanopore/contigs/B31-K2.fasta'
ref = '../ref/GCF_000008685.2_ASM868v2_genomic.fna'
outpath = 'k2vref'
id = 'k2vref'
directory = f'{outpath}/{id}'
delta = f'{directory}/{id}.delta'
if not os.path.exists(directory):
    os.makedirs(directory)
options = f"--threads 8 --maxmatch --nosimplify --delta={delta}"
command = f"nucmer {options} {ref} {asm}"
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

# and lets plot it but for real this time
plotcmd = f"mummerplot -p {directory}/{id} --png {delta}"
process = subprocess.Popen(plotcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = process.communicate()
print(stdout, stderr)

b'' b''
b'gnuplot 6.0 patchlevel 0\n' b'Reading delta file k2vref/k2vref/k2vref.delta\nWriting plot files k2vref/k2vref/k2vref.fplot, k2vref/k2vref/k2vref.rplot\nWriting gnuplot script k2vref/k2vref/k2vref.gp\nRendering plot k2vref/k2vref/k2vref.png\n'
