## Helper functions

In [31]:
from math import ceil

def read_consensus(filename):
    """Read a FASTA-like file that stores every record on exactly two lines.

    Lines at even positions are taken as record names and the line following
    each one as its sequence. Both are stripped of surrounding whitespace;
    names are returned exactly as written in the file, so a leading '>' is
    kept.

    Trailing blank lines are ignored. If removing them leaves a final name
    line without a partner, that record is returned with an empty sequence.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(contig_names, contigs)`` of two lists of equal length.

    Raises:
        ValueError: If the file ends with a name line that has no sequence
            line after it (for example a truncated file).
    """
    with open(filename) as f:
        lines = [line.strip() for line in f]

    # Blank lines at the end of the file carry no record. Left in place they
    # either produce a spurious empty record or, when the line count is odd,
    # make the pairing below run past the end of the list.
    dropped_blank = False
    while lines and not lines[-1]:
        lines.pop()
        dropped_blank = True

    if len(lines) % 2 != 0:
        if not dropped_blank:
            raise ValueError(
                '{}: expected name/sequence line pairs, but the last name '
                'line has no sequence line'.format(filename))
        # The blank line right after the last name was that record's
        # (empty) sequence, so put it back.
        lines.append('')

    contig_names = lines[0::2]
    contigs = lines[1::2]
    return contig_names, contigs

# Maximum number of sequence characters per line in files written by
# write_consensus.
CHUNK_LEN = 80

def read_consensus_chunks(filename):
    """Read a FASTA file whose sequences may be wrapped over several lines.

    A stripped line starting with '>' opens a new record and is stored as the
    record name (the '>' is kept). Every following non-blank line up to the
    next header is concatenated into that record's sequence.

    Exactly one sequence is returned per header, including an empty string
    for a header that has no sequence lines, so the two returned lists always
    line up index by index.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(contig_names, contigs)`` of two lists of equal length.
        Both are empty for a file without any header.

    Raises:
        ValueError: If non-blank text appears before the first header line.
    """
    contig_names, contigs = [], []
    # Pieces of the record currently being read; None until the first header
    # so that nothing is emitted for a file with no records.
    pieces = None
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line.startswith('>'):
                # Close the previous record even when it is empty; skipping
                # it would shift every later sequence onto the wrong name.
                if pieces is not None:
                    contigs.append(''.join(pieces))
                contig_names.append(line)
                pieces = []
            elif line:
                if pieces is None:
                    raise ValueError(
                        '{}: sequence data found before the first header '
                        'line'.format(filename))
                pieces.append(line)
    if pieces is not None:
        contigs.append(''.join(pieces))
    return contig_names, contigs

def write_consensus_one(filename, contig_names, contigs):
    """Write contigs to ``filename`` with each sequence on a single line.

    Every record takes two lines: a header made of '>', the record's
    zero-based position, the given name and ' LN:' followed by the sequence
    length, then the whole sequence unwrapped. Names and sequences are paired
    with ``zip``, so surplus items in the longer list are ignored. An
    existing file is overwritten.

    Args:
        filename: Path of the file to create.
        contig_names: Iterable of record names.
        contigs: Iterable of sequences, in the same order as the names.
    """
    with open(filename, 'w') as out:
        out.writelines(
            text
            for index, (name, sequence) in enumerate(zip(contig_names, contigs))
            for text in (
                '>{}{} LN:{}\n'.format(index, name, len(sequence)),
                '{}\n'.format(sequence),
            )
        )

def write_consensus(filename, contig_names, contigs, chunk_len=None):
    """Write contigs to ``filename`` with sequences wrapped to fixed-width lines.

    Every record starts with a header made of '>', the record's zero-based
    position, the given name and ' LN:' followed by the sequence length. The
    sequence follows, split into lines of at most ``chunk_len`` characters;
    an empty sequence produces a header and no sequence line. Names and
    sequences are paired with ``zip``, so surplus items in the longer list
    are ignored. An existing file is overwritten.

    Args:
        filename: Path of the file to create.
        contig_names: Iterable of record names.
        contigs: Iterable of sequences, in the same order as the names.
        chunk_len: Maximum number of sequence characters per line. Defaults
            to the module-level ``CHUNK_LEN`` when omitted or ``None``.

    Raises:
        ValueError: If ``chunk_len`` is not a positive number.
    """
    if chunk_len is None:
        chunk_len = CHUNK_LEN
    # Validate before opening so a bad width cannot truncate an existing file.
    if chunk_len <= 0:
        raise ValueError('chunk_len must be positive, got {}'.format(chunk_len))

    with open(filename, 'w') as f:
        for i, (contig_name, contig) in enumerate(zip(contig_names, contigs)):
            f.write('>{}{} LN:{}\n'.format(i, contig_name, len(contig)))
            # Stepping through start offsets yields the same chunks as
            # counting ceil(len / width) pieces, using integer arithmetic only.
            for start in range(0, len(contig), chunk_len):
                f.write('{}\n'.format(contig[start:start + chunk_len]))

## Convert k. pneumoniae model 11

In [22]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/k-pneumoniae-NCTC204-all-contigs-n20-model-11-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the same records next to the input with sequences wrapped to
# CHUNK_LEN characters per line.
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/k-pneumoniae-NCTC204-all-contigs-n20-model-11-racon-hax/wrapped-consensus.fasta'
write_consensus(output_filename, contig_names, contigs)

alooooo 8
alooooooooooooooooo 65720
alooooooooooooooooo 1625
alooooooooooooooooo 1321
alooooooooooooooooo 155


## Convert k. pneumoniae model 7

In [30]:
# Disabled variant: reads the file as two-line records and writes a wrapped
# copy (wrapped-consensus.fasta). The active code below does the opposite.
# input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/k-pneumoniae-NCTC204-all-contigs-n20-model-7-racon-hax/consensus.fasta'
# contig_names, contigs = read_consensus(input_filename)

# output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/k-pneumoniae-NCTC204-all-contigs-n20-model-7-racon-hax/wrapped-consensus.fasta'
# write_consensus(output_filename, contig_names, contigs)

# Load the consensus file with the multi-line reader, which joins all lines
# between two '>' headers into one sequence.
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/k-pneumoniae-NCTC204-all-contigs-n20-model-7-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus_chunks(input_filename)

# Write the records next to the input with each sequence on a single line.
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/k-pneumoniae-NCTC204-all-contigs-n20-model-7-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)

## Convert m. morgani model 11

In [32]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/m-morgani-NCTC235-all-contigs-n20-model-11-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the records next to the input with each sequence on a single line and
# a rewritten header (index, name, LN:<length>).
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/m-morgani-NCTC235-all-contigs-n20-model-11-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)

## Convert m. morgani model 7

In [33]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/m-morgani-NCTC235-all-contigs-n20-model-7-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the records next to the input with each sequence on a single line and
# a rewritten header (index, name, LN:<length>).
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/m-morgani-NCTC235-all-contigs-n20-model-7-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)

## Convert s. enterica 129 model 11

In [34]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC129-all-contigs-n20-model-11-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the records next to the input with each sequence on a single line and
# a rewritten header (index, name, LN:<length>).
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC129-all-contigs-n20-model-11-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)

## Convert s. enterica 129 model 7

In [35]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC129-all-contigs-n20-model-7-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the records next to the input with each sequence on a single line and
# a rewritten header (index, name, LN:<length>).
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC129-all-contigs-n20-model-7-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)

## Convert s. enterica 92 model 11

In [37]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC92-all-contigs-n20-model-11-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the records next to the input with each sequence on a single line and
# a rewritten header (index, name, LN:<length>).
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC92-all-contigs-n20-model-11-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)

## Convert s. enterica 92 model 7

In [39]:
# Load the consensus file as name/sequence line pairs (read_consensus expects
# every record on exactly two lines).
# NOTE(review): absolute, machine-specific path; consider a configurable base directory.
input_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC92-all-contigs-n20-model-7-racon-hax/consensus.fasta'
contig_names, contigs = read_consensus(input_filename)

# Write the records next to the input with each sequence on a single line and
# a rewritten header (index, name, LN:<length>).
output_filename = '/home/diplomski-rad/blade/pb/consensus-inference/s-enterica-NCTC92-all-contigs-n20-model-7-racon-hax/good-consensus.fasta'
write_consensus_one(output_filename, contig_names, contigs)