In [2]:
import h5py
from pod5 import DatasetReader
import pyslow5

from src.Containers.Fasta import Fasta
from src.Containers.FastQ import FastQ
from src.Containers.Pod5 import Pod5
from src.Containers.Fast5 import Fast5
from src.Containers.Blow5 import Blow5
from src.Containers.Slow5 import Slow5

from src.Containers.ClassifContainer import ClassifContainer
from src.WriterSystem.WriterWrapper import WriterWrapper

from src.ReaderSystem.ReaderWrapper import ReaderWrapper

In [8]:
# Build four classified FASTA records: three labelled 'E.coli' and one labelled
# "Bacillus". Fasta is called with the record name first and the sequence second.
fasta_0 = ClassifContainer(Fasta('Coli_0', 'AAAAA'), 'E.coli')
fasta_1 = ClassifContainer(Fasta('Coli_1', 'ATAAA'), 'E.coli')
fasta_2 = ClassifContainer(Fasta('Coli_2', 'AAGAA'), 'E.coli')

# Name first, sequence second, matching the E.coli records above. These two
# arguments used to be swapped, which put the bases in the header and the
# record name in the sequence.
fasta_3 = ClassifContainer(Fasta('Bacillus_0', 'GGAATTT'), "Bacillus")

fasta_coll = [fasta_0, fasta_1, fasta_2, fasta_3]

# Write the whole collection in one call.
# NOTE(review): the meaning of the positional arguments (False, 2) is defined by
# WriterWrapper and is not visible in this notebook.
with WriterWrapper(False, 2, 'fasta') as oww:
    oww.write(fasta_coll)
# end with

In [3]:
# Classified FASTQ fixtures: three reads labelled "E.coli" plus one labelled
# 'B.subtilis'. Each tuple below supplies the first, second and fourth FastQ
# arguments (presumably name, sequence and quality string); "+" is always passed
# as the third.
ecoli_fastq_fields = [
    ("Coli_0", "AAAAA", "!!!!!"),
    ("Coli_1", "AAATA", "!!!!!"),
    ("Coli_2", "AAATTT", "!!!!!!"),
]
fastq_0, fastq_1, fastq_2 = [
    ClassifContainer(FastQ(name, seq, "+", qual), "E.coli")
    for name, seq, qual in ecoli_fastq_fields
]

fastq_3 = ClassifContainer(
    FastQ("Bacillus_0", "GGAATTT", "+", "!!!!!!!"),
    'B.subtilis',
)

fastq_coll = [fastq_0, fastq_1, fastq_2, fastq_3]

# Write all four containers in a single call.
with WriterWrapper(False, 2, 'fastq') as writer:
    writer.write(fastq_coll)
# end with



In [15]:
# FastQ is also imported in the notebook's import cell; this repeat is redundant
# when the notebook is run top to bottom.
from src.Containers.FastQ import FastQ

# Single three-base record used to spot-check the average-quality calculation.
# NOTE(review): the recorded output (9.9235...) is what error-probability
# averaging of Phred scores 9, 10, 11 gives, i.e. '*+,' decoded with offset 33
# rather than 34. Confirm how FastQ applies `offset`, or whether the stored
# output is stale.
fastq = FastQ(
    "Coli_0",
    "AAA",
    "+",
    "*+,",
    offset=34,
)

# Bare last expression so the notebook displays the value.
fastq.average_quality()



9.923583702678474

In [2]:
# Open the POD5 file and obtain an iterator over its read records.
with DatasetReader("FAY62206_04e2939b_50079bd0_18.pod5") as dataset:
    reads = dataset.reads()

# Take the first three records; later cells wrap them in Pod5 containers.
# NOTE(review): `reads` is only consumed here, after the `with` block has already
# exited. If `dataset.reads()` is lazy, the records are fetched from a dataset
# whose context manager has been closed. Confirm against the pod5 DatasetReader
# documentation that this is supported and does not leave a reader open.
first = next(reads)
second = next(reads)
third = next(reads)

In [3]:
# Wrap the three POD5 reads fetched earlier, each labelled 'E.coli'.
pod5_0, pod5_1, pod5_2 = [
    ClassifContainer(Pod5(read), 'E.coli')
    for read in (first, second, third)
]

# The first read is wrapped a second time under a different label, so the
# collection contains two distinct classifications.
pod5_3 = ClassifContainer(Pod5(first), "Bacillus")

# The name is carried over from the FASTA cell; here the list holds POD5 containers.
fasta_coll = [pod5_0, pod5_1, pod5_2, pod5_3]

# Write all four containers in a single call.
with WriterWrapper(False, 2, 'pod5') as writer:
    writer.write(fasta_coll)
# end with

In [14]:
# Open the FAST5 (HDF5) file read-only. Using h5py.File as a context manager
# guarantees the handle is closed even if building a container or writing
# raises; the previous explicit close() at the end of the cell was skipped on
# any error.
with h5py.File('FAL91692_72e297f8_0.fast5', 'r') as out_file_handle:
    # The variable names are carried over from the POD5/FASTA cells; here they
    # hold Fast5 containers, each built from the shared file handle and a read
    # group name.
    pod5_0 = ClassifContainer(Fast5(out_file_handle, 'read_00001da1-4ebb-4ce6-9128-d050d3696fcc'), 'E.coli')
    pod5_1 = ClassifContainer(Fast5(out_file_handle, 'read_000ab04f-2712-4604-87d5-2f46b6a87895'), 'E.coli')
    pod5_2 = ClassifContainer(Fast5(out_file_handle, 'read_000eb25f-b049-435f-82f6-1fee4796f0c4'), 'E.coli')

    # Same read as pod5_0, wrapped again under a second label.
    pod5_3 = ClassifContainer(Fast5(out_file_handle, 'read_00001da1-4ebb-4ce6-9128-d050d3696fcc'), "Bacillus")

    fasta_coll = [pod5_0, pod5_1, pod5_2, pod5_3]

    # The write happens while the source file is still open, as before.
    # NOTE(review): this is the only writer call that passes True as its first
    # argument; the meaning is defined by WriterWrapper and not visible here.
    with WriterWrapper(True, 2, 'fast5') as oww:
        oww.write(fasta_coll)
    # end with
# end with



In [5]:
# Open the BLOW5 file for reading.
s5 = pyslow5.Open('FAY62206_04e2939b_50079bd0_18.blow5', 'r')

# try/finally so the handle is closed even when a read lookup, container
# construction or the write raises; previously close() was skipped on any error.
try:
    # Uncomment to list the read ids available in the file:
    # print(s5.get_read_ids())

    first = s5.get_read('c018e0f4-2cf9-4b36-8961-751cc03d8dd5')
    second = s5.get_read('0082f3c8-8b36-443b-a50a-064918f7e871')
    third = s5.get_read('34f2315d-1dfc-43cb-a772-c348ab46aa59')

    # The variable names are carried over from the FASTA cell; here they hold
    # Blow5 containers.
    fasta_0 = ClassifContainer(Blow5(first), 'E.coli')
    fasta_1 = ClassifContainer(Blow5(second), 'E.coli')
    fasta_2 = ClassifContainer(Blow5(third), 'E.coli')

    # The third read is wrapped again under a second label.
    fasta_3 = ClassifContainer(Blow5(third), "Bacillus")

    fasta_coll = [fasta_0, fasta_1, fasta_2, fasta_3]

    with WriterWrapper(False, 2, 'blow5') as oww:
        oww.write(fasta_coll)
    # end with
finally:
    s5.close()
# end try

[slow5_idx_init::INFO][1;34m Index file not found. Creating an index at 'FAY62206_04e2939b_50079bd0_18.blow5.idx'.[0m


In [6]:
# Open the SLOW5 file for reading.
s5 = pyslow5.Open('FAY62206_04e2939b_50079bd0_18.slow5', 'r')

# try/finally so the handle is closed even when a read lookup, container
# construction or the write raises; previously close() was skipped on any error.
try:
    # Uncomment to list the read ids available in the file:
    # print(s5.get_read_ids())

    first = s5.get_read('c018e0f4-2cf9-4b36-8961-751cc03d8dd5')
    second = s5.get_read('0082f3c8-8b36-443b-a50a-064918f7e871')
    third = s5.get_read('34f2315d-1dfc-43cb-a772-c348ab46aa59')

    # The variable names are carried over from the FASTA cell; here they hold
    # Slow5 containers.
    fasta_0 = ClassifContainer(Slow5(first), 'E.coli')
    fasta_1 = ClassifContainer(Slow5(second), 'E.coli')
    fasta_2 = ClassifContainer(Slow5(third), 'E.coli')

    # The third read is wrapped again under a second label.
    fasta_3 = ClassifContainer(Slow5(third), "Bacillus")

    fasta_coll = [fasta_0, fasta_1, fasta_2, fasta_3]

    with WriterWrapper(False, 2, 'slow5') as oww:
        oww.write(fasta_coll)
    # end with
finally:
    s5.close()
# end try

In [12]:
# Drain 'test.fasta' through ReaderWrapper, printing each packet it returns.
# The loop has no exit condition of its own: it ends when next() raises
# StopIteration, which propagates out of the `with` block and is discarded below.
try:
    with ReaderWrapper(
        'test.fasta',
        packet_size=20,
        probing_packet_size=10,
        mode='sum_seq_len',
        max_seq_len=6,
    ) as reader:
        while True:
            packet = next(reader)
            print(packet)
        # end while
    # end with
except StopIteration:
    # Reader exhausted; nothing more to print.
    pass
# end try

[Fasta(header='>U00096.3_segment_0', sequence='AAAAAAAAAA'), Fasta(header='>U00096.3_segment_0_0_0', sequence='AAAAAAAAAA'), Fasta(header='>U00096.3_segment_0_0_1', sequence='AAAAAAAAAA'), Fasta(header='>U00096.3_segment_0_0_2', sequence='AAAAAAAAAA')]
[Fasta(header='>U00096.3_segment_0_0_3', sequence='AAAAAAAAAA'), Fasta(header='>U00096.3_segment_0_0_4', sequence='AAAAAAAAAA'), Fasta(header='>U00096.3_segment_0_0_5', sequence='AAAAAAAAAA'), Fasta(header='>U00096.3_segment_0_0', sequence='AGCTTTTC')]
[Fasta(header='>U00096.3_segment_0_1', sequence='AGCTTTTCATTCTGAC'), Fasta(header='>U00096.3_segment_0_2', sequence='AGCT')]
