# Select queries to map onto part of the reference

In [10]:
from Bio import SeqIO
import gzip

In [11]:
def write_subseq_to_fasta(input_file, output_file, lower, upper, i):
    '''
    Trim one entry of a fasta file to a sub-range and save the result as fasta.

    All entries of input_file are loaded. The entry at index i (0-based, in
    file order) has its sequence replaced by the slice [lower-1:upper-1];
    the other entries are left untouched. Every entry, trimmed or not, is
    then written to output_file.
    '''
    entries = list(SeqIO.parse(input_file, "fasta"))
    target = entries[i]
    # Shift both bounds down by one before slicing.
    # NOTE(review): with 1-based coordinates this keeps positions
    # lower .. upper-1, i.e. position `upper` itself is excluded -- confirm
    # that this is the intended interval.
    start, stop = lower - 1, upper - 1
    target.seq = target.seq[start:stop]
    SeqIO.write(entries, output_file, "fasta")

In [12]:
def write_queries_to_fastq(input_file, output_file, query_list):
    '''
    Write the reads whose id appears in query_list to a fastq file.

    input_file:  path to a gzip-compressed fastq file.
    output_file: path of the fastq file to write.
    query_list:  collection of read ids to keep.

    Matching reads are written in the order they occur in input_file.
    Ids in query_list that match no read are ignored without error.
    '''
    # Build the lookup once: set membership is O(1) per read, whereas testing
    # against the list rescans it for every read in the file.
    wanted_ids = set(query_list)
    # The context manager guarantees the gzip handle is closed, including
    # when parsing raises part-way through the file.
    with gzip.open(input_file, "rt") as handle:
        records = [
            record
            for record in SeqIO.parse(handle, "fastq")
            if record.id in wanted_ids
        ]
    SeqIO.write(records, output_file, "fastq")

In [13]:
# Trim entry 0 of the reference genome with bounds 3950000 / 4050000.
write_subseq_to_fasta(
    "../data/2022-09-21_amoxicillin_run_vial_14_time_14/ref_genome.fa",
    "../results/ref_genome_subseq.fa",
    3950000,
    4050000,
    0,
)

# Ids of the reads to copy out of the full read set.
query_list = [
    "d82bb180-bfcf-4f19-b0f5-92ddfd7a950b",
    "1cc8043e-4ac7-41c8-ac72-6a1eab8f6d19",
    "bf6d9ba9-1063-4a88-aceb-4a9c257bca47",
    "fc96be32-a462-4dc3-9cdc-0361db1988c0",
    "abfef2df-af37-4ef5-b6bc-cd6dfae662a4",
    "13ebfd23-fdbf-401b-8c95-7a0794f5e947",
    "2b8a7c83-939b-4b7d-a60b-bcd3b3a61c34",
    "224b1090-59e5-4138-9aca-df79398e0336",
    "a3b06e8a-64be-4461-af06-2e5daa399603",
    "b23ad628-5719-4496-8734-329f6c0e9af5",
    "b6a83720-57bd-4532-ae6f-d3cdff119a59",
    "e764358d-3964-4948-bcf1-0169990a4a92",
]
write_queries_to_fastq(
    "../data/2022-09-21_amoxicillin_run_vial_14_time_14/reads.fastq.gz",
    "../results/queries.fastq",
    query_list,
)

In [14]:
# Export four regions of entry 0 of the reference genome, one fasta file per
# region. Each row is (output file, lower bound, upper bound).
for region_fasta, region_lower, region_upper in (
    ("../results/35e5bp.fa", 3502046, 3503200),
    ("../results/40e5bp.fa", 4027963, 4029100),
    ("../results/376e4.fa", 3762596, 3770895),
    ("../results/18e5.fa", 182454, 192149),
):
    write_subseq_to_fasta(
        "../data/2022-09-21_amoxicillin_run_vial_14_time_14/ref_genome.fa",
        region_fasta,
        region_lower,
        region_upper,
        0,
    )
