# BLAST results handling: running queries to stream and save the results

In [None]:
from Bio.Blast import NCBIWWW, NCBIXML
import csv
import sys

### (1) Query

In [2]:
# Nucleotide query sequence for the BLAST search. It is kept as two lines
# joined by a single newline, the same value the triple-quoted form produced.
target_sequence = (
    "ggtaagtcctctagtacaaacacccccaatattgtgatataattaaa\n"
    "attatattcatattctgttgccagaaaaaacacttttaggctatattagagccatcttctttgaagcgttgtc"
)

In [3]:
# Run a blastn search of target_sequence against the "nt" database through
# Biopython's NCBIWWW.qblast. The returned handle is consumed (and closed)
# by the download step that follows.
result_handle = NCBIWWW.qblast(
    program="blastn",
    database="nt",
    sequence=target_sequence,
)

### (2) Save the query results as XML

In [None]:
# Stream the BLAST response from result_handle into blast_results.xml in
# fixed-size chunks, printing a running total as progress feedback.
chunk_size = 8*1024  # size requested from each read() call
# Running total of len(data) over all chunks read so far.
# NOTE(review): if result_handle yields text rather than bytes, this counts
# characters, so the "KB" figure is only approximate - confirm.
total_bytes = 0

try:
    with open("blast_results.xml", "w") as out_handle:
        while True:
            data = result_handle.read(chunk_size)
            if not data:
                # An empty read means the handle is exhausted.
                break
            out_handle.write(data)
            total_bytes += len(data)

            # Print progress (very rough, since we don't know total size).
            # "\r" rewrites the same console line instead of adding new ones.
            sys.stdout.write(f"\rDownloaded: {total_bytes / 1024:.2f} KB")
            sys.stdout.flush()
finally:
    # Always release the result handle, even when opening the output file,
    # reading, or writing fails part-way through.
    result_handle.close()

# Only reached when the whole transfer succeeded.
print("\nDownload complete.")

Downloaded: 71.04 KB
Download complete.


### (3) Filter the query results and save only the significant hits (E-value below the threshold) to a CSV file

In [14]:

# Filter the saved BLAST XML and export every HSP whose E-value is below
# E_VALUE_THRESH as one CSV row.
E_VALUE_THRESH = 1e-11
input_file = "blast_results.xml"
output_csv = "filtered_blast_hits.csv"
# Truncation length intended for the optional snippet columns described
# below; unused while those columns are disabled.
max_length = 200

# Columns of the output CSV, in order. Optional snippet columns
# ("query_snippet", "match_snippet", "subject_snippet", filled from
# hsp.query / hsp.match / hsp.sbjct cut to max_length) are currently disabled.
fieldnames = [
    "query_id",
    "alignment_title",
    "alignment_length",
    "e_value",
]

with open(input_file) as result_handle, open(output_csv, mode="w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    count = 0
    # Walk records -> alignments -> HSPs and write each qualifying HSP
    # straight to the CSV, so no intermediate list of hits is built.
    for record in NCBIXML.parse(result_handle):
        for alignment in record.alignments:
            for hsp in alignment.hsps:
                if not hsp.expect < E_VALUE_THRESH:
                    # Not significant enough; skip this HSP.
                    continue

                row = {
                    # Only meaningful if a sequence ID or FASTA header was
                    # passed to NCBIWWW.qblast() with the query.
                    "query_id": record.query,
                    "alignment_title": alignment.title,
                    # NOTE(review): alignment.length may be the full hit
                    # (subject) length rather than the aligned-region length
                    # (hsp.align_length) - confirm which one is intended.
                    "alignment_length": alignment.length,
                    "e_value": hsp.expect,
                }
                writer.writerow(row)
                count += 1

print(f"{count} significant hits written to {output_csv}")


53 significant hits written to filtered_blast_hits.csv
