In [4]:
from Bio import SeqIO
from Bio.Blast import NCBIWWW, NCBIXML
import csv

In [3]:
name = 'test'

fasta_file = f"/home/tobamo/analize/project-tobamo/analysis/whole_orf/results/blast/{name}.fasta"
output_file = f"/home/tobamo/analize/project-tobamo/analysis/whole_orf/results/blast/{name}_blast_out.tsv"

# Expected E-value threshold, passed to NCBI as the `expect` parameter
evalue = 0.0001

# Read the sequences from the multifasta file *before* opening the output file,
# so a missing or unreadable FASTA does not truncate an existing results file.
sequences = list(SeqIO.parse(fasta_file, "fasta"))

# Open a file to store the BLAST results in tabular format.
# The handle gets its own name so `output_file` keeps holding the path string
# (the previous `as output_file` rebound the path variable to the file handle).
with open(output_file, "w") as out_handle:
    for seq in sequences:
        # Submit one BLASTN query against `nt` to NCBI for this sequence
        result_handle = NCBIWWW.qblast("blastn", "nt", str(seq.seq), expect=evalue)
        try:
            # Parse BLAST results from XML
            for blast_record in NCBIXML.parse(result_handle):
                for alignment in blast_record.alignments:
                    for hsp in alignment.hsps:
                        # One tab-separated row per High-Scoring Pair (HSP)
                        row = (
                            seq.id,              # Query ID
                            alignment.hit_def,   # Hit description
                            hsp.expect,          # E-value
                            hsp.identities,      # Identity count
                            hsp.align_length,    # Alignment length
                            hsp.query_start,     # Query start position
                            hsp.query_end,       # Query end position
                            hsp.sbjct_start,     # Subject start position
                            hsp.sbjct_end,       # Subject end position
                        )
                        # Join with tabs so the row has no trailing empty column
                        out_handle.write("\t".join(str(value) for value in row) + "\n")
        finally:
            # Release the result handle even if parsing or writing fails
            result_handle.close()

KeyboardInterrupt: 

In [8]:
input_multifasta = f"/home/tobamo/analize/project-tobamo/analysis/whole_orf/results/blast/{name}.fasta"
output_tsv = f"/home/tobamo/analize/project-tobamo/analysis/whole_orf/results/blast/{name}_blast_out.tsv"
fasta_records = list(SeqIO.parse(input_multifasta, "fasta"))

# Field names for TSV output (same names as BLAST tabular "outfmt 6" columns)
fieldnames = ["qaccver", "saccver", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore"]


def _count_gap_runs(aligned):
    """Return the number of contiguous runs of '-' in an aligned sequence string."""
    if not aligned:
        return 0
    return sum(
        1
        for pos, char in enumerate(aligned)
        if char == "-" and (pos == 0 or aligned[pos - 1] != "-")
    )


# Open the TSV file to write the BLAST results
with open(output_tsv, "w", newline='', encoding='utf-8') as tsvfile:
    writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()

    # Submit BLAST queries and retrieve results.
    # Iterate the records loaded in this cell (`fasta_records`), not a
    # `sequences` variable left over from another cell.
    for sequence in fasta_records:
        # Submit the sequence to NCBI's BLAST service
        result_handle = NCBIWWW.qblast(
            program="blastn",  # Change to other BLAST programs if needed
            database="nt",  # Using 'nt' as the target database
            sequence=str(sequence.seq),
            hitlist_size=10,  # Retrieve the top 10 hits
            format_type="XML",  # Get the results in XML format
        )

        try:
            # Parse the BLAST results (one record expected per submitted query)
            blast_record = NCBIXML.read(result_handle)
        finally:
            # Release the result handle even if parsing fails
            result_handle.close()

        # Iterate over the alignments (hits)
        for alignment in blast_record.alignments:
            for hsp in alignment.hsps:
                # NOTE(review): `hsp.gaps` is only trusted when it is an int;
                # any other placeholder value is treated as "no gaps" — confirm
                # against Bio.Blast.Record.HSP defaults.
                gap_columns = hsp.gaps if isinstance(hsp.gaps, int) else 0

                # Extract relevant information
                result_data = {
                    # Use the FASTA record ID so rows can be traced back to the input
                    "qaccver": sequence.id,
                    "saccver": alignment.hit_id,
                    "pident": hsp.identities / hsp.align_length * 100,  # Percentage identity
                    "length": hsp.align_length,
                    # Mismatches exclude gap columns (length = identities + mismatches + gaps)
                    "mismatch": hsp.align_length - hsp.identities - gap_columns,
                    # Gap *openings*: runs of '-' in either aligned string, not total gap columns
                    "gapopen": _count_gap_runs(hsp.query) + _count_gap_runs(hsp.sbjct),
                    "qstart": hsp.query_start,
                    "qend": hsp.query_end,
                    "sstart": hsp.sbjct_start,
                    "send": hsp.sbjct_end,
                    "evalue": hsp.expect,
                    "bitscore": hsp.bits,
                }

                # Write the result data to the TSV file
                writer.writerow(result_data)

print(f"TSV output saved to {output_tsv}")

KeyboardInterrupt: 