<a href="https://colab.research.google.com/github/sakshimohta17/TechMedBuddy/blob/main/BLAST2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Biopython
!pip install Bio

# Import required modules
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Blast import NCBIWWW, NCBIXML
from Bio import Entrez

# Contact email used for NCBI Entrez access by the Entrez calls below.
# The address here is a placeholder from the original author: replace it
# with your own before running the notebook.
Entrez.email = "sakshimohta2018@gmail.com"  # 🔁 Replace with your actual email

def fetch_fasta_by_query(query, retmax=1):
    """
    Fetches FASTA sequences from the NCBI nucleotide database using a search query.

    The query is first resolved to record IDs with ``Entrez.esearch``; those
    IDs are then downloaded in FASTA format with ``Entrez.efetch`` and parsed
    with ``SeqIO``.

    Args:
        query: Entrez query string like "Homo sapiens BRCA1".
        retmax: Maximum number of sequences to fetch.
    Returns:
        List of sequences as strings (one per FASTA record returned).
        An empty list if the search matched no records.
    """
    # Step 1: search for matching record IDs. The handle wraps a network
    # response, so close it even if parsing the reply fails.
    search_handle = Entrez.esearch(db="nucleotide", term=query, retmax=retmax)
    try:
        record = Entrez.read(search_handle)
    finally:
        search_handle.close()

    ids = record["IdList"]
    if not ids:
        print("No results found for query:", query)
        return []

    # Step 2: download the matched records as FASTA text and keep only the
    # raw sequence strings. SeqIO.parse is lazy, so the records must be fully
    # consumed before the handle is closed.
    fetch_handle = Entrez.efetch(db="nucleotide", id=ids, rettype="fasta", retmode="text")
    try:
        return [str(rec.seq) for rec in SeqIO.parse(fetch_handle, "fasta")]
    finally:
        fetch_handle.close()

def run_blast(sequence, program="blastn", database="nr", expect=10.0,
              hitlist_size=10, entrez_query=None, word_size=None,
              matrix_name=None, gapcosts=None, filter=None):
    """
    Runs a BLAST search using NCBI's online service via ``NCBIWWW.qblast``.

    Args:
        sequence: Query sequence as a string; its first 50 characters are
            echoed to stdout before the search starts.
        program: BLAST program name passed to qblast (e.g. "blastn").
        database: Database name passed to qblast.
            NOTE(review): the default "nr" is used for every program,
            including the nucleotide searches - confirm this is the intended
            database for blastn/tblastn/tblastx.
        expect: Passed through to qblast as ``expect``.
        hitlist_size: Passed through to qblast as ``hitlist_size``.
        entrez_query: Passed through to qblast as ``entrez_query``.
        word_size: Passed through to qblast as ``word_size``.
        matrix_name: Passed through to qblast as ``matrix_name``.
        gapcosts: Passed through to qblast as ``gapcosts``.
        filter: Passed through to qblast as ``filter``. The name shadows the
            builtin inside this function; it is kept so existing keyword
            callers keep working.
    Returns:
        List of BLAST records parsed from the XML result by ``NCBIXML.parse``.
    """
    print(f"Running {program} on sequence: {sequence[:50]}...")
    result_handle = NCBIWWW.qblast(program, database, sequence, expect=expect,
                                   hitlist_size=hitlist_size, entrez_query=entrez_query,
                                   word_size=word_size, matrix_name=matrix_name,
                                   gapcosts=gapcosts, filter=filter)
    try:
        # NCBIXML.parse yields records lazily; materialise them all before
        # the handle is released.
        return list(NCBIXML.parse(result_handle))
    finally:
        result_handle.close()

# --- Fetch Sequence from NCBI using Query ---
query = "Homo sapiens BRCA1"  # 🔁 Change to desired organism/gene
sequences = fetch_fasta_by_query(query)
if not sequences:
    raise ValueError("Could not fetch any sequence from NCBI.")
# Only the first returned record is used as the BLAST query.
sequence_dna = sequences[0]

# --- Run BLAST Searches ---
# Each call below is a separate remote search and runs one after another.

# Run blastn (DNA vs DNA)
blast_results_n = run_blast(sequence_dna, program="blastn")

# Run blastx (translated DNA vs Protein)
blast_results_x = run_blast(sequence_dna, program="blastx")

# Translate DNA to protein for protein-based searches.
# Only whole codons are translated: a trailing partial codon makes Biopython
# emit a "partial codon" warning (documented as a possible future error), so
# the sequence is trimmed to a multiple of three first. The translated result
# is unchanged because the leftover bases never formed a codon.
# NOTE(review): translation starts at the first base and stops at the first
# stop codon (to_stop=True), so a record that does not begin in-frame at the
# coding start may yield only a very short peptide - confirm this is intended.
whole_codon_length = len(sequence_dna) - len(sequence_dna) % 3
sequence_protein = str(Seq(sequence_dna[:whole_codon_length]).translate(to_stop=True))
if not sequence_protein:
    # Fail with a clear message instead of submitting an empty query to BLAST.
    raise ValueError("Translation produced an empty protein sequence; "
                     "cannot run protein-based BLAST searches.")

# Run blastp (Protein vs Protein)
blast_results_p = run_blast(sequence_protein, program="blastp")

# Run tblastn (Protein vs translated nucleotide db)
blast_results_tn = run_blast(sequence_protein, program="tblastn")

# Run tblastx (translated DNA vs translated DNA)
blast_results_tx = run_blast(sequence_dna, program="tblastx")

print("BLAST runs completed.")


Running blastn on sequence: GGGAGTTGATCCAGAATTGTCTTTCTGAAAGGAAGCACTCGGAATCCTTC...
Running blastx on sequence: GGGAGTTGATCCAGAATTGTCTTTCTGAAAGGAAGCACTCGGAATCCTTC...
