<a href="https://colab.research.google.com/github/sakshimohta17/TechMedBuddy/blob/main/BLAST2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Install Biopython if not already installed
!pip install biopython

# Import modules
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Blast import NCBIWWW, NCBIXML
from Bio import Entrez

# Set your email for NCBI
# NOTE(review): the address below is hard-coded in the notebook; replace it
# with your own before running, as the trailing comment says.
Entrez.email = "sakshimohta2018@gmail.com"  # Replace with your email

# Function to fetch sequence from NCBI using query
def fetch_fasta_by_query(query, retmax=1):
    """Search the NCBI nucleotide database and return the matching sequences.

    Args:
        query: Entrez search term handed to ``Entrez.esearch``.
        retmax: Maximum number of record IDs to request from the search.

    Returns:
        A list with one plain-string sequence per FASTA record fetched, or
        an empty list (after printing a notice) when the search yields no IDs.
    """
    search_handle = Entrez.esearch(db="nucleotide", term=query, retmax=retmax)
    try:
        search_result = Entrez.read(search_handle)
    finally:
        # Release the connection even if parsing the search response fails.
        search_handle.close()

    ids = search_result["IdList"]
    if not ids:
        print("No results found for query:", query)
        return []

    fetch_handle = Entrez.efetch(db="nucleotide", id=ids, rettype="fasta", retmode="text")
    try:
        # SeqIO.parse is lazy, so consume it fully before the handle is closed.
        return [str(rec.seq) for rec in SeqIO.parse(fetch_handle, "fasta")]
    finally:
        fetch_handle.close()

# Function to display FASTA format
def print_fasta_format(sequence, header=">BRCA1_Homo_sapiens"):
    """Print *sequence* as a FASTA entry.

    A blank line and the *header* line are printed first, followed by the
    sequence broken into rows of at most 60 characters.
    """
    line_width = 60
    print(f"\n{header}")

    total = len(sequence)
    offset = 0
    while offset < total:
        print(sequence[offset:offset + line_width])
        offset += line_width

# Function to display top BLAST hit
def show_top_hit(blast_results, label=""):
    """Print a short summary of the best hit in *blast_results*.

    Only the first record, its first alignment and that alignment's first
    HSP are inspected. The query, match and subject strings are cut to their
    first 60 characters. A notice is printed instead when there are no
    records or the first record has no alignments.

    Args:
        blast_results: Sequence of parsed BLAST records.
        label: Text used in the printed messages to identify the search.
    """
    if not blast_results:
        print(f" No results for {label}")
        return

    candidates = blast_results[0].alignments
    if candidates:
        best = candidates[0]
        top_hsp = best.hsps[0]
        preview = 60  # number of alignment columns shown per line
        print(f"\nTop Hit for {label}")
        print("Title:", best.title)
        print("Length:", best.length)
        print("E-value:", top_hsp.expect)
        print("Query:  ", top_hsp.query[:preview])
        print("Match:  ", top_hsp.match[:preview])
        print("Subject:", top_hsp.sbjct[:preview])
    else:
        print(f" No alignments found for {label}")

# --- MAIN SCRIPT ---


# Function to run a BLAST search
def run_blast(sequence, program="blastn", database="nr", hitlist_size=1):
    """Submit one query to NCBI BLAST, print its top hit and return the records.

    Args:
        sequence: Query sequence string passed to ``NCBIWWW.qblast``.
        program: BLAST program name (e.g. "blastn", "blastp").
        database: Database name passed to ``NCBIWWW.qblast``.
        hitlist_size: Number of hits requested.

    Returns:
        The parsed BLAST records as a list.
    """
    print(f"\nRunning {program}...")
    result_handle = NCBIWWW.qblast(program=program, database=database,
                                   sequence=sequence, hitlist_size=hitlist_size)
    try:
        # NCBIXML.parse is lazy, so materialize it before closing the handle.
        blast_records = list(NCBIXML.parse(result_handle))
    finally:
        result_handle.close()
    show_top_hit(blast_records, label=program)
    return blast_records


# Step 1: Fetch BRCA1 DNA sequence
query = "BRCA1[Gene] AND Homo sapiens[Organism]"
sequences = fetch_fasta_by_query(query)
if not sequences:
    print(" Could not fetch BRCA1 sequence.")
else:
    sequence_dna = sequences[0]
    print_fasta_format(sequence_dna)

    # Use only the first 400 bp for faster BLAST
    sequence_dna = sequence_dna.replace("\n", "").strip()
    short_seq = sequence_dna[:400]

    # Run different types of BLAST
    blastn_result = run_blast(short_seq, program="blastn")
    blastx_result = run_blast(short_seq, program="blastx")

    # Translate to protein for protein-based searches.
    # Trim to a whole number of codons first: translating a sequence whose
    # length is not a multiple of three triggers Biopython's partial-codon
    # warning, and 400 is not divisible by 3.
    codon_aligned = short_seq[:len(short_seq) - len(short_seq) % 3]
    # NOTE: to_stop=True cuts the protein at the first in-frame stop codon of
    # frame 1, so the protein query can be much shorter than 133 residues.
    protein_seq = str(Seq(codon_aligned).translate(to_stop=True))

    if protein_seq:
        blastp_result = run_blast(protein_seq, program="blastp")
        tblastn_result = run_blast(protein_seq, program="tblastn")
    else:
        # An empty translation cannot be submitted as a query.
        print(" Translation produced no residues; skipping blastp and tblastn.")
        blastp_result = []
        tblastn_result = []
    tblastx_result = run_blast(short_seq, program="tblastx")

    print("\n✅ All BLAST runs completed.")



>BRCA1_Homo_sapiens
TGTGTGTATGAAGTTAACTTCAAAGCAAGCTTCCTGTGCTGAGGGGGTGGGAGGTAAGGG
TGTGATGAGGCAGGGCTTCTCCTTTGGCAAAGCCTCTGTAGTCCTTCCTTAGTCACCAGG
GAGTTGTGGGAGATATAGACCAGGGTTGCAGGTGCCTGGAGTACAGGGACAGCAGACAGG
TCATGGTGGGTGAGGGGTCAGCTGGTGGGGCACAGATGCGTTTCCTCGGCAGTCCACATA
TTCATAGCTCTGAAAGACCAGCTGCTCTGAATGGCTCAGGTATGAACAGGTCAGGGTGCC
CCTGGAGAGGAAGCAGGCCTTGTTGCCATGGCTGGGAGGAAAAGGAAAGCTCTGGCCCCA
TTATACTGTCTGGGGGTGGGGAGCGGGGAGCTCCCTCATTTGCCCTTCCCTCTGGCTCCC
CACTCTGCCAAGGTACCTGGGACAGCTGCCCCACCTGTTTTCCAGCAGCTTCCAGACAGC
CCTATGCCTGTTGCCCCTAATAACTTATAGGCAAGGTGAGACTCAGAAAAGGGGCTAGAC
TTGCTATTCCCAAGAGCCAACAAAGATTCATGGGGTCAGCTTGGAAGACTCTCTGCCCAT
GAGGTGTAGCTGAGTGAGAATGAAGTCAGAAGTGAGAGCAGGGATACCAGAGCTCCAAGC
TCTACTTACTGGATGTGCAAAAGCACATGGTGGACTTTGATTTCCAGCCAGGTACAGATT
TTGCTGAGAGAGAAGAAAAGAGAACGTGGGACCACGTGTCACATGGAGCAGCCTGTCAGC
TCAGAAGATGCCTGCAGGTCCCAGTTTCCTTACCCCACAGAACTCTTTCCTTCCATCAGG
TGCCCCCAAAATGTCACTCCCACCCCTTCCCACCCTTCAGGTGAGTGTAGCAGCATGGCT
GCACTATCACTCTGACCTCCACTCACTTGGCCCAGGCACATGCTGGACCCCCATTCAGGA
GAC



 No alignments found for blastp

Running tblastn...
 No alignments found for tblastn

Running tblastx...
 No alignments found for tblastx

✅ All BLAST runs completed.
