In [1]:
# 📦 Install Biopython
!pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m54.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


In [2]:
# 📚 Import necessary modules
from Bio.Blast import NCBIWWW 
from Bio import SeqIO, SearchIO 

In [10]:
# 📂 Load nucleotide sequences from FASTA file
nuc_record = SeqIO.parse("/kaggle/input/TP53.fna", format="fasta")
multiple_records = list(nuc_record)

In [12]:
# 📊 Display number of sequences and basic info of sequences

print(f"Number of sequences: {len(multiple_records)}\n")

for i, record in enumerate(multiple_records, 1):
    print(f"Sequence no {i}:")
    print("-" * len(f"Sequence no {i}:"))
    print(f"Length: {len(record)}")
    print(f"Description: {record.description}")
    print(f"Sequence: {record.seq[0:500]}\n")


Number of sequences: 2

Sequence no 1:
--------------
Length: 19070
Description: NC_000017.11:c7687490-7668421 TP53 [organism=Homo sapiens] [GeneID=7157] [chromosome=17]
Sequence: CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAA

Sequence no 2:
--------------
Length: 19051
Description: NC_060941.1:c7591594-7572544 TP53 [organism=Homo sapiens] [GeneID=7157] [chromosome=17]
Sequence: CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTC

In [13]:
# 🔬 Run BLASTn search and parse results

for i, record in enumerate(multiple_records, 1):    
    result_handle = NCBIWWW.qblast("blastn", "nt", record.seq[0:500])  # BLASTn against nt database
    blast_result = SearchIO.read(result_handle, "blast-xml")  # Parse result in BLAST XML format

    print("-" * len(f"BLAST result for Sequence {i}:"))
    print(f"BLAST result for Sequence {i}:")
    print("-" * len(f"BLAST result for Sequence {i}:"))

    if len(blast_result) > 0:
        print(blast_result[0:2])  # Show top 2 hits
        print("\n")
        for record in blast_result[0:1]:  # Process first hit
            print("-" * len(f"Details of the first BLAST hit for Sequence {i}:"))
            print(f"Details of the first BLAST hit for Sequence {i}:")
            print("-" * len(f"Details of the first BLAST hit for Sequence {i}:"))
            print(f"Sequence ID: {record.id}\n")
            print(f"Sequence Description: {record.description}\n")

            for hit in record:
                print(f"E-value: {hit.evalue}\n")
                print(f"Alignment: \n{hit.aln}\n")
    else:
        print("No hits found!\n")


----------------------------
BLAST result for Sequence 1:
----------------------------
Program: blastn (2.16.1+)
  Query: No (500)
         definition line
 Target: core_nt
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  gi|221193083|gb|EU876947.1|  Homo sapiens isolate CH1b ...
            1      1  gi|221193207|gb|EU877009.1|  Homo sapiens isolate MEL8b...


----------------------------------------------
Details of the first BLAST hit for Sequence 1:
----------------------------------------------
Sequence ID: gi|221193083|gb|EU876947.1|

Sequence Description: Homo sapiens isolate CH1b WDR79 (WDR79) gene, exons 1 through 3 and partial cds; and TP53 (TP53) gene, exon 1

E-value: 0.0

Alignment: 
Alignment with 2 rows and 500 columns
CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGG...GAA No
CTCAAAAGTCTAGAGCCACCGTCCAGGG