### Part 1: Nucleotide BLAST (BLASTn) with NCBI

In [1]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 28.8 MB/s eta 0:00:00
Installing collected packages: biopython
Successfully installed biopython-1.85


In [2]:
from Bio.Blast import NCBIWWW 
from Bio import SeqIO, SearchIO 

In [3]:
# SeqIO.parse yields records lazily; unpack the iterator into a list so the
# records can be counted and traversed more than once by later cells.
nuc_record = SeqIO.parse(
    "/kaggle/input/tp53-dataset/TP53.fna",
    "fasta",
)
multiple_records = [*nuc_record]

In [4]:
# Report how many FASTA records were loaded.
print("Number of sequences:", len(multiple_records))

Number of sequences: 2


In [5]:
# Print a short summary of every loaded record: an underlined heading, the
# full length, the FASTA description and the first 500 bases of the sequence.
for i, record in enumerate(multiple_records, start=1):
    heading = f"Sequence no {i}:"
    print(heading, "-" * len(heading), sep="\n")
    print(f"Length: {len(record)}", f"Description: {record.description}", sep="\n")
    # end="\n\n" leaves one blank line between consecutive records.
    print(f"Sequence: {record.seq[:500]}", end="\n\n")

Sequence no 1:
--------------
Length: 19070
Description: NC_000017.11:c7687490-7668421 TP53 [organism=Homo sapiens] [GeneID=7157] [chromosome=17]
Sequence: CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAA

Sequence no 2:
--------------
Length: 19051
Description: NC_060941.1:c7591594-7572544 TP53 [organism=Homo sapiens] [GeneID=7157] [chromosome=17]
Sequence: CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGG

In [6]:
# Run a BLASTn search against NCBI's "nt" database for each loaded record and
# print a summary of the top hits plus the details of the best hit.
#
# NOTE: NCBIWWW.qblast submits the query to the NCBI servers, so this cell
# needs network access and can take a while per sequence.
for i, record in enumerate(multiple_records, 1):
    # Only the first 500 bases are submitted as the query. The handle holding
    # the XML response is closed as soon as it has been parsed.
    with NCBIWWW.qblast("blastn", "nt", record.seq[0:500]) as result_handle:
        blast_result = SearchIO.read(result_handle, "blast-xml")

    result_title = f"BLAST result for Sequence {i}:"
    print("-" * len(result_title))
    print(result_title)
    print("-" * len(result_title))

    if len(blast_result) > 0:
        # Slicing the query result keeps its summary layout, so this prints
        # the overview table restricted to the first two hits.
        print(blast_result[0:2])
        print("\n")

        # Use dedicated names for the hit and its HSPs so the outer loop's
        # `record` (the query sequence record) is not overwritten.
        top_hit = blast_result[0]
        detail_title = f"Details of the first BLAST hit for Sequence {i}:"
        print("-" * len(detail_title))
        print(detail_title)
        print("-" * len(detail_title))
        print(f"Sequence ID: {top_hit.id}\n")
        print(f"Sequence Description: {top_hit.description}\n")

        # Iterating a hit yields its HSPs (high-scoring segment pairs).
        for hsp in top_hit:
            print(f"E-value: {hsp.evalue}\n")
            print(f"Alignment: \n{hsp.aln}\n")
    else:
        print("No hits found!\n")

----------------------------
BLAST result for Sequence 1:
----------------------------
Program: blastn (2.16.1+)
  Query: No (500)
         definition line
 Target: core_nt
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  gi|35213|emb|X54156.1|  Homo sapiens p53 gene for trans...
            1      1  gi|221193241|gb|EU877026.1|  Homo sapiens isolate PI2a ...


----------------------------------------------
Details of the first BLAST hit for Sequence 1:
----------------------------------------------
Sequence ID: gi|35213|emb|X54156.1|

Sequence Description: Homo sapiens p53 gene for transformation related protein p53 (also called transformation-associated protein p53, cellular tumor antigen p53, and non-viral tumour antigen p53)

E-value: 0.0

Alignment: 
Alignment with 2 rows and 500 columns
CTCAAAAGTCTAGAGCCACCGT