In [1]:
# 📦 Install Biopython
!pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


In [2]:
# 📚 Import required modules
from Bio.Blast import NCBIWWW
from Bio import SeqIO, SearchIO

In [5]:
# 📂 Read the protein record stored in the FASTA file
fasta_path = "/kaggle/input/TP53_Protein.txt"
prot_record = SeqIO.read(fasta_path, "fasta")

# Build the summary lines (identifier, length, description, sequence) and
# print them one by one; the embedded "\n" leaves a blank line after each
# of the first three entries.
record_summary = (
    f"ID: {prot_record.id}\n",
    f"Length: {len(prot_record)}\n",
    f"Description: {prot_record.description}\n",
    f"Sequence: {prot_record.seq}",
)
for summary_line in record_summary:
    print(summary_line)


ID: sp|P04637|P53_HUMAN

Length: 393

Description: sp|P04637|P53_HUMAN Cellular tumor antigen p53 OS=Homo sapiens OX=9606 GN=TP53 PE=1 SV=4

Sequence: MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD


In [7]:
# 🔬 Run BLASTp search and parse results
# qblast submits the query to NCBI's online BLAST service, so this cell needs
# network access and can take a while to return.
result_handle = NCBIWWW.qblast("blastp", "pdb", prot_record.seq)  # BLASTp against protein structure database
try:
    blast_result = SearchIO.read(result_handle, "blast-xml")  # Parse result
finally:
    # Always release the handle, even if parsing the XML fails.
    result_handle.close()

if len(blast_result) == 0:
    # Indexing an empty result would raise an opaque IndexError; report the
    # situation explicitly instead.
    print("No BLAST hits found.")
else:
    print(blast_result[0:2])  # Show top 2 hits
    print("\n")

    hit = blast_result[0]  # First best match

    hit_heading = "First BLAST result:"
    print(hit_heading)
    print("-" * len(hit_heading))
    print(f"Sequence ID: {hit.id}")
    print(f"Sequence Description: {hit.description}\n")

    details_heading = "Details of the first BLAST result:"
    print(details_heading)
    print("-" * len(details_heading))
    details = hit[0]  # First HSP (high-scoring pair) of the best hit
    print(f"E-value: {details.evalue}")
    print(f"Alignment: {details.aln}")


Program: blastp (2.16.1+)
  Query: unnamed (393)
         protein product
 Target: pdb
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  pdb|7XZZ|K  Chain K, Cellular tumor antigen p53 [Homo s...
            1      1  pdb|8R1F|C  Chain C, Cellular tumor antigen p53 [Homo s...


First BLAST result:
-------------------
Sequence ID: pdb|7XZZ|K
Sequence Description: Chain K, Cellular tumor antigen p53 [Homo sapiens]

Details of the first BLAST result:
----------------------------------
E-value: 0.0
Alignment: Alignment with 2 rows and 393 columns
MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLM...DSD unnamed
MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLM...DSD pdb|7XZZ|K
