In [14]:
from Bio import SeqIO

# Input: FASTA file holding the protein sequences analysed in this notebook.
fasta_file = "/Users/mohanavenkataphaneendrareddyalla/Downloads/RAD21_Protein_Sequences.fasta"

# Stream the records out of the FASTA file and report, for each one, its
# identifier and the number of residues in its sequence.
fasta_records = SeqIO.parse(fasta_file, "fasta")
for record in fasta_records:
    residue_count = len(record.seq)
    print(f"Protein ID: {record.id}, Sequence Length: {residue_count}")


Protein ID: gi|25091097|sp|O60216.2|RAD21_HUMAN, Sequence Length: 631
Protein ID: gi|341942169|sp|Q61550.3|RAD21_MOUSE, Sequence Length: 635
Protein ID: gi|82187382|sp|Q6TEL1.1|RA21A_DANRE, Sequence Length: 643
Protein ID: gi|29336593|sp|O93310.1|RAD21_XENLA, Sequence Length: 629
Protein ID: gi|110287797|sp|Q3SWX9.1|RAD21_BOVIN, Sequence Length: 630
Protein ID: gi|327478531|sp|A2AU37.2|RD21L_MOUSE, Sequence Length: 552
Protein ID: gi|259016327|sp|Q9H4I0.3|RD21L_HUMAN, Sequence Length: 556
Protein ID: gi|327488389|sp|D2HSB3.1|RD21L_AILME, Sequence Length: 554


In [16]:
import subprocess

# Path for the BLAST database to be created (passed to makeblastdb via -out).
database_name = "/Users/mohanavenkataphaneendrareddyalla/RAD21_Search_Database"

# Build a protein BLAST database (-dbtype prot) from the FASTA file.
#
# Two distinct failure modes are reported through the same message:
#   * CalledProcessError - makeblastdb started but exited with a non-zero
#     status (raised because check=True).
#   * OSError (e.g. FileNotFoundError) - the makeblastdb executable could not
#     be launched at all, for instance when it is not on PATH.
# Without the OSError clause the second case would bypass this handler and
# surface as an unhandled traceback.
try:
    subprocess.run(
        ["makeblastdb", "-in", fasta_file, "-dbtype", "prot", "-out", database_name],
        check=True,
    )
    print("Database created successfully.")
except (subprocess.CalledProcessError, OSError) as e:
    print(f"Error creating the database: {e}")




Building a new DB, current time: 11/18/2024 01:45:18
New DB name:   /Users/mohanavenkataphaneendrareddyalla/RAD21_Search_Database
New DB title:  /Users/mohanavenkataphaneendrareddyalla/Downloads/RAD21_Protein_Sequences.fasta
Sequence type: Protein
Deleted existing Protein BLAST database named /Users/mohanavenkataphaneendrareddyalla/RAD21_Search_Database
Keep MBits: T
Maximum file size: 3000000000B
Adding sequences from FASTA; added 8 sequences in 0.0075469 seconds.


Database created successfully.


In [18]:
# Run blastp using the FASTA file as the query set against the database named
# by `database_name`. Results are written in output format 5 to
# "RAD21_Blast_Result.out", a relative path, i.e. resolved against the
# kernel's current working directory.
#
# Two distinct failure modes are reported through the same message:
#   * CalledProcessError - blastp started but exited with a non-zero status
#     (raised because check=True).
#   * OSError (e.g. FileNotFoundError) - the blastp executable could not be
#     launched at all, for instance when it is not on PATH.
# Without the OSError clause the second case would bypass this handler and
# surface as an unhandled traceback.
try:
    subprocess.run(
        [
            "blastp",
            "-query", fasta_file,
            "-db", database_name,
            "-outfmt", "5",
            "-out", "RAD21_Blast_Result.out",
        ],
        check=True,
    )
    print("BLASTp executed successfully.")
except (subprocess.CalledProcessError, OSError) as e:
    print(f"Error running BLASTp: {e}")


BLASTp executed successfully.


In [20]:
from Bio.Blast import NCBIXML

# Read the BLAST report from the same relative path the blastp step writes to
# (-out "RAD21_Blast_Result.out"). Using a hardcoded absolute path here would
# only find the fresh results when the kernel's working directory happens to be
# that directory; otherwise a stale or missing file would be read.
with open("RAD21_Blast_Result.out") as result_handle:
    # Parse once and keep the records in a list so they can be walked twice
    # below without rewinding the file and parsing it a second time.
    blast_records = list(NCBIXML.parse(result_handle))

# Pass 1: show every query present in the report.
print("Debug - Listing all queries in the file:")
for blast_record in blast_records:
    print(f"Query: {blast_record.query}")

# Pass 2: for queries whose description contains "RAD21_HUMAN", print the
# E-value of every HSP of every alignment, or a notice when there are no hits.
print("\nDebug - Checking hits and E-values for RAD21_HUMAN:")
for blast_record in blast_records:
    if "RAD21_HUMAN" not in blast_record.query:
        continue

    print(f"\nHits for {blast_record.query}:")
    if not blast_record.alignments:
        print(f"No hits found for {blast_record.query}")
        continue

    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            print(f"E-value: {hsp.expect}")


Debug - Listing all queries in the file:
Query: gi|25091097|sp|O60216.2|RAD21_HUMAN RecName: Full=Double-strand-break repair protein rad21 homolog; Short=hHR21; AltName: Full=Nuclear matrix protein 1; Short=NXP-1; AltName: Full=SCC1 homolog; Contains: RecName: Full=64-kDa C-terminal product; AltName: Full=64-kDa carboxy-terminal product; AltName: Full=65-kDa carboxy-terminal product
Query: gi|341942169|sp|Q61550.3|RAD21_MOUSE RecName: Full=Double-strand-break repair protein rad21 homolog; Short=mHR21; AltName: Full=Pokeweed agglutinin-binding protein 29; Short=PW29; AltName: Full=SCC1 homolog; Contains: RecName: Full=64-kDa C-terminal product; AltName: Full=64-kDa carboxy-terminal product
Query: gi|82187382|sp|Q6TEL1.1|RA21A_DANRE RecName: Full=Double-strand-break repair protein rad21 homolog A; AltName: Full=SCC1 homolog; Contains: RecName: Full=64-kDa C-terminal product; AltName: Full=64-kDa carboxy-terminal product
Query: gi|29336593|sp|O93310.1|RAD21_XENLA RecName: Full=Double-stra