In [28]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import re

In [29]:
# Run configuration for the ORF scan
fasta_file = "SRR_001.fasta"  # FASTA file whose records are scanned
start_codon = "ATG"  # Codon that opens an ORF
# Codons that terminate an ORF
stop_codons = [
    "TAA",
    "TAG",
    "TGA",
]

# Function to find ORFs in a sequence
def find_orfs(sequence, start_codon, stop_codons):
    """Return every ORF found in the three forward reading frames.

    An ORF is a slice of ``sequence`` that begins at ``start_codon`` and runs
    through the first in-frame codon contained in ``stop_codons`` (the stop
    codon is included in the slice).

    Args:
        sequence: Sliceable sequence whose 3-character slices compare equal
            to codon strings; a plain ``str`` works.
        start_codon: Codon that opens an ORF, e.g. ``"ATG"``.
        stop_codons: Collection of codons that close an ORF.

    Returns:
        A list of slices of ``sequence`` ordered by frame (0, 1, 2) and then
        by start position. Every in-frame start codon produces its own entry,
        so nested starts that share a stop codon are all reported. A start
        with no downstream in-frame stop codon produces nothing.
    """
    orfs = []
    seq_len = len(sequence)

    for frame in range(3):  # Check all three reading frames
        # Start positions seen in this frame that still await a stop codon.
        # Tracking them lets each frame be walked once instead of rescanning
        # from every start codon.
        open_starts = []
        for pos in range(frame, seq_len, 3):
            codon = sequence[pos:pos + 3]
            # Close pending ORFs before registering a new start, so a start
            # at ``pos`` can only be closed by a stop strictly after it.
            if open_starts and codon in stop_codons:
                stop_end = pos + 3
                orfs.extend(sequence[begin:stop_end] for begin in open_starts)
                open_starts.clear()
            if codon == start_codon:
                open_starts.append(pos)

    return orfs

# IUPAC DNA complement table used when the input is a plain string
_DNA_COMPLEMENT = str.maketrans(
    "ACGTRYSWKMBDHVNacgtryswkmbdhvn",
    "TGCAYRSWMKVHDBNtgcayrswmkvhdbn",
)


# Function to get the reverse complement of a sequence
def reverse_complement(sequence):
    """Return the reverse complement of ``sequence``.

    Args:
        sequence: Either an object that provides its own
            ``reverse_complement()`` method (that method is called and its
            result returned unchanged), or a plain ``str`` of DNA letters.

    Returns:
        The result of ``sequence.reverse_complement()`` when that method
        exists; otherwise a new ``str`` with each IUPAC DNA letter
        complemented (case preserved, other characters left as they are)
        and the order reversed.
    """
    own_method = getattr(sequence, "reverse_complement", None)
    if own_method is not None:
        return own_method()
    # Plain strings have no such method; complement then reverse by hand so
    # the helper accepts the same input types as find_orfs.
    return sequence.translate(_DNA_COMPLEMENT)[::-1]

# Scan every record of the FASTA file on both strands and print the ORFs found
for record in SeqIO.parse(fasta_file, "fasta"):
    # Upper-case the record so codons compare equal to the upper-case
    # start/stop codon constants regardless of the case used in the file
    sequence = record.seq.upper()

    # Find ORFs in the forward strand
    forward_orfs = find_orfs(sequence, start_codon, stop_codons)

    # Find ORFs in the reverse complement strand; these ORFs are slices of
    # the reverse-complemented sequence, not of the original record
    reverse_seq = reverse_complement(sequence)
    reverse_orfs = find_orfs(reverse_seq, start_codon, stop_codons)

    # Output the ORFs found in the forward strand, one per line
    print(f"ORFs for sequence {record.id} (Forward strand):")
    for orf in forward_orfs:
        print(f"- {orf}")

    # Output the ORFs found in the reverse strand; the header starts with a
    # newline, so a blank line separates the two lists for the same record
    print(f"\nORFs for sequence {record.id} (Reverse strand):")
    for orf in reverse_orfs:
        print(f"- {orf}")


ORFs for sequence SRR29768628.1 (Forward strand):
- ATGATTTCTCACAAGGTGCCCGGGGAGTCATCCACGTAA
- ATGAGGACCGTTAGGAACATAGGAGGCCAGCAGCCCGCTCCCCGAAGGGGCGGACTGCTAAGGACCTTCCGAAATCCAACTACGAGCTTTTTAACTGCAACAACTTTAATATACGCTATTGGAGCTGGAATTACCGCGGCTGCTGAGGTGCTGCAGTGGTGTTAG
- ATGAAAACATCCTTGGCAAATGCTTTCGCAGTAGTTCGTCTTCAATTAATCCTAGAATTTCACCTCTGA
- ATGGTAGAGACTACGACGGTATCTGATTTATCTTCATTCCCCCCTACTTTCGTTCTTGATTAA
- ATGCTTTCGCAGTAG
- ATGCCCCCAACTGTCCCTATCAATCATTACTTCAGCCAAGCGAAACCAACGAAACAAGACCGAAGTCGTATTCCATTATCCCACGCCAACGTATTCGGAGGCGAGTTACGCCTGCTTCTAGCACTCTAA

ORFs for sequence SRR29768628.1 (Reverse strand):
- ATGTTCCTAACGGTCCTCATCCGCGAGGGTGGGAGATCAACCGCTAGGATCGTTTACTTGAGGAAATTAGAGTGCTAG
- ATGATTGATAGGGACAGTTGGGGGCATTAG
- ATGTTTTCATTAATCAAGAACGAAAGTAGGGGGGAATGA
- ATGACTCCCCGGGCACCTTGTGAGAAATCATAA
- ATGGAATACGACTTCGGTCTTGTTTCGTTGGTTTCGCTTGGCTGA
- ATGAAGATAAATCAGATACCGTCGTAG
- ATGCCGACCAGGGATCCGGAGAAGTTACGTGGATGA
ORFs for sequence SRR29768628.2 (Forward strand):
- ATGCTAACGTATTCGGAGGCGAGTTACGCCTGCTTTGAAC