# In [None]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

def find_longest_coding_sequence(record):
    """Return the longest open reading frame (ORF) found in ``record.seq``.

    An ORF here is a run of whole codons that begins with a start codon
    (``ATG``) and ends at the first stop codon (``TAA``, ``TAG`` or ``TGA``)
    in the same reading frame. All three forward reading frames are scanned;
    the reverse strand is not examined. Matching is case-insensitive, so
    lowercase (e.g. soft-masked) input is handled.

    Args:
        record: Any object exposing a ``seq`` attribute that supports
            ``str()``, ``len()`` and slicing (a ``SeqRecord`` or a plain
            string-backed stand-in).

    Returns:
        The slice of ``record.seq`` covering the longest ORF, stop codon
        included, in the original letter case. Its length is always a
        multiple of three. When several ORFs share the maximal length, the
        one found first (lowest frame, then lowest position) wins. When no
        complete ORF exists, an empty slice of ``record.seq`` is returned, so
        the result has the same type as the input sequence.
    """
    start_codons = {"ATG"}
    stop_codons = {"TAA", "TAG", "TGA"}

    sequence = record.seq
    # Normalise case once for matching; slices are taken from the original.
    bases = str(sequence).upper()

    best_start, best_end = 0, 0

    for frame in range(3):
        orf_start = None  # index of the start codon of the currently open ORF
        for pos in range(frame, len(bases) - 2, 3):
            codon = bases[pos:pos + 3]
            if orf_start is None:
                if codon in start_codons:
                    orf_start = pos
            elif codon in stop_codons:
                orf_end = pos + 3
                if orf_end - orf_start > best_end - best_start:
                    best_start, best_end = orf_start, orf_end
                # The ORF is closed; look for the next start in this frame.
                orf_start = None

    return sequence[best_start:best_end]

def extract_longest_coding_sequences(mrna_file, output_folder):
    """Translate each mRNA's longest coding sequence into one protein FASTA.

    Every record in ``mrna_file`` is passed to
    ``find_longest_coding_sequence``; the returned sequence is translated
    with ``Seq.translate()`` using its default settings, and the proteins are
    written together to ``all_proteins.fasta`` inside ``output_folder``.

    Records for which no coding sequence is found are skipped, so the output
    may hold fewer records than the input.

    Args:
        mrna_file: Path or handle of the input FASTA file with mRNA sequences.
        output_folder: Directory that receives ``all_proteins.fasta``. It is
            created (including parents) if it does not exist yet.
    """
    import os  # function-scope import keeps this block self-contained

    protein_records = []

    for record in SeqIO.parse(mrna_file, "fasta"):
        longest_coding_sequence = find_longest_coding_sequence(record)

        # Nothing to translate: an empty result (which may be a plain ``str``
        # whose ``translate()`` requires a table argument) would raise here.
        if not longest_coding_sequence:
            continue

        # Translate coding sequence to protein
        protein_sequence = longest_coding_sequence.translate()

        # Keep the transcript id so each protein can be traced to its mRNA.
        protein_records.append(
            SeqRecord(
                protein_sequence,
                id=record.id,
                description=f"Translated protein from {record.id}",
            )
        )

    # Make sure the destination exists before writing into it.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    output_file = os.path.join(output_folder, "all_proteins.fasta")

    # Write all protein sequences to a single FASTA file
    SeqIO.write(protein_records, output_file, "fasta")

# Example usage. Guarded so the extraction (which reads and writes files)
# runs only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    mrna_file = "path/to/mrna_sequences.fasta"
    output_folder = "path/to/output_folder"
    extract_longest_coding_sequences(mrna_file, output_folder)