# ORFs Answer

In [59]:
import re
import os

# Short sample sequence with a single ATG ... TAA reading frame, handy for
# trying find_orfs by hand.
test_seq = "AAACCATGCCCCCCGGGAAATAACCC"

def find_orfs(seq):
    """Locate every open reading frame that starts at an ATG in *seq*.

    For each occurrence of ``ATG`` the sequence is read codon by codon, in
    the frame of that ATG, until the first stop codon (TAA, TAG or TGA).
    Matching is case-sensitive, so *seq* is expected to be upper-case.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        A list of ``(start, end)`` tuples in order of ATG position.
        ``start`` is the 1-based position of the A of the ATG and ``end``
        is the 1-based position of the last base of the stop codon.
        An ATG with no in-frame stop codon before the end of the sequence
        is not an ORF and is left out of the result.
    """
    stops = ("TAA", "TAG", "TGA")
    seq_len = len(seq)
    orfs = []
    for match in re.finditer("ATG", seq):
        atg = match.start()
        # Visit only complete codons: the last usable codon starts at
        # seq_len - 3.
        for codon_start in range(atg, seq_len - 2, 3):
            if seq[codon_start:codon_start + 3] in stops:
                # 0-based exclusive end == 1-based inclusive end.
                orfs.append((atg + 1, codon_start + 3))
                break
        # No stop codon found: nothing is recorded for this ATG, instead of
        # reusing a stop position left over from an earlier ATG.
    return orfs


def find_longest_orf(orf_list):
    """Return the longest ORF from a list of ``(start, end)`` positions.

    The length of an ORF is ``end - start + 1``. When several ORFs share the
    greatest length, the one that appears first in *orf_list* wins.

    Args:
        orf_list: Iterable of ``(start, end)`` pairs with inclusive
            positions.

    Returns:
        A tuple ``(start, end, codons)`` where ``codons`` is the ORF length
        divided by three, minus one, truncated to an int (i.e. the codon
        count without the stop codon).

    Raises:
        ValueError: If *orf_list* is empty or holds no ORF of positive
            length, so there is nothing to report.
    """
    best_len = 0
    best_pos = None
    for orf_pos in orf_list:
        orf_len = orf_pos[1] - orf_pos[0] + 1
        # Strict comparison keeps the earliest ORF on ties.
        if orf_len > best_len:
            best_len, best_pos = orf_len, orf_pos
    if best_pos is None:
        raise ValueError("no ORF of positive length in orf_list")
    return best_pos[0], best_pos[1], int(best_len / 3 - 1)
    
    
def read_file(filepath):
    """Read a FASTA-style file and return its sequence as one string.

    Lines beginning with ``>`` are skipped; every other line is stripped of
    surrounding whitespace and the pieces are concatenated in file order.
    """
    with open(filepath) as handle:
        return "".join(
            row.strip() for row in handle if not row.startswith(">")
        )


def main():
    """Find the longest ORF in each numbered FASTA file and save the results.

    Counts the ``.fasta`` files in the input folder, then processes
    ``1.fasta`` .. ``N.fasta`` in numeric order. For every file the longest
    ORF is printed and finally written to the output file as one
    ``start-end-codons`` line per input file.
    """
    folder = "files"
    out_file = "answers.csv"
    # Only the count is taken from the listing; the files themselves are
    # expected to be named 1.fasta, 2.fasta, ... so they can be read in
    # numeric order.
    num_of_files = sum(
        1 for filename in os.listdir(folder) if filename.endswith(".fasta")
    )
    orf_pos_list = []
    for filenum in range(1, num_of_files + 1):
        # Build the path from `folder` so the listing and the reads always
        # refer to the same directory.
        filepath = os.path.join(folder, "{}.fasta".format(filenum))
        # For a quick check without input files, use test_seq as the
        # sequence here instead of reading a file.
        seq = read_file(filepath)
        longest_orf = find_longest_orf(find_orfs(seq))
        orf_pos_list.append(longest_orf)
        print(filepath, longest_orf)
    with open(out_file, "w") as f:
        for start, end, codons in orf_pos_list:
            f.write("{}-{}-{}\n".format(start, end, codons))
    print("done")
    
# Run the whole pipeline as soon as this code is executed.
main()

files\1.fasta (256, 1347, 363)
files\2.fasta (118, 1275, 385)
files\3.fasta (58, 1476, 472)
files\4.fasta (283, 1359, 358)
files\5.fasta (37, 654, 205)
files\6.fasta (112, 1479, 455)
files\7.fasta (139, 1152, 337)
files\8.fasta (184, 996, 270)
files\9.fasta (58, 1032, 324)
files\10.fasta (301, 1800, 499)
files\11.fasta (268, 1641, 457)
files\12.fasta (184, 1689, 501)
files\13.fasta (289, 921, 210)
files\14.fasta (229, 1170, 313)
files\15.fasta (199, 1620, 473)
files\16.fasta (133, 1383, 416)
files\17.fasta (277, 1086, 269)
files\18.fasta (247, 978, 243)
files\19.fasta (148, 1407, 419)
files\20.fasta (172, 1320, 382)
files\21.fasta (301, 1683, 460)
files\22.fasta (190, 1497, 435)
files\23.fasta (55, 981, 308)
files\24.fasta (103, 1350, 415)
files\25.fasta (121, 1308, 395)
files\26.fasta (166, 1461, 431)
files\27.fasta (250, 945, 231)
files\28.fasta (139, 786, 215)
files\29.fasta (151, 810, 219)
files\30.fasta (142, 1503, 453)
files\31.fasta (76, 870, 264)
files\32.fasta (157, 1491, 444)