# This notebook runs the library functions on the inputs in test-files/ for problems 1 to 40

In [None]:
## Problem 1
# Feed the first line of the input file to prfs.count_dna_base and print the result.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_dna.txt'
with open(testfile) as handle:
    # Only the first line is used; surrounding whitespace/newline is removed.
    dna = handle.readline().strip()
print(prfs.count_dna_base(dna))


In [None]:
## Problem 2
# Feed the first line of the input file to prfs.transcribe_dna_to_rna and print the result.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_rna.txt'
with open(testfile) as handle:
    dna = handle.readline().strip()
rna_string = prfs.transcribe_dna_to_rna(dna)
print(rna_string)

In [None]:
# Problem 3
# Print prfs.reverse_complement() of the first line of the input file.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_revc.txt'
with open(testfile) as handle:
    dna = handle.readline().strip()
reverse_complement = prfs.reverse_complement(dna)
print(reverse_complement)

In [None]:
# Problem 4
# The first line holds two whitespace-separated integers, passed to prfs.fibonacci as (n, k).
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_fib.txt'
with open(testfile) as handle:
    fields = handle.readline().split()
n, k = (int(field) for field in fields)
print(prfs.fibonacci(n, k))

In [None]:
# Problem 5: Calculating Point Mutations
# The first two lines of the input file are the two strings to compare.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_hamm.txt'
with open(testfile) as handle:
    dna1, dna2 = (handle.readline().strip() for _ in range(2))
print(prfs.hamming_distance(dna1, dna2))


In [None]:
# Problem 6: Mendel's First Law
# The first line holds three whitespace-separated integers: k, m and n.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_iprb.txt'
with open(testfile) as handle:
    counts = [int(token) for token in handle.readline().split()]
k, m, n = counts
print(prfs.mendel_first_law(k, m, n))

In [None]:
# Problem 7: Translating RNA into Protein
# Translate the RNA string found on the first line of the input file.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_prot.txt'
with open(testfile) as handle:
    rna = handle.readline().strip()
protein_string = prfs.rnaTranslate(rna)
print(protein_string)

In [None]:
# Problem 8: Finding a Motif in DNA
# Line 1 is the DNA string, line 2 is the motif to look for in it.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_subs.txt'
with open(testfile) as handle:
    dna = handle.readline().strip()
    motif = handle.readline().strip()
motif_hits = prfs.find_motif(dna, motif)
print(motif_hits)

In [None]:
# Problem 9: Consensus and Profile
## BioPython (Bio.SeqIO) is required to read the FASTA file
from trung_bftl import problem_functions as prfs
from Bio import SeqIO
testfile = 'test-files/rosalind_cons.txt'
with open(testfile, 'r') as f:
    # Turn every FASTA record into a plain string sequence
    records_seq_list = [str(record.seq) for record in SeqIO.parse(f, 'fasta')]
# Run the computation once and reuse the result instead of calling
# consensus_profile twice on the same input.
consensus_result = prfs.consensus_profile(records_seq_list)
# Elements 0 and 2 of the result are printed -- presumably the consensus string
# and the printable profile; verify against prfs.consensus_profile.
print(consensus_result[0])
print(consensus_result[2])

In [None]:
# Problem 10: Mortal Fibonacci Rabbits
# The first line holds two whitespace-separated integers, passed on as (n, m).
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_fibd.txt'
with open(testfile) as handle:
    fields = handle.readline().split()
n, m = (int(field) for field in fields)
print(prfs.mortal_fibonacci(n, m))

In [None]:
# Problem 11: Computing GC Content
# Map every FASTA record ID to its prfs.gc_content value, then report the
# ID with the largest value together with that value.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_gc.txt'
with open(testfile) as handle:
    id_gc_dict = {
        entry.id: prfs.gc_content(entry.seq)
        for entry in SeqIO.parse(handle, 'fasta')
    }
# max() over the dict iterates its keys; the lookup function ranks them by value.
max_gc_id = max(id_gc_dict, key=id_gc_dict.get)
print(f'{max_gc_id}\n{id_gc_dict[max_gc_id]}')
        

In [None]:
# Problem 12: Calculating Expected Offspring
# The first line is a whitespace-separated list of integers.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_iev.txt'
with open(testfile) as handle:
    offspring = [int(count) for count in handle.readline().split()]
print(prfs.expected_offspring(offspring))

In [None]:
# Problem 13: Finding a Shared Motif
# All FASTA records are loaded as plain strings and handed over as one list.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_lcsm.txt'
with open(testfile) as handle:
    records_seq_list = [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]
shared_motif = prfs.find_shared_motif(records_seq_list)
print(shared_motif)


In [None]:
# Problem 14: Independent Alleles
# The first line holds two whitespace-separated integers, passed on as (k, n).
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_lia.txt'
with open(testfile) as handle:
    fields = handle.readline().split()
k, n = (int(field) for field in fields)
print(prfs.independent_alleles(k, n))

In [None]:
# Problem 15: Finding a Protein Motif
# For every protein ID in the input file, download its FASTA record from
# UniProt, run prfs.find_protein_motif on the sequence and, when the result
# is non-empty, print the ID followed by the space-separated result.
from trung_bftl import problem_functions as prfs
import requests

testfile = 'test-files/rosalind_mprt.txt'
# Read all IDs first so the file is not held open during the network calls.
# Blank lines are skipped: they would otherwise turn into a request for ".fasta".
with open(testfile, 'r') as f:
    protein_ids = [line.strip() for line in f if line.strip()]

for protein_id in protein_ids:
    # Only the part before the first underscore is used as the UniProt accession.
    corrected_protein_id = protein_id.split('_')[0]
    url = f'https://www.uniprot.org/uniprot/{corrected_protein_id}.fasta'
    # If the server answers with an HTTP error status (e.g. 404), print a
    # warning and continue with the next protein.
    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        print(f'Protein {protein_id} not found')
        continue
    # Drop the FASTA header line and join the remaining lines into one sequence.
    # partition() (unlike split(...)[1]) does not raise when the body has no newline.
    _header, _sep, fasta_body = response.text.partition('\n')
    protein_seq = fasta_body.replace('\n', '')
    if not protein_seq:
        # A successful response without any sequence data is treated as "not found".
        print(f'Protein {protein_id} not found')
        continue
    motif_positions = prfs.find_protein_motif(protein_seq)
    if motif_positions:
        print(protein_id)
        print(' '.join(map(str, motif_positions)))
## Note: some old UniProt IDs have been merged (e.g. P01046 was merged into P01044).
## Requests for them are redirected automatically, so the script still works.

In [None]:
# Problem 16: Inferring mRNA from Protein
# The protein string is the first line of the input file.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_mrna.txt'
with open(testfile) as handle:
    protein = handle.readline().strip()
mrna_count = prfs.mrna_from_protein(protein)
print(mrna_count)

In [None]:
# Problem 17 - ORF Open Reading Frames
# For each FASTA record, print every item of the second value returned by
# prfs.find_orfs, one per line.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_orf.txt'
with open(testfile) as handle:
    for entry in SeqIO.parse(handle, 'fasta'):
        # find_orfs returns a pair; only its second element is printed.
        _ignored, proteins = prfs.find_orfs(entry.seq)
        for protein in proteins:
            print(protein)
    


In [None]:
# Problem 18: Enumerating Gene Orders
# Print how many permutations were produced, then each one space-separated.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_perm.txt'
with open(testfile) as handle:
    n = int(handle.readline().strip())
permutations = prfs.enumerate_gene_orders(n)
print(len(permutations))
for perm in permutations:
    print(' '.join(str(gene) for gene in perm))

In [None]:
# Problem 19: Calculating Protein Mass
# The protein string is the first line of the input file.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_prtm.txt'
with open(testfile) as handle:
    protein = handle.readline().strip()
total_mass = prfs.protein_mass(protein)
print(total_mass)

In [None]:
# Problem 20: Find restriction sites
# For each FASTA record, print the second value returned by
# prfs.find_restriction_sites.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO
testfile = 'test-files/rosalind_revp.txt'
with open(testfile, 'r') as f:
    for record in SeqIO.parse(f, 'fasta'):
        _, printed_palindromes = prfs.find_restriction_sites(record.seq)
        # Print inside the loop: with the print after the loop, only the last
        # record's result was shown and an empty file raised NameError.
        print(printed_palindromes)

In [None]:
# Problem 21: RNA Splicing
# Splice the introns out of the main sequence, then translate and print it.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_splc.txt'
with open(testfile) as handle:
    sequences = [entry.seq for entry in SeqIO.parse(handle, 'fasta')]
# The longest sequence is taken as the main sequence; every sequence that
# differs from it is treated as an intron.
main_sequence = max(sequences, key=len)
introns = []
for seq in sequences:
    if seq != main_sequence:
        introns.append(seq)
spliced_sequence = prfs.rna_splicing(main_sequence, introns)
print(prfs.rnaTranslate(spliced_sequence))

In [None]:
# Problem 22: Introduction to Random Strings
# Line 1 is the DNA string, line 2 a whitespace-separated list of floats.
# Both inputs are echoed before the results are printed on one line.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_prob.txt'
with open(testfile) as handle:
    dna = handle.readline().strip()
    print(dna)
    gc_contents = [float(value) for value in handle.readline().split()]
print(gc_contents)
# One result per GC-content value, in input order.
results = [prfs.random_string_probability(dna, gc) for gc in gc_contents]
print(' '.join(str(result) for result in results))

In [None]:
# Problem 23 - Overlap Graphs
# Check every ordered pair of distinct record IDs with prfs.is_overlap_graphs
# (n=3) and print the pairs for which it returns a truthy value.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_grph.txt'
with open(testfile) as handle:
    sequences = {entry.id: str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')}
for seq_id, seq in sequences.items():
    for other_seq_id, other_seq in sequences.items():
        # A record is never compared with itself.
        if seq_id == other_seq_id:
            continue
        if prfs.is_overlap_graphs(seq, other_seq, n=3):
            print(f'{seq_id} {other_seq_id}')

In [None]:
# Problem 24: Enumerating k-mers Lexicographically
# Line 1 is the whitespace-separated alphabet, line 2 the integer n.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_lexf.txt'
with open(testfile) as handle:
    alphabet = handle.readline().split()
    n = int(handle.readline().strip())
kmers = prfs.enumerate_kmers_lexicographically(alphabet, n)
for item in kmers:
    print(item)

In [None]:
# Problem 25: Longest Increasing Subsequence
# Print the increasing result first, then the decreasing one, space-separated.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_lgis.txt'
with open(testfile) as handle:
    # Line 1: number of elements in the permutation (read but not used below).
    n = int(handle.readline().strip())
    # Line 2: the permutation itself.
    permutation = [int(token) for token in handle.readline().split()]
lgis = ' '.join(str(value) for value in prfs.longest_increasing_subsequence(permutation))
print(lgis)
lgds = ' '.join(str(value) for value in prfs.longest_decreasing_subsequence(permutation))
print(lgds)


In [None]:
# Problem 26: Genome Assembly as Shortest Superstring
# All FASTA records are loaded as plain strings and handed over as one list.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_long.txt'
with open(testfile) as handle:
    sequences = [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]
superstring = prfs.shortest_superstring(sequences)
print(superstring)

In [None]:
# Problem 27: Perfect Matchings and RNA Secondary Structures *
# Print prfs.perfect_matchings for every FASTA record in the input file.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_pmch.txt'
with open(testfile) as handle:
    for entry in SeqIO.parse(handle, 'fasta'):
        rna = str(entry.seq)
        matching_count = prfs.perfect_matchings(rna)
        print(matching_count)


In [None]:
# Problem 28: Partial Permutations
# The first line holds two whitespace-separated integers, passed on as (n, k).
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_pper.txt'
with open(testfile) as handle:
    fields = handle.readline().split()
n, k = (int(field) for field in fields)
print(prfs.partial_permutations(n, k))

In [None]:
# Problem 29: Enumerating Oriented Gene Orderings
# Print the number of signed permutations, then each one with '+' signs removed.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_sign.txt'
with open(testfile) as handle:
    n = int(handle.readline().strip())
signed_permutations = prfs.enumerate_oriented_gene_orderings(n)
print(len(signed_permutations))
for signed_perm in signed_permutations:
    # NOTE(review): the elements are concatenated with no separator, so any
    # spacing in the output has to come from the library's return value -- confirm.
    line = ''.join(str(part) for part in signed_perm)
    print(line.replace('+', ''))

In [None]:
# Problem 30: Finding a Spliced Motif
# The first two FASTA records are passed to prfs.find_spliced_motif; the
# result is printed space-separated.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_sseq.txt'
with open(testfile) as handle:
    sequences = [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]
sseq = prfs.find_spliced_motif(sequences[0], sequences[1])
print(' '.join(str(index) for index in sseq))

In [None]:
# Problem 31: Transitions and Transversions
# The first two FASTA records are compared with prfs.transitions_transversions.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_tran.txt'
with open(testfile) as handle:
    sequences = [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]
ratio = prfs.transitions_transversions(sequences[0], sequences[1])
print(ratio)

In [None]:
# Problem 32: Completing a Tree
# Line 1 is the number of nodes; every following non-blank line is one edge
# given as whitespace-separated integers.
from trung_bftl import problem_functions as prfs
testfile = 'test-files/rosalind_tree.txt'
with open(testfile, 'r') as f:
    n = int(f.readline().strip())
    # Skip blank lines (e.g. a trailing empty line): they would otherwise be
    # parsed into empty tuples and counted as edges.
    edges = [tuple(map(int, line.split())) for line in f if line.strip()]
# Echo the parsed input before printing the result.
print(f'number of nodes: {n}')
print(f'edges: {edges}')
print(f'number of edges: {len(edges)}')
print(prfs.completing_tree(n, edges))

In [None]:
# Problem 33 - Catalan Numbers and RNA Secondary Structures
# The file must contain exactly one FASTA record (SeqIO.read); the result is
# printed modulo 1,000,000.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_cat.txt'
with open(testfile) as handle:
    sequence = str(SeqIO.read(handle, 'fasta').seq)
nodes = len(sequence)
catalan_value = int(prfs.get_catalan_numbers(sequence, nodes))
print(catalan_value % 1000000)

In [None]:
# Problem 34: Error Correction in Reads
# Every result pair from prfs.error_correct is printed as "<first>-><second>".
from trung_bftl import problem_functions as prfs
from Bio import SeqIO
testfile = 'test-files/rosalind_corr.txt'
with open(testfile, 'r') as f:
    # Keep every read, in file order. Routing the reads through a dict keyed by
    # record.id silently dropped reads whose FASTA IDs collide; presumably
    # error_correct depends on how often each read occurs -- TODO confirm.
    sequences = [str(record.seq) for record in SeqIO.parse(f, 'fasta')]
results = prfs.error_correct(sequences)
for result in results:
    print(f'{result[0]}->{result[1]}')

In [None]:
# Problem 35: Counting Phylogenetic Ancestors
# The first line of the input file is the integer n.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_inod.txt'
with open(testfile) as handle:
    n = int(handle.readline().strip())
ancestor_count = prfs.count_phylogenetic_ancestors(n)
print(ancestor_count)

In [None]:
# Problem 36: k-Mer Composition
# The file must contain exactly one FASTA record (SeqIO.read); k is fixed at 4.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_kmer.txt'
with open(testfile) as handle:
    sequence = str(SeqIO.read(handle, 'fasta').seq)
kmer_composition = prfs.kmer_composition(sequence, 4)
# Flatten the mapping into its values, ordered by sorted key.
kmer_composition_list = []
for kmer in sorted(kmer_composition.keys()):
    kmer_composition_list.append(kmer_composition[kmer])
print(' '.join(str(count) for count in kmer_composition_list))
    


In [None]:
# Problem 37: Speeding Up Motif Finding
# The failure array of the first FASTA record is printed space-separated.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_kmp.txt'
with open(testfile) as handle:
    sequences = [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]
failure_array = prfs.get_failure_array(sequences[0])
print(' '.join(str(value) for value in failure_array))

In [None]:
# Problem 38: Finding a Shared Spliced Motif
# All FASTA records are loaded as plain strings and handed over as one list.
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_lcsq.txt'
with open(testfile) as handle:
    sequences = [str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')]
shared_spliced_motif = prfs.find_shared_spliced_motif(sequences)
print(shared_spliced_motif)

In [None]:
# Problem 39: Ordering Strings of Varying Length Lexicographically
# The resulting strings are printed one per line.
from trung_bftl import problem_functions as prfs

testfile = 'test-files/rosalind_lexv.txt'
with open(testfile) as handle:
    # Line 1: the whitespace-separated alphabet.
    alphabet = handle.readline().split()
    # Line 2: the integer n passed along with the alphabet.
    n = int(handle.readline().strip())
string = prfs.order_strings_lexicographically(alphabet, n)
print('\n'.join(string))



In [None]:
# Problem 40: Maximum Matchings and RNA Secondary Structures
# The file must contain exactly one FASTA record (SeqIO.read).
from trung_bftl import problem_functions as prfs
from Bio import SeqIO

testfile = 'test-files/rosalind_mmch.txt'
with open(testfile) as handle:
    sequence = str(SeqIO.read(handle, 'fasta').seq)
matching_count = prfs.maximum_matchings(sequence)
print(matching_count)