In [3]:
def calculate_gc_content(dna_string):
    total_length = len(dna_string)
    gc_count = dna_string.count('G') + dna_string.count('C')
    return (gc_count / total_length) * 100

def find_highest_gc_content(strings):
    highest_gc_content = 0
    highest_gc_id = ''

    for id, dna_string in strings:
        gc_content = calculate_gc_content(dna_string)
        if gc_content > highest_gc_content:
            highest_gc_content = gc_content
            highest_gc_id = id

    return highest_gc_id, '{:.6f}'.format(highest_gc_content)

# Sample dataset (list of (ID, DNA string) tuples)
dna_strings = [
    ("Rosalind_6404", "CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC TCCCACTAATAATTCTGAGG"),
    ("Rosalind_5959", "CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT ATATCCATTTGTCAGCAGACACGC"),
    ("Rosalind_0808", "CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC TGGGAACCTGCGGGCAGTAGGTGGAAT")
]

# Find the highest GC content and its corresponding ID
result = find_highest_gc_content(dna_strings)

# Output the result
print("\n".join(result))


Rosalind_0808
60.227273


In [4]:
def transform_fasta_to_tuples(fasta_string):
    # Split the input string into lines
    lines = fasta_string.strip().split('\n')

    # Initialize variables
    tuples_list = []
    current_id = None
    current_dna_string = []

    # Process each line
    for line in lines:
        if line.startswith('>'):
            # If a new ID is found, process the previous DNA string (if any)
            if current_id is not None:
                tuples_list.append((current_id, ''.join(current_dna_string)))
                current_dna_string = []
            current_id = line[1:]  # Extract the ID (excluding '>')
        else:
            # Append DNA string lines to the current_dna_string list
            current_dna_string.append(line)

    # Append the last DNA string (if any)
    if current_id is not None:
        tuples_list.append((current_id, ''.join(current_dna_string)))

    return tuples_list

# Sample dataset (multiline string containing DNA strings in FASTA format)
input_string = '''
>Rosalind_7113
TCCAAAAAAGATTCCCATGGATTACCCATAATATGCGGACCGATTACAGATAATTGGATG
CAGTAAGCGTTCCGAATTTAGAGACCCGCGGTTTTGGGTTACTAGTGTAAACCGCGGCCA
TCACCCTAAGGGCTACTCCCCCCGTCGTTTCCTCGTTCCCGTGAAAGTAGGGTTCTCTTC
TCATTAATACTGTGCTTCTGACAGCGCAGCGTGAAATTCAAGGCCTTTAGCTGTTGAGCG
GAGGACGAAGGCAGCACGGCAGTCGAGTCATCGATGGATGTGACTACGTTACAGTTCGGA
GGACAGACGTCATGACCAGTGTGGTAGCTGCGTGTTGCAATACAGTAGCACCCGGGAGTC
GATTGTCCTGCGGTTCCGTGTCGTCCACACCGGCGGGAGGGAGCTTATAAAAGGAGGAGC
CTGCCACGTATGACTTATCCTCTCAAGTACATATGGGCTTTGAGCCCCTGGGCCGTAGGC
CTCGCTACTTAGACACCCGATCCTCGCCTCTTGTCGGTCGCCCAGTCTCGCGAGTTAAAC
GTAGTTTCCTAGCAGATCCTAAACTTTACCCCTTTAACCACCGTATAATGGAATCCATCC
GTCTGACAAATATTAGGCTGAAAGACTTTGAAGTCGTTGGGGACGGAGCACACGTAGCCG
AGGAAGCGGCTAGAACATACTTGAGTACGTCTCTCCCGCTGAGGATACACCGCTCGACTG
GCGGAGCCAAACACAATAGCAAGTGTTTGGGAAACTATGTCCGGGGAGTGCCGAGAATCT
AAAGTAAGTGTTCTTCTAAATAGCCCTCAAAGAATGCGCATTGGCGGTCTGGCAGTTATG
TCGATTCTAACATGGCAACAAGGATTTACCTTTTAGTGAACCATGAACGTTACGATGACG
TATAGGGCAGTGCGCAAGAACGACAGATACAAACGCGTA
>Rosalind_1194
TGGCCCCAACATGCGACCACTGTGTAACCGCCGTTACGATCCGCTACCTATTGTCACCTG
GCCCCCGGGGAGCTCGTTCCGGTTTCATGGACAACTGTGCATTCGATACGAACCGTGGTA
GTAGCTCGATGTCATGAACTTAACTCTGTCACGGCTCCCGGTTAACAAAGAAGAGTTTAG
GCGGGGCTCTCCATTGTTGTGCACCAGTTTTTTTGGTAGTCGAACGGCGGGTCCCCGGCA
CACGGAGCACGGTGCTCGCCAATTGTTTCGACATAGCTAAGCAAGGGCAATCGAACGCGT
GCCGCGGGGGCCACATATTACATTCTCTACCCCCTAACTCCTGTCGCTTGCATATAGGCA
TGTTATAGCGTGGGGGTACCGACCCTTGATTTGGATTGTTGGTCTTCGGCTGCCTGCTGG
CGCCGTAATCTATTAGACGAAAGCAAGGTCGAATGAGAATGCAGCCGGCGTTTGTGTCGT
CGTGTTCTGCTAGCTGTGACAACAAGGGCACAACGTGCGTATCGCCTTAAGGGCTTGTAA
AATCCAGTGAAATAAGAATCATTTACAATAGCTTCCAGTTACCCTCCTATGCCTCATATC
CAGTGCATGGATGCTGGTGCCCACTCTGTCTGTGTATGCCTGCTGGACAAGACTGACCAG
CTCTTCGCTCACGTCCGATAACGAATACGGAGGACTACATACCCTATGGCTCGTTCCAAT
GCCCATGAAGGCTCTCCCTGTGGGGCGAGACCAAGGCCGGTTATGCTGGCTGTAAAATGA
TTCAGAGGCTCCGATGGCAGGCACGTACTAGCTATCTTCGATTAAAGCTTCCACCGCTCA
ATACCGGTGTATACGAAAGTTCCCTCCAGTCGACTATATCATTAACTGCGTGG
>Rosalind_3605
GGGAAGTGAGGCGACTCGGCACTCCAAACGTGCTTCCGGGTATATTATGTGGTGGGTATA
AATTCTAATCACTAGACTCAAGCGGAGTTCGCCGTGAAACTCCTCTTTACTGGAGGACGC
GGTCAAGTAGACCATTGGATCTCTTTAGTTCCTAGACGCCCACAGGGGATTTTGTGGACC
TTCGACTATCATTAAGGCTCCACTATGACAGGACGGACGATCGCGGCTAGCTGTCTAATA
AACTGGTGGCGAACAGTTGCAGGAGATGAGCTCCGACAGAGAAGAGGACGGAAAACGCAC
GCGGATATGAGTGGACGTCTGTACTCACCTGTAACTCCCCTTCCATAATTTGTTGATGCC
ACTCGCGACTCAACATGTGACTTATAGCAGCGACACCCCCAGGAACCGGGCTGCTTAATT
CGCAATTGTCAGGCTATGCTCCCCCAACGCCAATTGCAGGGTTAAACAGTACTTTCTGCG
TACGACTTAAACATCTACTTAAGCACAGAATCTGTGCGGAATATCTGTTTACACTTTCTT
TAGCAAGGTTTCGGCAACCCTCGCTAGGAGACTTTGTACTAACCCTACGCGGCTAGCACA
GCAGCCGGCGCGCCCCAGCGGGACACGACCGCGAAGCACGTGTCAGTTACGGGACGCAAA
TTGTCGTTTATAATATTTAGAGGCCTTCAGTACTTACGAAGATTTAACCCCCCCTTATCA
TCGGTTAATATACGGGCCAGAGATCCGTAGCTCTGTATGACGTTGCCAACTTTAATTCAT
CCGGACGTTTCCCCGCATTTGCAATACTGGAAGTAACAGATTCACGAAAAAGGCGGTC
>Rosalind_4341
GCGTCGCTTAGCCTAGATGCGATAGAATGACCACGATGTGGCGCTTAATTAAAGCAGAAA
CTCCCGCTCATAGGGCAATCCACCACGCTTCGCTATAAACCATTACTTGTGTTGTTGCGG
TCACACAAGGGTCTATCACTGGTAAGGTTATCAGCACGATCGGACAACGGGTATGCAGTG
GATCAGCACTGACTTGGTACACTAACGCTTTAAAAGTCACGACTGCTGGTTTATCAAAGC
TCCGCTGCATCGTGTTTCCAGACAAGCGACAGAATCTCGGCCCGAGACGGTCTTCACCAG
GAATCTGTTCTCGGATCCAATTGTTTAATATGCTTGATCCCCTCCCGCGAGAACGTGGTC
AACACGTCTTTCACACACTTCTTTTCAAAGCTCCGGCAGCTGAAAGTTCGGCAAATGTAT
TTAACATGTCGTGAATGATAGGATACTTGAGACGACAACGACGATATGACACTAGCAACA
ACGCCTTAATTGTACGAAACAAGGATGTGAAATTGATACGAGGGAAAGGGAGGCCCTACC
GAGCTCGAAACGTCGATTCCGTTTGTGCGGTAAACTCAAGAGGGGACGGATAAGACTCGC
TCGATACCCATACGTTACCAGCGCTGCCAACGTCCCCGCCTCTCCTGCAATAGTAAAATA
ATAAGATAGGCAGATATCGCCAAAGAAGAGTACATGGGAGTGTGCCTAAGCACGGAGTAG
CGACAATATGAGTGACTAGCTTTGCACGGACTCCTTAAACTCCACAGCGCTTTCTCCTAT
AAGGTAGGTTAGATTTGCACATAACGATT
>Rosalind_5665
GGACCAGTGGAACTAAATGCGTTTTGCACGATATGTGGGTGCCTTGGCCACACATGTCGC
CGCTCAATTTATTTTATACCAAGTGAAATGTTACCGATCTTGGTGGAACGTTTCACACAA
ATCGGAATTCAGGTCCTCAGTGGCTAGAAGGGCTTGTCGCCGGACTGCGAATAGACCACG
ATAGCGATCACCATAGGTTGACGATGAACATATCCCACGCGTAAGCGCCGGATGGGAAAC
GTTTAATTCCGTATCTTACCAGATTGTGAGGAACAGTTATGTCAGAGTGTAACACTAAAG
CCCCCTCCTGAGTATCGAAAGCTACCGTTCGAATCCATAGGGAACAAGCCAGTCTACGTT
AGCTATACTTAGGCCCGTAGAGACGCCGTCGCTGGTTAATTCAGGGATTCGCGAAAACCA
AAAAGGATTGTGCTTTACTGTAGCATTCTTGATGCGCGTGCCGGGCGGACTATATGCGGC
TAAGAGGTGGTATGTGCAGCTAGGGTGAAAAGATAACAAGCAGACTCTAAGCCAGGTAAT
ACCGAGTTGCCAGTGATGCAACTTTTGGTACAGCAAGCCTTAAGTCGTCTAAATATTGCC
TTTTGAGTTCGAGCATGTCACCGTTCTCAAGAGCACTGTGTATTCAGCGATACCGTGGCG
GCCACGAAATCCAGGCTGCACAGTTGTAGCGTCTTCTTTTCGAGCATTCAGAGGCGTGAA
GGTTTTCAGCTCTCTCTAAAAAAGCTCGCCCGCGCGCCGTACTACCAGCGAAACGTCATG
TGTTTACCGCCGGACCCTCTACCAAATATCTTGTCCAGTTATAGTCAGCATAACACCCTT
TTCCTTACAACTTCAGGCCAGTAGCGCGAGGCGCACAGTCAGTCCAGAGCTATTAGATCA
CCACCTGTCTCTAGTGATCTGCCTCTATAGGCGTCTGTATCCTCCGGATTTGGGCGACTG
GGAATCA
'''

In [6]:
input_list = transform_fasta_to_tuples(input_string)

In [7]:
result = find_highest_gc_content(input_list)

# Output the result
print("\n".join(result))

Rosalind_1194
52.407615
