-
Notifications
You must be signed in to change notification settings - Fork 0
/
revc.py
41 lines (32 loc) · 1.94 KB
/
revc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# In DNA strings, symbols 'A' and 'T' are complements of each other, as are 'C' and 'G'.
# The reverse complement of a DNA string s is the string sc formed by reversing the symbols of s,
# then taking the complement of each symbol (e.g., the reverse complement of "GTCA" is "TGAC").
# Given: A DNA string s of length at most 1000 bp.
# Return: The reverse complement sc of s.
# Test fixtures: SAMPLE_OUTPUT is the expected reverse complement of
# SAMPLE_DATASET (asserted by the self-test in the __main__ block).
SAMPLE_DATASET = "AAAACCCGGT"
SAMPLE_OUTPUT = "ACCGGGTTTT"
SAMPLE_TWO_OUTPUT = """CTACTCCACCTTGGACGTTTATCGGACTGGCTCGCGGACTTCTTTGGGTTGGGGAGCAAAAAACTGGACGGGGCACTTGTGGAACCTAGTGACTCATGAACCCCACCGAAGTCAGTATTGCTAATTGATCTACTAAAGAACAATAGAAGTGTGACCGGACTGGTCGTTAGCCGAGGAAGACGTAAGTATATATTTTGGAACCCGTACGGGCTATTGAAGACCTGACTGCAGATTACTTTCTAGTCACTTGCAGTGGTGACATCAGGACACAACAGTGATGGTATCTGGTGTGTTAGGCAGCCCCGGGACTTCTCTGACTGACGCTTGCATGTAGTGGCTGTATCGGGTCGCGGCGTCTTGGCCCTAGAGCAGATCGCGGTCTAACGCGCGATTAGTGAGAAGTCGGCTGGTACTCGCAGCTCCATCCCGTTGTGAAATTCGGGGATAGGGTGATCTAATCAGTGGCGACCCGGTGCCGTTGATATTTTCGTTTGTAAGGTCTAATGCAATACTAAGGTGCCACAGATTCGGTGAGCTCCCATGGCTATGTCCAGATCGGGTTCCGCCTTAACCGTGCCAATTTCAGAAGTACTTGTCCTACACTAGGGTATGCGGCAAGATTAGGAACGAGGAGCAGGCCCAACCAGCCAATGTTATTTAACTCCTGCGCTATACTTAATACAAGCACTTGCGTTCGCTCCTACTCAAAGTCCAAAATGCATATCCTTACGTTTTCCGTACGCCCGGGGTCGAGCGATAGAGAATAACTTGCATAGAACAGCTAGGCCACTAGTCGTGCCTTATCACTCCAAGGGTGAGCCAACTATGTACAATAACGATCGTTGTTCCTTGCTTTCTTATGCCGCAGAGTGCGCCA"""
# Complement of each DNA base: A <-> T and C <-> G.
REV_SYMBOLS = {
    "A": "T",
    "T": "A",
    "C": "G",
    "G": "C",
}


def main(dna_string):
    """Print and return the reverse complement of a DNA string.

    The string is reversed and every base is replaced by its complement
    from REV_SYMBOLS, e.g. "GTCA" -> "TGAC".

    Args:
        dna_string: DNA sequence made of the upper-case symbols A, C, G, T.
            Leading/trailing whitespace (such as the newline that ends a
            dataset file) is ignored.

    Returns:
        The reverse complement as a string.

    Raises:
        KeyError: if the sequence contains a symbol that is not a key of
            REV_SYMBOLS.
    """
    # Text read from a file usually ends with "\n", which is not a base and
    # would otherwise fail the complement lookup.
    bases = dna_string.strip()
    rev_comp = "".join(REV_SYMBOLS[base] for base in reversed(bases))
    # Call form with a single argument behaves the same on Python 2 and 3.
    print(rev_comp)
    return rev_comp
if __name__ == "__main__":
    # Self-test: the sample input must produce the known sample answer.
    assert main(SAMPLE_DATASET) == SAMPLE_OUTPUT

    # Optional second check against a stored dataset (disabled):
    # with open("./datasets/rosalind_revc_2.txt", 'r') as fptr:
    #     dna = fptr.read().strip()
    # assert main(dna) == SAMPLE_TWO_OUTPUT

    # Real run: read the dataset and print its reverse complement.
    with open("./datasets/rosalind_revc.txt", 'r') as fptr:
        # Strip surrounding whitespace: the file's trailing newline is not a
        # DNA base and must not reach the complement lookup.
        dna = fptr.read().strip()
    main(dna)