# CS696 Assignment 1

## Imports

In [1]:
import unittest

## Problems

### Question 1: Sum of Multiples of 3 & 5
This function returns the sum of all non-negative integers below 'n' (n itself is excluded) that are multiples of either 3 or 5 but not both.

In [2]:
def sum_multiples_3_5(n):
    """Sum the integers in ``range(n)`` divisible by exactly one of 3 and 5.

    Numbers divisible by both (the multiples of 15, including 0) are left
    out, and ``n`` itself is never part of the range.
    """
    total = 0
    for value in range(n):
        by_three = value % 3 == 0
        by_five = value % 5 == 0
        # "!=" on two booleans is exclusive-or: exactly one divisor matches.
        if by_three != by_five:
            total += value
    return total

### Question 2: Count of Pattern
This function returns the number of times a pattern occurs in a string. Overlapping matches are counted as well.

In [3]:
def pattern_count(text, pattern):
    """Count how often ``pattern`` occurs in ``text``, overlaps included.

    Every index of ``text`` is tried as a possible match start, so
    occurrences that share characters are each counted. The comparison is
    case-sensitive.
    """
    matches = 0
    for start in range(len(text)):
        # The offset form of startswith tests the match at ``start``.
        if text.startswith(pattern, start):
            matches += 1
    return matches

### Question 3: Nth Octaldrome
If the octal representation of a number is a palindrome, then it's called an octaldrome. This function returns the nth octaldrome, counting from zero: the 0th octaldrome is 0, the 1st is 1, and so on.

In [4]:
def nth_octaldrome(n):
    """Return the octaldrome at zero-based position ``n``.

    An octaldrome is a non-negative integer whose octal digits read the same
    in both directions. Position 0 is the number 0, position 1 is the
    number 1, and so on. For a negative ``n`` the search loop never runs and
    -1 is returned.
    """
    found = 0
    candidate = -1
    while found <= n:
        candidate += 1
        digits = format(candidate, 'o')  # octal digits without the '0o' prefix
        if digits == digits[::-1]:
            found += 1
    return candidate

### Question 4: Most Frequent Word
Given an integer value 'n', this function finds the substring(s) of length n that occur most frequently in a given string. Overlapping occurrences are counted.

In [5]:
def most_frequent_word(text, n):
    """Return the most frequent length-``n`` substring(s) of ``text``.

    Every window of ``n`` consecutive characters is counted, so overlapping
    occurrences all contribute.

    Args:
        text: The string to scan.
        n: Window length; must be non-negative.

    Returns:
        A tuple holding every substring that reaches the highest count (ties
        are all included; callers should not rely on the order). The tuple is
        empty when ``text`` is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    # Function-scope import so this definition does not depend on other cells.
    from collections import Counter

    if n < 0:
        raise ValueError("n must be non-negative, got {!r}".format(n))

    # A string of length L has L - n + 1 windows of length n. Stopping at
    # L - n would silently drop the final window.
    window_total = len(text) - n + 1
    counts = Counter(text[start:start + n] for start in range(window_total))
    if not counts:
        return ()

    best = max(counts.values())
    return tuple(word for word, count in counts.items() if count == best)

### Question 5: DNA Reverse Complement
Given a DNA string, this function generates the complement and reverses it.

Complement pairs:
* A & T
* C & G

In [62]:
def reverse_complement(dna_string):
    """Return the reverse complement of a DNA string.

    Each base is replaced by its partner (A<->T, C<->G) and the resulting
    sequence is reversed. Input is treated case-insensitively and the result
    is always upper-case.

    Args:
        dna_string: Sequence made up of the characters A, C, G and T in
            either case.

    Returns:
        The upper-case reverse complement; an empty string for empty input.

    Raises:
        ValueError: If the string contains any other character.
    """
    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    try:
        # The dict is the single source of truth for the valid alphabet; a
        # failed lookup identifies the first offending character.
        complemented = [complement[base] for base in dna_string.upper()]
    except KeyError as err:
        raise ValueError(
            "invalid DNA character {!r}; expected only A, C, G or T".format(
                err.args[0])
        ) from None
    # Joining once avoids rebuilding the whole string for every character.
    return ''.join(reversed(complemented))

### Question 6: GC Content of DNA String
For a given DNA string, this function computes the fraction of characters that are G or C (case-insensitive) and returns it as a value between 0 and 1.

In [86]:
def gc_content(dna_string):
    """Return the fraction of characters in ``dna_string`` that are G or C.

    Matching is case-insensitive. The result is a float between 0 and 1.

    Args:
        dna_string: The sequence to inspect.

    Returns:
        The number of G and C characters divided by the string length, or
        0.0 for an empty string (which contains no G or C at all).
    """
    if not dna_string:
        # Avoid dividing by zero on an empty sequence.
        return 0.0
    upper = dna_string.upper()  # normalise once instead of once per count
    gc_count = upper.count('G') + upper.count('C')
    return gc_count / len(dna_string)

### Question 7: Map Values
Given an array and a map, this function returns all the values of the map whose keys are present in the array.

In [8]:
def map_values(value_array, map_dict):
    """Collect the values of ``map_dict`` whose keys occur in ``value_array``.

    The result follows the iteration order of ``map_dict``, not the order of
    ``value_array``. Entries of ``value_array`` that are not keys of the map
    are ignored.
    """
    return [map_dict[key] for key in map_dict if key in value_array]

## Unit Tests
Unit tests for all the above functions.

### Unit Tests: Question 1 (Sum of Multiples of 3 & 5)

In [70]:
class TestSumMultiplesOf3And5(unittest.TestCase):
    """Checks sum_multiples_3_5 against hand-computed totals."""

    def test_sum_multiples_1(self):
        # Nothing below 1 qualifies, so the sum is zero.
        total = sum_multiples_3_5(1)
        self.assertEqual(total, 0)

    def test_sum_multiples_5(self):
        # The bound is exclusive: 5 itself is not added, leaving only 3.
        total = sum_multiples_3_5(5)
        self.assertEqual(total, 3)

    def test_sum_multiples_6(self):
        # 3 + 5
        total = sum_multiples_3_5(6)
        self.assertEqual(total, 8)

    def test_sum_multiples_20(self):
        # 3 + 5 + 6 + 9 + 10 + 12 + 18; 15 is a multiple of both and skipped.
        total = sum_multiples_3_5(20)
        self.assertEqual(total, 63)

### Unit Tests: Question 2 (Pattern Count)

In [18]:
class TestPatternCount(unittest.TestCase):
    """Checks pattern_count, including overlapping and case-sensitive cases."""

    def test_pattern_count_aabb(self):
        # A single, non-overlapping occurrence.
        occurrences = pattern_count("aabb", "ab")
        self.assertEqual(occurrences, 1)

    def test_pattern_count_abababa(self):
        # Matches start at indices 0, 2 and 4 and share characters.
        occurrences = pattern_count("abababa", "aba")
        self.assertEqual(occurrences, 3)

    def test_pattern_count_aaaa(self):
        # Five 'a' characters contain four overlapping "aa" pairs.
        occurrences = pattern_count("aaaaa", "aa")
        self.assertEqual(occurrences, 4)

    def test_pattern_count_Abcde(self):
        # Matching is case-sensitive, so "abc" does not match "Abc".
        occurrences = pattern_count("Abcde", "abc")
        self.assertEqual(occurrences, 0)

### Unit Tests: Question 3 (Octaldrome)

In [71]:
class TestOctaldrome(unittest.TestCase):
    """Checks nth_octaldrome at a few zero-based positions."""

    def test_nth_octaldrome_0(self):
        # Position 0 is the number 0 itself.
        value = nth_octaldrome(0)
        self.assertEqual(value, 0)

    def test_nth_octaldrome_1(self):
        value = nth_octaldrome(1)
        self.assertEqual(value, 1)

    def test_nth_octaldrome_9(self):
        # 0-7 fill positions 0-7, 9 (0o11) is position 8, 18 (0o22) is next.
        value = nth_octaldrome(9)
        self.assertEqual(value, 18)

### Unit Tests: Question 4 (Most Frequent Word)

In [66]:
class TestMostFrequentWord(unittest.TestCase):
    """Checks most_frequent_word for several window lengths."""

    # DNA fixture shared by the length-0 to length-4 tests.
    DNA = "TCGAAGCTAGACGCTAGTAGCTAGTGTGCA"

    def test_most_frequent_word_0(self):
        # Every zero-length window is the empty string.
        winners = most_frequent_word(self.DNA, 0)
        self.assertEqual(winners, ("",))

    def test_most_frequent_word_1(self):
        winners = most_frequent_word(self.DNA, 1)
        self.assertCountEqual(winners, ("G",))

    def test_most_frequent_word_2(self):
        winners = most_frequent_word(self.DNA, 2)
        self.assertCountEqual(winners, ("AG",))

    def test_most_frequent_word_3(self):
        winners = most_frequent_word(self.DNA, 3)
        self.assertCountEqual(winners, ("TAG",))

    def test_most_frequent_word_4(self):
        # Two substrings tie; assertCountEqual ignores their order.
        winners = most_frequent_word(self.DNA, 4)
        self.assertCountEqual(winners, ("CTAG", "GCTA"))

    def test_most_frequent_word_string(self):
        # Works on ordinary text too; the capitalised "Th" is a different word.
        sentence = "This string has the value 'th' repeated thrice."
        winners = most_frequent_word(sentence, 2)
        self.assertCountEqual(winners, ("th",))

### Unit Tests: Question 5 (Reverse Complement)

In [64]:
class TestReverseComplement(unittest.TestCase):
    """Checks reverse_complement for both cases and for invalid input."""

    def test_reverse_complement_gtca_upper(self):
        result = reverse_complement("GTCA")
        self.assertEqual(result, "TGAC")

    def test_reverse_complement_gtca_lower(self):
        # Lower-case input is accepted; the result comes back upper-case.
        result = reverse_complement("gtca")
        self.assertEqual(result, "TGAC")

    def test_reverse_complement(self):
        # Characters outside A/C/G/T must be rejected with ValueError.
        with self.assertRaises(ValueError):
            reverse_complement("xyz")

### Unit Tests: Question 6 (GC Content)

In [84]:
class TestGCContent(unittest.TestCase):
    """Checks gc_content on upper-case, lower-case and GC-free input."""

    def test_gc_content_upper(self):
        # 3 of the 8 characters are G or C.
        fraction = gc_content("AGCTATAG")
        self.assertEqual(fraction, 0.375)

    def test_gc_content_lower(self):
        # Same sequence in lower case gives the same fraction.
        fraction = gc_content("agctatag")
        self.assertEqual(fraction, 0.375)

    def test_gc_content_zero(self):
        # Mixed-case sequence without any G or C.
        fraction = gc_content("atattttaataaataAATA")
        self.assertEqual(fraction, 0.0)

### Unit Tests: Question 7 (Map Values)

In [76]:
class TestMapValues(unittest.TestCase):
    """Checks map_values with partially matching and non-matching keys."""

    # Lookup list shared by both tests.
    NAMES = ["Tom", "Fred", "Harry"]

    def test_map_values_simple(self):
        # "Fred" is not a key and "Dick" is not in the list; only two match.
        lookup = {"Tom": 3, "Dick": 4, "Harry": 5}
        found = map_values(self.NAMES, lookup)
        self.assertEqual(found, [3, 5])

    def test_map_values_empty(self):
        # Similar-looking keys do not count as matches.
        lookup = {"Tommy": 3, "Dick": 4, "Harriet": 5}
        found = map_values(self.NAMES, lookup)
        self.assertEqual(found, [])

In [87]:
# Run the test cases loaded from __main__ with verbose per-test output.
# argv=[''] supplies a dummy program name so unittest does not try to parse
# the command-line arguments of the hosting process, and exit=False stops
# unittest.main from calling sys.exit() once the run finishes.
unittest.main(argv=[''], verbosity=2, exit=False)

test_gc_content_lower (__main__.TestGCContent) ... ok
test_gc_content_upper (__main__.TestGCContent) ... ok
test_gc_content_zero (__main__.TestGCContent) ... ok
test_map_values_empty (__main__.TestMapValues) ... ok
test_map_values_simple (__main__.TestMapValues) ... ok
test_most_frequent_word_0 (__main__.TestMostFrequentWord) ... ok
test_most_frequent_word_1 (__main__.TestMostFrequentWord) ... ok
test_most_frequent_word_2 (__main__.TestMostFrequentWord) ... ok
test_most_frequent_word_3 (__main__.TestMostFrequentWord) ... ok
test_most_frequent_word_4 (__main__.TestMostFrequentWord) ... ok
test_most_frequent_word_string (__main__.TestMostFrequentWord) ... ok
test_gc_content (__main__.TestNotebook) ... ok
test_map_values (__main__.TestNotebook) ... ok
test_most_frequent_word_1 (__main__.TestNotebook) ... ok
test_most_frequent_word_2 (__main__.TestNotebook) ... ok
test_most_frequent_word_3 (__main__.TestNotebook) ... ok
test_nth_octaldrome (__main__.TestNotebook) ... ok
test_pattern_count_

<unittest.main.TestProgram at 0x108fd49b0>