In [3]:
# Initialize Otter
import otter
grader = otter.Notebook("Test_assign.ipynb")

## DSA 2024 Summer School Admittance Check

Thanks for your interest in attending DSA 2024 Nyeri, Kenya. To attend the summer school you have to have some level of basic Python proficiency. Completing the following notebook should ensure you have the right kind of background to benefit maximally from the Summer School. See you in Nyeri!

In [4]:
import pandas as pd
import numpy as np
%matplotlib inline
import otter

grader = otter.Notebook()

**Question 1:** write a function `isValid(s)` that takes as argument a string s containing a sequence of parenthesis '(', ')', '{', '}', '[' and ']', and  determines if the input is valid. A input string is valid if for every open parenthensis there is a close one and parenthesis is well-formed. e.g  "(){}[]" is valid.

In [3]:
def isValid(s: str) -> bool:
    # Stack to keep track of opening brackets
    stack = []
    # Dictionary to map closing brackets to their corresponding opening brackets
    bracket_map = {')': '(', '}': '{', ']': '['}
    
    # Iterate through each character in the string
    for char in s:
        # If the character is a closing bracket
        if char in bracket_map:
            # Pop from the stack if it's not empty, else assign a dummy value
            top_element = stack.pop() if stack else '#'
            # If the popped bracket doesn't match the corresponding opening bracket
            if bracket_map[char] != top_element:
                return False
        else:
            # It's an opening bracket, push onto the stack
            stack.append(char)
    
    # If the stack is empty, all brackets are matched
    return not stack

# Test cases
print(isValid("(){}[]"))  
print(isValid("({[)]"))  
print(isValid("{[]}"))   
print(isValid("((()))"))  
print(isValid("((()")) 


True
False
True
True
False


In [4]:
grader.check("q1")

**Question 2:** Given a paragraph as a string, write a function that return the number of character with odd frequencies. E.g The paragraph ``DSA 2024 Nyeri`` has *10* characters with odd frequencies. i.e the entire frequency count is given as {' ': 2, '2': 2, 'D': 1, 'S': 1, 'A': 1, '0': 1, '4': 1, 'N': 1, 'y': 1, 'e': 1, 'r': 1, 'i': 1}) and there are *10* characters with odd frequences. So the function should return *10*.

In [5]:
def oddFrequencyCounter(theParagraph: str) -> int:
    # Dictionary to store the frequency of each character
    frequency = {}
    
    # Count the frequency of each character
    for char in theParagraph:
        if char in frequency:
            frequency[char] += 1
        else:
            frequency[char] = 1
    
    # Count the number of characters with odd frequencies
    odd_count = sum(1 for count in frequency.values() if count % 2 != 0)
    
    return odd_count

# Test case
paragraph = "DSA 2024 Nyeri"
print(oddFrequencyCounter(paragraph))  # Output: 10


10


In [6]:
grader.check("q2")

**Question 3:** Write an infinite generator function `odd_squares_sum` that yields the sum of square of odd numbers. e.g $1^2 + 3^2 + 5^2 + ...$ up to a ``limit``

In [1]:
def odd_squares_sum(limit):
    current_sum = 0
    n = 1
    count = 0
    
    while count < limit:
        current_sum = n ** 2
        yield current_sum
        n += 2
        count += 1


In [4]:
grader.check("q3")

**Question 4:** Using the `odd_squares_sum` generator defined above, create a list of sum of squares up to a limit of $20$ and store the results in a numpy.array variable called `oddSumList`

In [18]:
import numpy as np

def odd_squares_sum(limit):
    n = 1
    while n * n <= limit:
        yield n ** 2
        n += 2

oddSumList = np.array(list(odd_squares_sum(361)))
print(oddSumList)


[  1   9  25  49  81 121 169 225 289 361]


In [19]:
grader.check("q4")

**Question 5:** Compute the element-wise remainder of ``oddSumList`` when divided by $5$ and merge it with ``oddSumList``. The final output stored in the variable `mergedList` should be in the form of a list of tupples e.g ``[(1,1), (4,9), (0,25), ...]``

In [36]:
import numpy as np

def odd_squares_sum(limit):
    n = 1
    while n * n <= limit:
        yield n ** 2
        n += 2

oddSumList = np.array(list(odd_squares_sum(361)))
print(oddSumList)

mergedList = [(x % 5, x) for x in oddSumList]
print(mergedList)

[  1   9  25  49  81 121 169 225 289 361]
[(np.int64(1), np.int64(1)), (np.int64(4), np.int64(9)), (np.int64(0), np.int64(25)), (np.int64(4), np.int64(49)), (np.int64(1), np.int64(81)), (np.int64(1), np.int64(121)), (np.int64(4), np.int64(169)), (np.int64(0), np.int64(225)), (np.int64(4), np.int64(289)), (np.int64(1), np.int64(361))]


In [37]:
grader.check("q5")

**Question 6:**  Write a function `greatest_common_divisor` that takes two inputs `a` and `b` and returns the greatest common divisor of the two numbers. E.g. input `(10, 15)` would return `5`

In [35]:
import numpy as np

def greatest_common_divisor(a, b):
    while b != 0:
        a, b = b, a % b
    return abs(a)

np.testing.assert_equal(greatest_common_divisor(10, 15), 5)
np.testing.assert_equal(greatest_common_divisor(15, 19), 1)
np.testing.assert_equal(greatest_common_divisor(100, 105), 5)

In [36]:
grader.check("q6")

**Question 7:**  Write a function `get_3_nearest` that takes in a point of interest ``pt`` and a **list** of points ``ptlist``  and returns a list of 3 nearest points from the point of interest ``pt``. Assume the distance between any two point is defined by the `L1-norm`.

In [39]:
def get_3_nearest(pt, ptlist):
    # Calculate the L1-norm distance between pt and each point in ptlist
    distances = [(abs(pt[0] - p[0]) + abs(pt[1] - p[1]), p) for p in ptlist]
    
    # Sort the distances and keep the 3 smallest ones
    distances.sort(key=lambda x: x[0])
    nearest_3 = [p for d, p in distances[:3]]
    
    return nearest_3

pt = (0, 0)
ptlist = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
print(get_3_nearest(pt, ptlist))  # Output: [(1, 1), (2, 2), (3, 3)]

[(1, 1), (2, 2), (3, 3)]


In [40]:
grader.check("q7")

**Question 8:**  Write a function `diagonal_vector(M)` that returns a **numpy** array of the list of **absolute** values of the main diagonal entries in the matrix $M$

In [44]:
import numpy as np

def diagonal_vector(M):
    M = np.array(M)  # Convert the input list to a numpy array
    return np.array([abs(M[i, i]) for i in range(min(M.shape))])

M = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(diagonal_vector(M))  # Output: [1, 5, 9]

[1 5 9]


In [45]:
grader.check("q8")

**Question 9:**  Write a function `flatten_reverse_lists` that takes in a list of lists and outputs a **reverse** sorted list of elements of sublists of the input list (confusing right?) <br>
Example: given `flatten_reverse_lists([[2,13,44], [6,7]])` it should return `[2,6,7,13,44]`

In [46]:
def flatten_reverse_lists(list_of_lists):
    flat_list = [item for sublist in list_of_lists for item in sublist]
    return sorted(flat_list, reverse=True)
print(flatten_reverse_lists([[2,13,44], [6,7]]))  # Output: [44, 13, 7, 6, 2]

[44, 13, 7, 6, 2]


In [47]:
grader.check("q9")

**Question 9:** Create a DataFrame mirroring the table below and assign this to `data`.

| flavor | scoops | price |
|-----|-----|-----|
| white chocolate | 1 | 2 |
| vanilla | 1 | 1.5 |
| dark chocolate | 2 | 3 |
| strawberry | 1 | 2 |
| strawberry | 3 | 4 |
| vanilla | 2 | 2 |
| mint | 1 | 4 |
| mint | 2 | 5 |
| white chocolate | 3 | 2 |
| dark chocolate | 3 | 3 |
| white chocolate | 2 | 2 |
| dark chocolate | 5 | 3 |


In [56]:
import pandas as pd

data = pd.DataFrame({
    'flavor': ['white chocolate', 'vanilla', 'dark chocolate', 'strawberry', 'strawberry', 'vanilla', 'mint', 'mint', 'white chocolate', 'dark chocolate', 'white chocolate', 'dark chocolate'],
    'scoops': [1, 1, 2, 1, 3, 2, 1, 2, 3, 3, 2, 5],
    'price': [2, 1.5, 3, 2, 4, 2, 4, 5, 2, 3, 2, 3]
})


**Question 10:** Do the following to the dataframe:
* Create a new collumn ``total_price`` whose value is equal to $scoops * price$*
* Write a function ``groupStatistics(data, groupValue)``. Internally, this function groups ``data``  by ``flavor`` and then returns statistics of a given grouped item ``groupValue`` indexed on the ``total_price`` columns. The statistics is a numpy array contains ``[mean, media, min, max, std]`` of the ``total_price`` column. The ``std`` should be rounded to 2 **decimal places**



In [54]:
import pandas as pd
import numpy as np


# Add the total_price column
data['total_price'] = data['scoops'] * data['price']

# Define the groupStatistics function
def groupStatistics(data, groupValue):
    grouped = data.groupby('flavor')
    group = grouped.get_group(groupValue)
    total_price = group['total_price']
    statistics = [
        total_price.mean(),
        total_price.median(),
        total_price.min(),
        round(total_price.std(), 2)
    ]
    return np.array(statistics)

# Example usage
print(data)
print(groupStatistics(data, 'white chocolate'))  # Output: [mean, median, min, std] for 'white chocolate'


             flavor  scoops  price  total_price
0   white chocolate       1    2.0          2.0
1           vanilla       1    1.5          1.5
2    dark chocolate       2    3.0          6.0
3        strawberry       1    2.0          2.0
4        strawberry       3    4.0         12.0
5           vanilla       2    2.0          4.0
6              mint       1    4.0          4.0
7              mint       2    5.0         10.0
8   white chocolate       3    2.0          6.0
9    dark chocolate       3    3.0          9.0
10  white chocolate       2    2.0          4.0
11   dark chocolate       5    3.0         15.0
[4. 4. 2. 2.]


In [55]:
grader.check("q11")

## Submission

Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit. **Please save before exporting!**

Download the exported ZIP. Take note of the ZIP number and proceed to fill the summer school form

In [None]:
# Save your notebook first, then run this cell to export your submission.
grader.export(pdf=False)