### Imports

In [None]:
import numpy as np

### Task 1: Reshape a given array

Your task is to implement a function that reshapes a given NumPy array. Please use the template `reshape_array` for the implementation.

**NOTE:** The first line of the cell below contains the magic command `%%writefile`, which saves the content of the cell into the specified file. Right now, all such commands are commented out. Please make sure to follow the steps below after you complete the subtasks:

1) Uncomment **all** `%%writefile` commands
2) Re-run the whole notebook

This is important, as we will check the `.py` files generated.

In [None]:
%%writefile ./../solutions/reshape.py
from typing import List, Tuple
import numpy as np

def reshape_array(
    A: np.ndarray,
    target_dimensionality: int,
    target_shape_requirements: List[Tuple[int, int]],
) -> np.ndarray:
    """Reshape ``A`` into ``target_dimensionality`` axes honouring fixed axis sizes.

    Every axis named in ``target_shape_requirements`` gets exactly the requested
    size. The first axis that is not named absorbs all remaining elements, and
    every other unnamed axis has size 1. Elements keep their C (row-major) order.

    Args:
        A: the array to reshape.
        target_dimensionality: number of axes of the result.
        target_shape_requirements: ``(axis, size)`` pairs. Negative axes count
            from the end. ``None`` or an empty list means "no requirements".
            If an axis is listed more than once, the last entry wins.
    Returns:
        The reshaped array with ``ndim == target_dimensionality``.
    Raises:
        IndexError: if a required axis is outside the target dimensionality.
        ValueError: if the number of elements of ``A`` cannot be distributed
            over the requested shape.
    """
    # Normalise the requirements into {non-negative axis: size}.
    fixed_sizes = {}
    for dim, size in (target_shape_requirements or []):
        if not -target_dimensionality <= dim < target_dimensionality:
            raise IndexError(
                f"axis {dim} is out of range for {target_dimensionality} dimensions"
            )
        fixed_sizes[dim % target_dimensionality] = size

    shape = [1] * target_dimensionality
    specified_product = 1
    for dim, size in fixed_sizes.items():
        shape[dim] = size
        specified_product *= size

    total_elements = A.size

    # Only the first unconstrained axis is inferred; the rest stay at size 1.
    free_dims = [d for d in range(target_dimensionality) if d not in fixed_sizes]
    if free_dims:
        if specified_product == 0:
            # A zero-sized axis forces an empty result; the free axis may then
            # have any size, so it is left at 1.
            if total_elements != 0:
                raise ValueError(
                    f"cannot reshape {total_elements} elements into a shape "
                    f"with a zero-sized axis"
                )
        else:
            if total_elements % specified_product != 0:
                raise ValueError(
                    f"cannot distribute {total_elements} elements over fixed "
                    f"axis sizes with product {specified_product}"
                )
            shape[free_dims[0]] = total_elements // specified_product

    # If every axis is fixed, NumPy itself validates the total element count.
    return A.reshape(shape)

In [None]:
# Tests

A = np.array(range(30))

# Test 1: every axis is fixed by the requirements.

target_dimensionality = 2
target_shape_requirements = [(0, 5), (1, 6)]
result1 = reshape_array(A, target_dimensionality, target_shape_requirements)

assert result1.ndim == target_dimensionality
assert result1.shape == (5, 6)
assert np.allclose(result1.ravel(), A.ravel())

# Test 2: the single unconstrained axis absorbs the remaining elements.

target_dimensionality = 3
target_shape_requirements = [(0, 5), (2, 2)]
result2 = reshape_array(A, target_dimensionality, target_shape_requirements)

assert result2.ndim == target_dimensionality
assert result2.shape == (5, 3, 2)
assert np.allclose(result2.ravel(), A.ravel())

# Test 3: many unconstrained axes; only the required ones are checked.

target_dimensionality = 10
target_shape_requirements = [(0, 5), (2, 2)]
result3 = reshape_array(A, target_dimensionality, target_shape_requirements)

assert result3.ndim == target_dimensionality
assert result3.shape[0] == 5
assert result3.shape[2] == 2
assert np.allclose(result3.ravel(), A.ravel())

<p>Below you can see what the expected arrays look like for the sample tests above.</p>

<img src="docs/Task_1__Test_1.png" alt="Drawing" style="width: 200px;"/>

<img src="docs/Task_1__Test_2.png" alt="Drawing" style="width: 200px;"/>

<img src="docs/Task_1__Test_3.png" alt="Drawing" style="width: 200px;"/>

### Task 2: Find K nearest neighbors in N-dimensional space

Your task is to implement a function that finds K nearest neighbors using two given sets of points in N-dimensional space. Please use the template `find_knn` for the implementation.

In [None]:
%%writefile ./../solutions/knn.py
import numpy as np


def find_knn(points_a: np.ndarray, points_b: np.ndarray, k: int) -> np.ndarray:
    """For each point of A, return the indices of its K closest points in B.

    Distances are measured with the L2 (Euclidean) metric and computed with
    broadcasting, without Python-level loops.

    It is guaranteed that:
    1) points_a.shape == (k1, N), N >= 1, k1 >= 1
    2) points_b.shape == (k2, N), k2 >= k >= 1

    Args:
        points_a: np.ndarray, the first set of points (A)
        points_b: np.ndarray, the second set of points (B)
        k: int, the number of nearest neighbor indices to return for each point from A
    Returns:
        np.ndarray of shape (k1, k); row i holds indices into ``points_b``
        ordered from the nearest to the farthest neighbor of ``points_a[i]``.
        Equidistant points are ordered by ascending index.
    """
    # Promote to float before subtracting: with unsigned or narrow integer
    # dtypes the difference would wrap around and give wrong distances.
    a = np.asarray(points_a, dtype=np.float64)
    b = np.asarray(points_b, dtype=np.float64)

    # (k1, 1, N) - (1, k2, N) broadcasts to (k1, k2, N); the norm over the last
    # axis yields the (k1, k2) matrix of pairwise distances.
    distances = np.linalg.norm(a[:, np.newaxis, :] - b[np.newaxis, :, :], axis=2)

    # A stable sort makes tie-breaking deterministic (lower index first).
    knn_indices = np.argsort(distances, axis=1, kind="stable")[:, :k]

    return knn_indices

In [None]:
# Tests

# Test 1: one-dimensional points.
A = np.array([
    [1],
    [50],
    [100],
])
B = np.array([
    [1],
    [2],
    [3],
    [50],
    [51],
    [52],
    [101],
    [103],
    [105],
])

assert np.allclose(
    find_knn(A, B, k=1),
    np.array([[0], [3], [6]])
)

assert np.allclose(
    find_knn(A, B, k=3),
    np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
)

# Test 2: two-dimensional points.

A = np.array([
    [1, 100],
    [50, 50],
    [100, 1],
])
B = np.array([
    [3, 99],
    [49, 56],
    [94, 12],
])

assert np.allclose(
    find_knn(A, B, k=1),
    np.array([[0], [1], [2]])
)

assert np.allclose(
    find_knn(A, B, k=2),
    np.array([[0, 1], [1, 2], [2, 1]])
)

### Task 3: Filtering and Indexing

Your task is to implement a function that filters a given 2d array based on a set of rules. Please use the template `filter_data` for the implementation.

In [None]:
%%writefile ./../solutions/filtering.py
from typing import List
import numpy as np


def filter_data(data: np.ndarray, rules_to_ignore: List[int] = []) -> np.ndarray:
    """Filter the rows and columns of a 2d array with a fixed sequence of rules.

    The rules run in the order 0 -> 1 -> 2 -> 3, each one operating on the
    output of the previous one:

    0) Drop every row that contains a NaN (np.nan).
    1) Drop every row whose largest element sits in a column with an odd index.
    2) Drop every column that has more negative than positive elements.
    3) Compute the standard deviation of each column, average those values
       into `mu_std_cols`, and drop every column in which more than 10
       elements are larger than `2 * mu_std_cols`.

    Args:
        data: np.ndarray, the 2d array to filter (it is copied, not modified)
        rules_to_ignore: List[int], rule numbers from {0, 1, 2, 3} to skip
    Returns:
        np.ndarray, the filtered array
    """

    def enabled(rule: int) -> bool:
        # A rule runs unless the caller explicitly listed it as ignored.
        return rule not in rules_to_ignore

    result = data.copy()

    # Rule 0: keep only rows without any NaN.
    if enabled(0):
        rows_with_nan = np.any(np.isnan(result), axis=1)
        result = result[np.logical_not(rows_with_nan)]

    # Rule 1: keep rows whose maximum lies in an even-indexed column.
    if enabled(1):
        argmax_per_row = result.argmax(axis=1)
        result = result[argmax_per_row % 2 == 0]

    # Rule 2: keep columns with at least as many positives as negatives.
    if enabled(2):
        n_negative = np.count_nonzero(result < 0, axis=0)
        n_positive = np.count_nonzero(result > 0, axis=0)
        result = result[:, n_positive >= n_negative]

    # Rule 3: keep columns with at most 10 values above twice the mean column std.
    if enabled(3):
        threshold = 2 * result.std(axis=0).mean()
        n_above = np.count_nonzero(result > threshold, axis=0)
        result = result[:, n_above <= 10]

    return result

In [None]:
# Tests

# Test 1: with every rule ignored the data comes back unchanged.
A = np.array([[1, 2, np.nan], [2, 4, 3], [3, 4, 5]])

assert np.allclose(
    np.nan_to_num(filter_data(A, rules_to_ignore=[0, 1, 2, 3])),
    np.nan_to_num(A)
)

# Test 2: only rule 0 is active.
assert np.allclose(
    filter_data(A, rules_to_ignore=[1, 2, 3]),
    np.array([[2, 4, 3], [3, 4, 5]])
)

# Test 3: only rule 1 is active.
assert np.allclose(
    np.nan_to_num(filter_data(A, rules_to_ignore=[0, 2, 3])),
    np.nan_to_num(np.array([[1, 2, np.nan], [3, 4, 5]]))
)

# Test 4: only rule 2 is active.
np.random.seed(38)
A = np.random.normal(0, 1, (500, 500))
assert filter_data(A, rules_to_ignore=[0, 1, 3]).shape == (500, 247)

# Test 5: only rule 3 is active.
np.random.seed(107)
A = np.random.normal(0, 1, (500, 100))
assert filter_data(A, rules_to_ignore=[0, 1, 2]).shape == (500, 38)

# Test 6: all rules are active.
np.random.seed(452)
A = np.random.normal(0, 1, (100, 10))
assert filter_data(A).shape == (50, 5)