<a href="https://colab.research.google.com/github/mimomaina/pythondataanalysis/blob/main/Numpy5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Numpy Exercise 5

### All of the questions in this exercise are attributed to rougier/numpy-100

In [2]:
import numpy as np

#### 61. Find the nearest value from a given value in an array (★★☆)

In [3]:
array = np.random.uniform(0, 100, 10)
value = 50
nearest = array[np.abs(array - value).argmin()]
print("Array:", array)
print("Nearest to", value, ":", nearest)


Array: [42.64299219 73.09489194 47.26463512  4.8027526  90.74742482 24.86147469
 43.65754889 42.6724135  33.40464178 50.228519  ]
Nearest to 50 : 50.2285189971833


#### 62. Considering two arrays with shape (1,3) and (3,1), how to compute their sum using an iterator? (★★☆)

In [4]:
A = np.arange(3).reshape(1, 3)
B = np.arange(3).reshape(3, 1)
it = np.nditer([A, B, None])
for x, y, z in it:
    z[...] = x + y
print(it.operands[2])


[[0 1 2]
 [1 2 3]
 [2 3 4]]


#### 63. Create an array class that has a name attribute (★★☆)

In [5]:
class NamedArray(np.ndarray):
    def __new__(cls, input_array, name=""):
        obj = np.asarray(input_array).view(cls)
        obj.name = name
        return obj

array = NamedArray(np.arange(10), name="MyArray")
print(array.name)


MyArray


#### 64. Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices)? (★★★)

In [6]:
vector = np.ones(10)
indices = [1, 2, 3, 2, 5]
np.add.at(vector, indices, 1)
print(vector)


[1. 2. 3. 2. 1. 2. 1. 1. 1. 1.]


#### 65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★)

In [7]:
X = [1, 2, 3, 4, 5]
I = [0, 1, 2, 3, 4]
F = np.zeros(6)
np.add.at(F, I, X)
print(F)  # F accumulates values from X based on indices I


[1. 2. 3. 4. 5. 0.]


#### 66. Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors (★★☆)

In [8]:
image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
unique_colors = len(np.unique(image.reshape(-1, 3), axis=0))
print("Unique colors:", unique_colors)


Unique colors: 65395


#### 67. Considering a four dimensions array, how to get sum over the last two axis at once? (★★★)

In [9]:
array = np.random.random((4, 3, 4, 5))
result = array.sum(axis=(-2, -1))
print(result.shape)


(4, 3)


#### 68. Considering a one-dimensional vector D, how to compute means of subsets of D using a vector S of same size describing subset  indices? (★★★)

In [10]:
D = np.random.random(10)
S = np.random.randint(0, 3, 10)
means = [D[S == i].mean() for i in np.unique(S)]
print("Means by subset:", means)


Means by subset: [0.0842285712414862, 0.5052932437846632, 0.5562896082762995]


#### 69. How to get the diagonal of a dot product? (★★★)

In [11]:
A = np.random.random((5, 5))
B = np.random.random((5, 5))
diagonal = np.sum(A * B.T, axis=1)  # Dot product diagonal
print(diagonal)


[1.76035171 1.52785898 0.75853939 1.70166934 0.54131933]


#### 70. Consider the vector [1, 2, 3, 4, 5], how to build a new vector with 3 consecutive zeros interleaved between each value? (★★★)

In [12]:
Z = np.array([1, 2, 3, 4, 5])
new_vector = np.zeros(len(Z) + (len(Z) - 1) * 3)
new_vector[::4] = Z
print(new_vector)


[1. 0. 0. 0. 2. 0. 0. 0. 3. 0. 0. 0. 4. 0. 0. 0. 5.]


#### 71. Consider an array of dimension (5,5,3), how to mulitply it by an array with dimensions (5,5)? (★★★)

In [13]:
A = np.random.random((5, 5, 3))
B = np.random.random((5, 5))
result = A * B[:, :, None]
print(result.shape)


(5, 5, 3)


#### 72. How to swap two rows of an array? (★★★)

In [14]:
array = np.arange(25).reshape(5, 5)
array[[0, 1]] = array[[1, 0]]  # Swaps row 0 and row 1
print(array)


[[ 5  6  7  8  9]
 [ 0  1  2  3  4]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


#### 73. Consider a set of 10 triplets describing 10 triangles (with shared vertices), find the set of unique line segments composing all the  triangles (★★★)

In [15]:
triangles = np.random.randint(0, 10, (10, 3))  # 10 triangles with shared vertices
segments = np.sort(np.array([triangles[:, [0, 1]],
                             triangles[:, [1, 2]],
                             triangles[:, [2, 0]]]).reshape(-1, 2), axis=1)
unique_segments = np.unique(segments, axis=0)
print(unique_segments)


[[0 5]
 [0 7]
 [1 2]
 [1 4]
 [1 6]
 [1 8]
 [2 2]
 [2 4]
 [2 6]
 [2 9]
 [4 6]
 [4 7]
 [4 8]
 [5 6]
 [5 7]
 [6 7]
 [6 8]
 [6 9]
 [7 7]
 [8 8]
 [8 9]]


#### 74. Given a sorted array C that corresponds to a bincount, how to produce an array A such that np.bincount(A) == C? (★★★)

In [16]:
C = np.array([3, 2, 1])
A = np.repeat(np.arange(len(C)), C)
print(A)


[0 0 0 1 1 2]


#### 75. How to compute averages using a sliding window over an array? (★★★)

In [17]:
array = np.arange(10)
window_size = 3
averages = np.convolve(array, np.ones(window_size)/window_size, mode='valid')
print(averages)


[1. 2. 3. 4. 5. 6. 7. 8.]


#### 76. Consider a one-dimensional array Z, build a two-dimensional array whose first row is (Z[0],Z[1],Z[2]) and each subsequent row is  shifted by 1 (last row should be (Z[-3],Z[-2],Z[-1]) (★★★)

In [18]:
Z = np.arange(1, 15)
shape = (Z.size - 3 + 1, 3)
result = np.lib.stride_tricks.sliding_window_view(Z, 3)
print(result)


[[ 1  2  3]
 [ 2  3  4]
 [ 3  4  5]
 [ 4  5  6]
 [ 5  6  7]
 [ 6  7  8]
 [ 7  8  9]
 [ 8  9 10]
 [ 9 10 11]
 [10 11 12]
 [11 12 13]
 [12 13 14]]


#### 77. How to negate a boolean, or to change the sign of a float inplace? (★★★)

In [19]:
# Negate a boolean array
bool_array = np.array([True, False, True])
np.logical_not(bool_array, out=bool_array)
print(bool_array)

# Change the sign of a float array
float_array = np.array([1.0, -2.5, 3.0])
np.negative(float_array, out=float_array)
print(float_array)


[False  True False]
[-1.   2.5 -3. ]


#### 78. Consider 2 sets of points P0,P1 describing lines (2d) and a point p, how to compute distance from p to each line i (P0[i],P1[i])? (★★★)

In [20]:
P0 = np.random.random((10, 2))
P1 = np.random.random((10, 2))
p = np.random.random((1, 2))

# Distance computation
d = np.abs(np.cross(P1 - P0, P0 - p)) / np.linalg.norm(P1 - P0, axis=1)
print(d)


[0.50130819 0.19549072 0.14701289 0.29717131 0.41433825 0.22307048
 0.15966857 0.09601461 0.18554161 0.35451751]


#### 79. Consider 2 sets of points P0,P1 describing lines (2d) and a set of points P, how to compute distance from each point j (P[j]) to each line i (P0[i],P1[i])? (★★★)

In [24]:
P0 = np.random.random((5, 2))  # 5 starting points of lines
P1 = np.random.random((5, 2))  # 5 ending points of lines
P = np.random.random((10, 2))  # 10 points

# Compute distances
line_vec = P1 - P0
point_vec = P[:, None, :] - P0
proj_len = np.sum(point_vec * line_vec, axis=2) / np.sum(line_vec**2, axis=1)
proj = P0 + proj_len[..., None] * line_vec
distances = np.linalg.norm(P[:, None, :] - proj, axis=2)
print(distances)


[[0.47195659 0.40230336 0.15421721 0.19350983 0.08747378]
 [0.54591964 0.32457674 0.39909004 0.24765229 0.28710553]
 [0.00062934 0.23163008 0.04835698 0.2127418  0.01512128]
 [0.16535497 0.34786658 0.24837364 0.05090114 0.1846723 ]
 [0.26714519 0.22815635 0.05564728 0.3056768  0.2662752 ]
 [0.35387246 0.48597627 0.46821627 0.11554788 0.39815467]
 [0.46769112 0.21179855 0.37455973 0.33662369 0.32080336]
 [0.20591244 0.15192913 0.08933663 0.27036627 0.26646236]
 [0.10122699 0.27209971 0.20820389 0.02550217 0.17551533]
 [0.28613998 0.29097481 0.09229973 0.4282436  0.35257843]]


#### 80. Consider an arbitrary array, write a function that extract a subpart with a fixed shape and centered on a given element (pad with a `fill` value when necessary) (★★★)

In [25]:
def extract_subarray(arr, center, shape, fill_value=0):
    pad_width = [(s//2, s//2) for s in shape]
    padded = np.pad(arr, pad_width, constant_values=fill_value)
    slices = tuple(slice(c + s//2, c + s//2 + s) for c, s in zip(center, shape))
    return padded[slices]

arr = np.arange(100).reshape(10, 10)
result = extract_subarray(arr, (5, 5), (3, 3), fill_value=-1)
print(result)


[[55 56 57]
 [65 66 67]
 [75 76 77]]


#### 81. Consider an array Z = [1,2,3,4,5,6,7,8,9,10,11,12,13,14], how to generate an array R = [[1,2,3,4], [2,3,4,5], [3,4,5,6], ..., [11,12,13,14]]? (★★★)

In [21]:
Z = np.arange(1, 15)
R = np.lib.stride_tricks.sliding_window_view(Z, 4)
print(R)


[[ 1  2  3  4]
 [ 2  3  4  5]
 [ 3  4  5  6]
 [ 4  5  6  7]
 [ 5  6  7  8]
 [ 6  7  8  9]
 [ 7  8  9 10]
 [ 8  9 10 11]
 [ 9 10 11 12]
 [10 11 12 13]
 [11 12 13 14]]


#### 82. Compute a matrix rank (★★★)

In [22]:
A = np.random.random((5, 5))
rank = np.linalg.matrix_rank(A)
print(rank)


5


#### 83. How to find the most frequent value in an array?

In [23]:
array = np.random.randint(0, 10, 50)
most_frequent = np.bincount(array).argmax()
print(most_frequent)


4


#### 84. Extract all the contiguous 3x3 blocks from a random 10x10 matrix (★★★)

In [26]:
matrix = np.random.random((10, 10))
blocks = np.lib.stride_tricks.sliding_window_view(matrix, (3, 3))
print(blocks.shape)


(8, 8, 3, 3)


#### 85. Create a 2D array subclass such that Z[i,j] == Z[j,i] (★★★)

In [27]:
class SymmetricArray(np.ndarray):
    def __setitem__(self, index, value):
        i, j = index
        super(SymmetricArray, self).__setitem__((i, j), value)
        super(SymmetricArray, self).__setitem__((j, i), value)

Z = np.zeros((5, 5)).view(SymmetricArray)
Z[0, 1] = 10
print(Z)


[[ 0. 10.  0.  0.  0.]
 [10.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]


#### 86. Consider a set of p matrices wich shape (n,n) and a set of p vectors with shape (n,1). How to compute the sum of of the p matrix products at once? (result has shape (n,1)) (★★★)

In [28]:
p, n = 3, 4
matrices = np.random.random((p, n, n))
vectors = np.random.random((p, n, 1))
result = np.einsum('ijk,ikl->jl', matrices, vectors)
print(result)


[[2.89612929]
 [2.60598612]
 [3.42051785]
 [2.69779135]]


#### 87. Consider a 16x16 array, how to get the block-sum (block size is 4x4)? (★★★)

In [29]:
array = np.random.random((16, 16))
block_sum = array.reshape(4, 4, 4, 4).sum(axis=(2, 3))
print(block_sum)


[[5.3067696  6.95499767 6.14577391 8.60871521]
 [6.4400076  8.27971905 9.17743092 7.07922625]
 [8.44339685 9.74088048 7.0023965  8.27807452]
 [7.72899102 8.05362237 9.10902342 8.79415745]]


#### 88. How to implement the Game of Life using numpy arrays? (★★★)

In [30]:
def game_of_life_step(grid):
    neighbors = sum(np.roll(np.roll(grid, i, 0), j, 1)
                    for i in (-1, 0, 1) for j in (-1, 0, 1) if (i, j) != (0, 0))
    return (neighbors == 3) | (grid & (neighbors == 2))

grid = np.random.randint(0, 2, (10, 10))
next_grid = game_of_life_step(grid)
print(next_grid)


[[1 0 1 1 1 0 0 1 1 1]
 [0 0 0 1 1 0 0 0 1 0]
 [1 1 1 0 1 1 0 0 0 1]
 [0 0 0 1 0 1 0 0 0 0]
 [0 0 0 0 1 1 1 0 0 0]
 [0 0 0 1 0 1 1 0 0 0]
 [0 0 0 0 0 1 0 1 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 1 1 0 0 1 0]
 [0 0 1 1 1 1 0 0 0 0]]


#### 89. How to get the n largest values of an array (★★★)

In [31]:
array = np.random.random(10)
n = 3
largest_values = array[np.argsort(array)[-n:]]
print(largest_values)


[0.64504032 0.71075648 0.96750472]


#### 90. Given an arbitrary number of vectors, build the cartesian product (every combinations of every item) (★★★)

In [32]:
arrays = [np.arange(3), np.arange(3, 6), np.arange(6, 9)]
cartesian_product = np.array(np.meshgrid(*arrays)).T.reshape(-1, len(arrays))
print(cartesian_product)


[[0 3 6]
 [0 4 6]
 [0 5 6]
 [1 3 6]
 [1 4 6]
 [1 5 6]
 [2 3 6]
 [2 4 6]
 [2 5 6]
 [0 3 7]
 [0 4 7]
 [0 5 7]
 [1 3 7]
 [1 4 7]
 [1 5 7]
 [2 3 7]
 [2 4 7]
 [2 5 7]
 [0 3 8]
 [0 4 8]
 [0 5 8]
 [1 3 8]
 [1 4 8]
 [1 5 8]
 [2 3 8]
 [2 4 8]
 [2 5 8]]


#### 91. How to create a record array from a regular array? (★★★)

In [33]:
array = np.random.random((4, 3))
record_array = np.core.records.fromarrays(array.T, names='col1, col2, col3')
print(record_array)


[(0.48846854, 0.17347313, 0.92991049) (0.87994347, 0.47719646, 0.52642642)
 (0.86981504, 0.22507754, 0.58816266) (0.45234959, 0.50825068, 0.7748238 )]


#### 92. Consider a large vector Z, compute Z to the power of 3 using 3 different methods (★★★)

In [34]:
Z = np.random.random(1000000)

# Method 1: Element-wise power
result1 = Z ** 3

# Method 2: Multiplication
result2 = Z * Z * Z

# Method 3: np.power
result3 = np.power(Z, 3)

print(np.allclose(result1, result2) and np.allclose(result2, result3))


True


#### 93. Consider two arrays A and B of shape (8,3) and (2,2). How to find rows of A that contain elements of each row of B regardless of the order of the elements in B? (★★★)

In [43]:
A = np.array([1, 2, 3, 4, 5])
B = np.array([[2, 3], [3, 4], [1, 5]])

# Find rows in B containing all elements of each row in A
mask = [np.all(np.isin(b, A)) for b in B]
result = B[mask]

print("Array A:\n", A)
print("Array B:\n", B)
print("Result:\n", result)

Array A:
 [1 2 3 4 5]
Array B:
 [[2 3]
 [3 4]
 [1 5]]
Result:
 [[2 3]
 [3 4]
 [1 5]]


#### 94. Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3]) (★★★)

In [36]:
matrix = np.random.randint(0, 5, (10, 3))

# Rows with unequal values
unequal_rows = matrix[np.all(matrix[:, None] != matrix[:, :, None], axis=(1, 2))]
print("Original matrix:\n", matrix)
print("Rows with unequal values:\n", unequal_rows)


Original matrix:
 [[1 1 1]
 [0 0 3]
 [1 3 4]
 [1 3 0]
 [1 0 3]
 [0 3 4]
 [1 2 2]
 [4 2 0]
 [0 3 2]
 [0 2 0]]
Rows with unequal values:
 []


#### 95. Convert a vector of ints into a matrix binary representation (★★★)

In [37]:
vector = np.array([3, 5, 7, 10])
binary_matrix = ((vector[:, None] & (1 << np.arange(8))) > 0).astype(int)
print("Binary representation:\n", binary_matrix)


Binary representation:
 [[1 1 0 0 0 0 0 0]
 [1 0 1 0 0 0 0 0]
 [1 1 1 0 0 0 0 0]
 [0 1 0 1 0 0 0 0]]


#### 96. Given a two dimensional array, how to extract unique rows? (★★★)

In [38]:
array = np.random.randint(0, 5, (10, 3))

# Extract unique rows
unique_rows = np.unique(array, axis=0)
print("Original array:\n", array)
print("Unique rows:\n", unique_rows)


Original array:
 [[1 1 1]
 [4 1 2]
 [3 3 1]
 [1 2 2]
 [1 0 0]
 [4 3 1]
 [2 3 0]
 [3 0 3]
 [4 3 4]
 [2 1 0]]
Unique rows:
 [[1 0 0]
 [1 1 1]
 [1 2 2]
 [2 1 0]
 [2 3 0]
 [3 0 3]
 [3 3 1]
 [4 1 2]
 [4 3 1]
 [4 3 4]]


#### 97. Considering 2 vectors A & B, write the einsum equivalent of inner, outer, sum, and mul function (★★★)

In [39]:
A = np.random.random(5)
B = np.random.random(5)

# Inner product
inner = np.einsum('i,i->', A, B)

# Outer product
outer = np.einsum('i,j->ij', A, B)

# Element-wise multiplication
mul = np.einsum('i,i->i', A, B)

# Sum of elements
sum_ = np.einsum('i->', A)

print("Inner product:", inner)
print("Outer product:\n", outer)
print("Element-wise multiplication:", mul)
print("Sum of elements:", sum_)


Inner product: 1.1543625510521358
Outer product:
 [[0.28694356 0.20302251 0.05090382 0.37038217 0.68592793]
 [0.14134662 0.10000763 0.02507491 0.18244796 0.33788384]
 [0.19567241 0.138445   0.03471231 0.25257082 0.46774762]
 [0.05334865 0.03774602 0.00946406 0.06886158 0.12752797]
 [0.27770248 0.19648412 0.04926445 0.35845393 0.66383747]]
Element-wise multiplication: [0.28694356 0.10000763 0.03471231 0.06886158 0.66383747]
Sum of elements: 2.364592094388464


#### 98. Considering a path described by two vectors (X,Y), how to sample it using equidistant samples (★★★)?

In [40]:
X = np.linspace(0, 10, num=100)
Y = np.sin(X)

# Calculate cumulative distances
distances = np.sqrt(np.diff(X)**2 + np.diff(Y)**2)
cumulative_distances = np.insert(np.cumsum(distances), 0, 0)

# Interpolate to get equidistant samples
num_samples = 20
equidistant_points = np.linspace(0, cumulative_distances[-1], num=num_samples)
new_X = np.interp(equidistant_points, cumulative_distances, X)
new_Y = np.interp(equidistant_points, cumulative_distances, Y)

print("Equidistant X samples:\n", new_X)
print("Equidistant Y samples:\n", new_Y)


Equidistant X samples:
 [ 0.          0.4643122   0.98125424  1.59533194  2.20299317  2.71385268
  3.17631325  3.64243325  4.16601205  4.78585168  5.38639837  5.8915218
  6.35260973  6.82132623  7.35145945  7.9763721   8.5694658   9.06907505
  9.52901636 10.        ]
Equidistant Y samples:
 [ 0.          0.44726543  0.83033885  0.99886416  0.80608759  0.41455857
 -0.03466526 -0.48001733 -0.85360156 -0.99610525 -0.78045917 -0.38131081
  0.0693473   0.51189183  0.87561026  0.99235321  0.75424217  0.34793299
 -0.10391869 -0.54402111]


#### 99. Given an integer n and a 2D array X, select from X the rows which can be interpreted as draws from a multinomial distribution with n degrees, i.e., the rows which only contain integers and which sum to n. (★★★)

In [41]:
n = 5
X = np.random.randint(0, 5, (10, 4))

# Select rows where the sum equals n
valid_rows = X[np.all(X == X.astype(int), axis=1) & (X.sum(axis=1) == n)]
print("Original array:\n", X)
print("Rows that are valid multinomial draws:\n", valid_rows)


Original array:
 [[3 3 4 2]
 [4 3 0 2]
 [1 3 0 4]
 [2 4 4 3]
 [2 3 4 1]
 [1 4 0 1]
 [2 2 1 2]
 [2 2 1 2]
 [3 0 2 4]
 [0 4 1 1]]
Rows that are valid multinomial draws:
 []


#### 100. Compute bootstrapped 95% confidence intervals for the mean of a 1D array X (i.e., resample the elements of an array with replacement N times, compute the mean of each sample, and then compute percentiles over the means). (★★★)

In [42]:
X = np.random.rand(100)

# Bootstrap sampling
N = 1000
bootstrap_means = np.array([np.mean(np.random.choice(X, size=len(X), replace=True)) for _ in range(N)])

# 95% confidence intervals
lower_bound = np.percentile(bootstrap_means, 2.5)
upper_bound = np.percentile(bootstrap_means, 97.5)

print("95% confidence interval for the mean:", (lower_bound, upper_bound))


95% confidence interval for the mean: (0.4191121771088326, 0.541876765992296)
