<h1 align="center" >  <font color="Orange"> Numpy ~ Exercises </font> </h1>

In [1]:
import numpy as np

### Fancy Indexing

In [2]:
# Select random samples (common in mini-batch training)
X = np.arange(100).reshape(20, 5)  # 20 samples, 5 features
batch_indices = np.random.choice(20, size=4, replace=False)
batch = X[batch_indices]
print(f"Random batch shape: {batch.shape}")
print(f"Batch:\n{batch}")

# Select specific features
feature_indices = [0, 2, 4]  # Select features 0, 2, 4
X_subset = X[:, feature_indices]
print(f"\nSubset features shape: {X_subset.shape}")
print(f"First 5 rows of subset:\n{X_subset[:5]}")

Random batch shape: (4, 5)
Batch:
[[ 0  1  2  3  4]
 [25 26 27 28 29]
 [30 31 32 33 34]
 [ 5  6  7  8  9]]

Subset features shape: (20, 3)
First 5 rows of subset:
[[ 0  2  4]
 [ 5  7  9]
 [10 12 14]
 [15 17 19]
 [20 22 24]]


### Feature Normalization (Z-score) using broadcasting

In [3]:
# Normalize features: (X - mean) / std
X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9],
              [10, 11, 12]], dtype=float)

# Compute mean and std along axis 0 (for each feature)
mean = X.mean(axis=0)  # Shape: (3,)
std = X.std(axis=0)    # Shape: (3,)

print(f"Original data:\n{X}")
print(f"Mean per feature: {mean}")
print(f"Std per feature: {std}")

# Broadcasting: (4,3) - (3,) -> (4,3) - (1,3) -> (4,3)
X_normalized = (X - mean) / std
print(f"\nNormalized data:\n{X_normalized}")
print(f"New mean per feature: {X_normalized.mean(axis=0)}")
print(f"New std per feature: {X_normalized.std(axis=0)}")

Original data:
[[ 1.  2.  3.]
 [ 4.  5.  6.]
 [ 7.  8.  9.]
 [10. 11. 12.]]
Mean per feature: [5.5 6.5 7.5]
Std per feature: [3.35410197 3.35410197 3.35410197]

Normalized data:
[[-1.34164079 -1.34164079 -1.34164079]
 [-0.4472136  -0.4472136  -0.4472136 ]
 [ 0.4472136   0.4472136   0.4472136 ]
 [ 1.34164079  1.34164079  1.34164079]]
New mean per feature: [0. 0. 0.]
New std per feature: [1. 1. 1.]


### Min-Max Scaling

In [4]:
# Scale features to [0, 1]: (X - min) / (max - min)
X = np.array([[1, 2],
              [3, 4],
              [5, 6],
              [7, 8]], dtype=float)

X_min = X.min(axis=0)  # Shape: (2,)
X_max = X.max(axis=0)  # Shape: (2,)

X_scaled = (X - X_min) / (X_max - X_min)
print(f"Min-max scaled:\n{X_scaled}")

Min-max scaled:
[[0.         0.        ]
 [0.33333333 0.33333333]
 [0.66666667 0.66666667]
 [1.         1.        ]]


### UFuncs in Machine learning

- Activation Functions
- Distance Calculation

In [5]:
# ReLU (Rectified Linear Unit)
def relu(x):
    return np.maximum(0, x)

# Sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Tanh
def tanh(x):
    return np.tanh(x)

# Softmax (for classification)
def softmax(x):
    exp_x = np.exp(x - np.max(x))  # Subtract max for numerical stability
    return exp_x / np.sum(exp_x, axis=0)

# Test activation functions
x = np.array([-2, -1, 0, 1, 2])
print(f"Input: {x}")
print(f"ReLU: {relu(x)}")
print(f"Sigmoid: {sigmoid(x)}")
print(f"Tanh: {tanh(x)}")

# Softmax on logits
logits = np.array([2.0, 1.0, 0.1])
print(f"\nLogits: {logits}")
print(f"Softmax: {softmax(logits)}")
print(f"Sum: {np.sum(softmax(logits))}")  # Should be 1.0

Input: [-2 -1  0  1  2]
ReLU: [0 0 0 1 2]
Sigmoid: [0.11920292 0.26894142 0.5        0.73105858 0.88079708]
Tanh: [-0.96402758 -0.76159416  0.          0.76159416  0.96402758]

Logits: [2.  1.  0.1]
Softmax: [0.65900114 0.24243297 0.09856589]
Sum: 1.0


In [6]:
# Euclidean distance between vectors
def euclidean_distance(x, y):
    return np.sqrt(np.sum((x - y) ** 2))

# Manhattan distance
def manhattan_distance(x, y):
    return np.sum(np.abs(x - y))

# Cosine similarity
def cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

# Test with sample vectors
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

print(f"Vector x: {x}")
print(f"Vector y: {y}")
print(f"Euclidean distance: {euclidean_distance(x, y):.4f}")
print(f"Manhattan distance: {manhattan_distance(x, y):.4f}")
print(f"Cosine similarity: {cosine_similarity(x, y):.4f}")

Vector x: [1 2 3]
Vector y: [4 5 6]
Euclidean distance: 5.1962
Manhattan distance: 9.0000
Cosine similarity: 0.9746


1. Print numpy version and configurations

2. Create a null vector of size 10 (an array of 10 zeros)

3. Find the memory size of an array and np array containing same elements

4. Reverse a vector (first element becomes last)

5. Create a 3x3 matrix with values ranging from 0 to 8

6. Find indices of non-zero elements from [1,2,0,0,4,0]

7. Sort an array

8. Double each element of an array

9. Add two randomly generated 3D tensors using loops

10. Create a function which displays a summary report (mean, median, variance, standard deviation) of the given numpy array

11. Replace all odd numbers in an array with -1, without changing the array

12. Element wise comparison of two arrays (a > b)


In [None]:
# Print numpy version and configurations
print('Version :', np.__version__)

print('\nConfig')
np.show_config()

In [None]:
# Create a null vector of size 10 (an array of 10 zeros)
arr = np.zeros(10, dtype=int)
arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [18]:
# Find the memory size of an array and np array containing same elements
import sys


arr_normal = [x for x in range(1, 1001)]
arr_np = np.array(np.arange(1, 1001))

np_array_size = arr_np.itemsize * arr_np.size

list_size = sys.getsizeof(arr_normal) + sum(sys.getsizeof(x) for x in arr_normal)

print(f'Size of numpy array: {np_array_size/1000} Kilobytes')
print(f'Size of normal array: {list_size/1000:.3f} kilobytes.')


Size of numpy array: 8.0 Kilobytes
Size of normal array: 36.856 kilobytes.


In [21]:
# Reverse a vector (first element becomes last)
vector_to_reverse = np.arange(1, 11)
print(vector_to_reverse)
vector_to_reverse = vector_to_reverse[::-1]
print(vector_to_reverse)

[ 1  2  3  4  5  6  7  8  9 10]
[10  9  8  7  6  5  4  3  2  1]


In [24]:
# Create a 3x3 matrix with values ranging from 0 to 8
matrix1 = np.arange(9).reshape((3, 3))
matrix1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [29]:
arr_non_zero = np.array([1,2,0,0,4,0])
print(*np.nonzero(arr_non_zero)[0])

0 1 4


In [10]:
names = np.array(['Mothra', 'Ghidorah', 'Shimomura'])

sorted_names = np.sort(names)
sorted_names

array(['Ghidorah', 'Mothra', 'Shimomura'], dtype='<U9')

In [19]:
# double each element of an array
arr_to_double= np.array([[1,2, 3, 4],[7, 8, 9, 10],[2, 3, 4, 5],[5, 6, 7, 8]])

print(f"Original Array : \n{arr_to_double}\n")
print(f"Doubled Array : \n{arr_to_double * 2}")


Original Array : 
[[ 1  2  3  4]
 [ 7  8  9 10]
 [ 2  3  4  5]
 [ 5  6  7  8]]

Doubled Array : 
[[ 2  4  6  8]
 [14 16 18 20]
 [ 4  6  8 10]
 [10 12 14 16]]


In [26]:
# Add two randomly generated 3D tensors using loops
random_array_1 = np.random.randint(1, 100, 24).reshape((4, 3, 2))
random_array_2 = np.random.randint(1, 100, 24).reshape((4, 3, 2))
ans = np.zeros((4, 3, 2))

print(f"Input Tensor 1 :\n{random_array_1}\n")
print(f"Input Tensor 2 :\n{random_array_2}\n")

for i in range(len(random_array_1)):
    for j in range(len(random_array_1[0])):
        for k in range(len(random_array_1[0][0])):
            ans[i][j][k] = random_array_1[i][j][k] + random_array_2[i][j][k]

print(f"Input Tensor 1 + Input Tensor 2 : \n{ans}")
# ans2 = random_array_1 + random_array_2 # using broadcasting -> much efficient   
# print(f"Input Tensor 1 + Input Tensor 2 : \n{ans2}")

Input Tensor 1 :
[[[21 50]
  [25  5]
  [94 36]]

 [[72 54]
  [44 17]
  [36  7]]

 [[34  4]
  [12 84]
  [63 63]]

 [[23  3]
  [80 77]
  [86 58]]]

Input Tensor 2 :
[[[46 88]
  [24 57]
  [77 94]]

 [[ 1  3]
  [57 34]
  [25 86]]

 [[35 84]
  [37 42]
  [23 79]]

 [[80  8]
  [62 70]
  [24 63]]]

Input Tensor 1 + Input Tensor 2 : 
[[[ 67. 138.]
  [ 49.  62.]
  [171. 130.]]

 [[ 73.  57.]
  [101.  51.]
  [ 61.  93.]]

 [[ 69.  88.]
  [ 49. 126.]
  [ 86. 142.]]

 [[103.  11.]
  [142. 147.]
  [110. 121.]]]


In [31]:
# Create a function which displays a summary report (mean, median, variance, standard deviation) of the given numpy array
def summary_report(data: np.ndarray):
    print(f'Mean : {np.mean(data):.3f}')

    print(f'Median : {np.median(data):.3f}')

    print(f'Variance : {np.var(data):.3f}')

    print(f'Variance : {np.std(data):.3f}')


data = 0.6 * (np.random.randint(1, 100, 100)) + 27
summary_report(data)

Mean : 54.786
Median : 55.200
Variance : 287.544
Variance : 16.957


In [34]:
# Replace all odd numbers in an array with -1, without changing the array
array_1 = np.array([1, 2, 3, 4, 5, 6, 7, 6, 4, 3, 2, 1])
array_out = np.where(array_1 % 2 == 1, -1, array_1)
print(f"Original Array :\n{array_1}")
print(f"Changed Array :\n{array_out}")

Original Array :
[1 2 3 4 5 6 7 6 4 3 2 1]
Changed Array :
[-1  2 -1  4 -1  6 -1  6  4 -1  2 -1]


In [None]:
# element wise comparison
array_2 = np.array(['a', 'b', 'h'])
array_3 = np.array(['c', 'f', 'd'])
array_2 > array_3

array([False, False,  True])

In [14]:
iris_data = np.random.rand(100, 4) * 6
iris_data = np.round(iris_data, 3)

In [20]:
iris_data_filtered = (iris_data[:, 0] < 2.0) & (iris_data[:, 2] > 3)
iris_data[iris_data_filtered]

array([[0.296, 4.913, 5.936, 1.16 ],
       [1.057, 3.45 , 5.404, 2.229],
       [1.16 , 3.125, 5.28 , 0.57 ],
       [1.31 , 4.574, 3.16 , 2.555],
       [1.371, 2.825, 5.517, 4.494],
       [1.854, 5.501, 5.069, 4.366],
       [1.203, 4.08 , 4.606, 1.   ],
       [0.34 , 4.988, 4.547, 3.852],
       [0.362, 2.839, 3.788, 1.377],
       [0.334, 1.283, 4.83 , 3.082],
       [1.632, 4.039, 3.002, 2.005],
       [0.086, 3.17 , 3.698, 5.287],
       [1.87 , 5.808, 4.191, 3.428],
       [0.185, 5.964, 3.817, 5.661],
       [1.631, 5.885, 3.64 , 2.334],
       [1.17 , 1.372, 5.712, 5.255],
       [1.031, 3.978, 4.78 , 2.137],
       [0.752, 1.239, 3.695, 5.031]])

In [22]:
iris_data[np.random.randint(99, size=20), np.random.randint(3, size=20)] = np.nan
iris_data

array([[0.296, 4.913, 5.936, 1.16 ],
       [1.085, 4.842, 0.493, 3.886],
       [1.057,   nan, 5.404, 2.229],
       [3.968, 4.139, 2.918, 5.52 ],
       [1.16 , 3.125, 5.28 , 0.57 ],
       [1.31 , 4.574, 3.16 , 2.555],
       [3.012, 3.665, 0.622, 3.34 ],
       [3.055, 5.249, 4.005, 0.018],
       [0.171, 5.644, 1.839, 1.48 ],
       [5.594, 4.266,   nan, 1.057],
       [4.595, 3.274, 5.061, 4.185],
       [1.371, 2.825, 5.517, 4.494],
       [3.659, 1.508, 1.979, 3.896],
       [3.222, 5.118, 4.279, 0.607],
       [2.097, 5.077, 0.182, 3.884],
       [1.095, 3.688, 1.505, 0.058],
       [1.378, 4.017, 1.204, 5.263],
       [2.695, 4.077, 5.468, 1.538],
       [4.105, 5.286, 1.652, 1.541],
       [5.323, 2.134, 1.5  , 3.636],
       [1.854, 5.501, 5.069, 4.366],
       [4.458, 0.742, 3.74 , 0.634],
       [2.348, 5.008, 1.72 , 1.917],
       [2.598, 5.241, 3.095, 0.925],
       [1.203, 4.08 , 4.606, 1.   ],
       [0.418, 1.437, 0.469, 1.958],
       [2.128, 4.02 ,   nan, 0.768],
 

In [26]:
# removing nan values
output = np.array([~np.any(np.isnan(row)) for row in iris_data])
iris_data[output]

array([[0.296, 4.913, 5.936, 1.16 ],
       [1.085, 4.842, 0.493, 3.886],
       [3.968, 4.139, 2.918, 5.52 ],
       [1.16 , 3.125, 5.28 , 0.57 ],
       [1.31 , 4.574, 3.16 , 2.555],
       [3.012, 3.665, 0.622, 3.34 ],
       [3.055, 5.249, 4.005, 0.018],
       [0.171, 5.644, 1.839, 1.48 ],
       [4.595, 3.274, 5.061, 4.185],
       [1.371, 2.825, 5.517, 4.494],
       [3.659, 1.508, 1.979, 3.896],
       [3.222, 5.118, 4.279, 0.607],
       [2.097, 5.077, 0.182, 3.884],
       [1.095, 3.688, 1.505, 0.058],
       [1.378, 4.017, 1.204, 5.263],
       [2.695, 4.077, 5.468, 1.538],
       [4.105, 5.286, 1.652, 1.541],
       [5.323, 2.134, 1.5  , 3.636],
       [1.854, 5.501, 5.069, 4.366],
       [4.458, 0.742, 3.74 , 0.634],
       [2.348, 5.008, 1.72 , 1.917],
       [2.598, 5.241, 3.095, 0.925],
       [1.203, 4.08 , 4.606, 1.   ],
       [0.418, 1.437, 0.469, 1.958],
       [4.082, 3.402, 0.126, 4.988],
       [5.076, 4.023, 3.116, 0.668],
       [2.376, 5.158, 0.965, 3.812],
 

In [27]:
# Find duplicate entries, and mark them as true for the second occurrence onwards. First occurrence should be false
a = np.random.randint(1, 6, 10)
print(a)

out = np.full(a.shape[0], True)

unique_vals = np.unique(a, return_index=True)[1]

out[unique_vals] = False
out


[3 1 1 2 5 5 1 3 4 4]


array([False, False,  True, False, False,  True,  True,  True, False,
        True])