<h1 align="center"> <font color="Orange"> NumPy ~ Exercises </font> </h1>

In [2]:
import numpy as np

### Fancy Indexing

In [2]:
# Select random samples (common in mini-batch training).
# A locally seeded Generator makes the sampled batch identical on every
# "Restart Kernel -> Run All" without touching NumPy's global random state.
rng = np.random.default_rng(42)

X = np.arange(100).reshape(20, 5)  # 20 samples, 5 features
# Draw 4 distinct row indices. The population size is read from X so the
# indices stay in range if the number of samples is changed above.
batch_indices = rng.choice(X.shape[0], size=4, replace=False)
batch = X[batch_indices]  # index with an integer array -> one row per index
print(f"Random batch shape: {batch.shape}")
print(f"Batch:\n{batch}")

# Select specific features
feature_indices = [0, 2, 4]  # Select features 0, 2, 4
X_subset = X[:, feature_indices]  # keep every row, only the listed columns
print(f"\nSubset features shape: {X_subset.shape}")
print(f"First 5 rows of subset:\n{X_subset[:5]}")

Random batch shape: (4, 5)
Batch:
[[ 0  1  2  3  4]
 [25 26 27 28 29]
 [30 31 32 33 34]
 [ 5  6  7  8  9]]

Subset features shape: (20, 3)
First 5 rows of subset:
[[ 0  2  4]
 [ 5  7  9]
 [10 12 14]
 [15 17 19]
 [20 22 24]]


### Feature Normalization (Z-score) using broadcasting

In [3]:
# Z-score standardisation, column by column: (X - mean) / std
X = np.arange(1, 13, dtype=float).reshape(4, 3)  # rows 1..3, 4..6, 7..9, 10..12

# Reduce over the rows (axis 0) to get one statistic per feature column
mean = np.mean(X, axis=0)  # Shape: (3,)
std = np.std(X, axis=0)    # Shape: (3,)

print(f"Original data:\n{X}")
print(f"Mean per feature: {mean}")
print(f"Std per feature: {std}")

# The (3,) statistics are broadcast against every row of the (4, 3) matrix
centered = X - mean
X_normalized = centered / std
print(f"\nNormalized data:\n{X_normalized}")
print(f"New mean per feature: {X_normalized.mean(axis=0)}")
print(f"New std per feature: {X_normalized.std(axis=0)}")

Original data:
[[ 1.  2.  3.]
 [ 4.  5.  6.]
 [ 7.  8.  9.]
 [10. 11. 12.]]
Mean per feature: [5.5 6.5 7.5]
Std per feature: [3.35410197 3.35410197 3.35410197]

Normalized data:
[[-1.34164079 -1.34164079 -1.34164079]
 [-0.4472136  -0.4472136  -0.4472136 ]
 [ 0.4472136   0.4472136   0.4472136 ]
 [ 1.34164079  1.34164079  1.34164079]]
New mean per feature: [0. 0. 0.]
New std per feature: [1. 1. 1.]


### Min-Max Scaling

In [4]:
# Min-max scaling maps each column onto [0, 1]: (X - min) / (max - min)
X = np.arange(1, 9, dtype=float).reshape(4, 2)  # rows (1, 2), (3, 4), (5, 6), (7, 8)

# Column-wise extremes
X_min = np.min(X, axis=0)  # Shape: (2,)
X_max = np.max(X, axis=0)  # Shape: (2,)

# Width of each column's value range; broadcast against every row below
column_range = X_max - X_min
X_scaled = (X - X_min) / column_range
print(f"Min-max scaled:\n{X_scaled}")

Min-max scaled:
[[0.         0.        ]
 [0.33333333 0.33333333]
 [0.66666667 0.66666667]
 [1.         1.        ]]


### UFuncs in Machine Learning

- Activation Functions
- Distance Calculation

In [5]:
# ReLU (Rectified Linear Unit)
def relu(x):
    """Return the element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

# Sigmoid
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise without overflow.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Values in [0, 1] with the same shape as ``x``.
    """
    x = np.asarray(x)
    # Only ever exponentiate a non-positive number, so exp() stays in (0, 1]
    # and cannot overflow, however negative x is.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)        (the textbook form)
    # x <  0: e^x / (1 + e^x)       (same value, multiplied through by e^x)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns a 0-d result back into a scalar and leaves real arrays as arrays
    return result[()]

# Tanh
def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x`` (delegates to ``np.tanh``)."""
    squashed = np.tanh(x)
    return squashed

# Softmax (for classification)
def softmax(x, axis=0):
    """Softmax of ``x`` along ``axis``.

    Parameters
    ----------
    x : array_like
        Raw scores (logits).
    axis : int, optional
        Axis over which the outputs sum to 1. The default, 0, matches the
        original behaviour of this function; pass ``axis=1`` (or ``-1``) for a
        ``(batch, classes)`` matrix with one distribution per row.

    Returns
    -------
    ndarray
        Non-negative values with the same shape as ``x`` that sum to 1 along
        ``axis``.
    """
    x = np.asarray(x)
    # Shift by the max of each slice being normalised, not the global max.
    # A slice lying far below the global max would otherwise underflow to all
    # zeros and produce 0/0 = NaN.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    # keepdims lets the per-slice totals broadcast back for any choice of axis
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Run each element-wise activation on a small range symmetric around zero
x = np.arange(-2, 3)
print(f"Input: {x}")
for label, activation in (("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)):
    print(f"{label}: {activation(x)}")

# Softmax on a vector of raw class scores (logits)
logits = np.array([2.0, 1.0, 0.1])
probabilities = softmax(logits)
print(f"\nLogits: {logits}")
print(f"Softmax: {probabilities}")
print(f"Sum: {np.sum(probabilities)}")  # Should be 1.0

Input: [-2 -1  0  1  2]
ReLU: [0 0 0 1 2]
Sigmoid: [0.11920292 0.26894142 0.5        0.73105858 0.88079708]
Tanh: [-0.96402758 -0.76159416  0.          0.76159416  0.96402758]

Logits: [2.  1.  0.1]
Softmax: [0.65900114 0.24243297 0.09856589]
Sum: 1.0


In [6]:
# Euclidean distance between vectors
def euclidean_distance(x, y):
    """Return the square root of the summed squared differences between ``x`` and ``y``."""
    delta = x - y
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

# Manhattan distance
def manhattan_distance(x, y):
    """Return the sum of the absolute element-wise differences between ``x`` and ``y``."""
    gaps = np.abs(x - y)
    return np.sum(gaps)

# Cosine similarity
def cosine_similarity(x, y):
    """Return the dot product of ``x`` and ``y`` divided by the product of their L2 norms."""
    inner = np.dot(x, y)
    magnitudes = np.linalg.norm(x) * np.linalg.norm(y)
    return inner / magnitudes

# Two short integer vectors to compare
x = np.arange(1, 4)  # [1 2 3]
y = np.arange(4, 7)  # [4 5 6]

# Evaluate every metric first, then report them together
dist_l2 = euclidean_distance(x, y)
dist_l1 = manhattan_distance(x, y)
cos_sim = cosine_similarity(x, y)

print(f"Vector x: {x}")
print(f"Vector y: {y}")
print(f"Euclidean distance: {dist_l2:.4f}")
print(f"Manhattan distance: {dist_l1:.4f}")
print(f"Cosine similarity: {cos_sim:.4f}")

Vector x: [1 2 3]
Vector y: [4 5 6]
Euclidean distance: 5.1962
Manhattan distance: 9.0000
Cosine similarity: 0.9746


### Correlation and Covariance

**Use Case**: Feature selection, multicollinearity detection, understanding feature relationships


In [3]:
# Toy dataset: two candidate features and the target they are compared with
feature1 = np.arange(1, 6)
feature2 = np.array([2, 4, 5, 4, 5])
target = np.array([3, 6, 7, 8, 10])

# One variable per row -> (3, 5) matrix, so np.cov returns a 3x3 result
data_matrix = np.array([feature1, feature2, target])
cov_matrix = np.cov(data_matrix)
print(f"Covariance matrix:\n{cov_matrix}\n")

# Correlation = covariance rescaled so every diagonal entry is 1
corr_matrix = np.corrcoef(data_matrix)
print(f"Correlation matrix:\n{corr_matrix}\n")

# Pairwise feature/target correlation: the off-diagonal entry of each 2x2 result
pair_f1 = np.corrcoef(feature1, target)
pair_f2 = np.corrcoef(feature2, target)
corr_f1_target = pair_f1[0, 1]
corr_f2_target = pair_f2[0, 1]
print(f"Correlation (feature1, target): {corr_f1_target:.4f}")
print(f"Correlation (feature2, target): {corr_f2_target:.4f}")

Covariance matrix:
[[2.5  1.5  4.  ]
 [1.5  1.5  2.75]
 [4.   2.75 6.7 ]]

Correlation matrix:
[[1.         0.77459667 0.97735555]
 [0.77459667 1.         0.86746041]
 [0.97735555 0.86746041 1.        ]]

Correlation (feature1, target): 0.9774
Correlation (feature2, target): 0.8675


### Boolean Indexing for Feature Selection

In [4]:
# Feature matrix with correlation to target.
# A locally seeded Generator keeps the correlations, and therefore the selected
# columns, identical on every run.
rng = np.random.default_rng(42)
X = rng.standard_normal((100, 10))  # 100 samples, 10 features
# NOTE: y is drawn independently of X, so any correlation below is sampling
# noise; few or no columns are expected to pass the threshold.
y = rng.standard_normal(100)

# Calculate correlation of each feature with target
# (np.corrcoef returns a 2x2 matrix; [0, 1] is the feature/target entry)
correlations = np.array([np.corrcoef(X[:, i], y)[0, 1] for i in range(X.shape[1])])
print(f"Correlations with target: {correlations}")

# Select features with |correlation| > 0.2
CORRELATION_THRESHOLD = 0.2
feature_mask = np.abs(correlations) > CORRELATION_THRESHOLD
print(f"\nFeature mask: {feature_mask}")
print(f"Selected features: {np.where(feature_mask)[0]}")

# Apply feature selection: a boolean mask on axis 1 keeps only the True columns
X_selected = X[:, feature_mask]
print(f"\nOriginal shape: {X.shape}")
print(f"Selected shape: {X_selected.shape}")

Correlations with target: [ 0.1090727   0.07381451  0.10516154 -0.15846464 -0.02222396 -0.02626849
  0.1902869  -0.1211623   0.01169213  0.04533784]

Feature mask: [False False False False False False False False False False]
Selected features: []

Original shape: (100, 10)
Selected shape: (100, 0)


### Solving Linear Systems

In [5]:
# Square system A x = b, solved directly
A = np.array([[3, 1],
              [1, 2]])
b = np.array([9, 8])

x = np.linalg.solve(A, b)
print(f"Solution: {x}")
print(f"Verification (A @ x): {A @ x}")  # should reproduce b

# Overdetermined system (3 equations, 2 unknowns): no exact solution, so take
# the x that minimises the squared error ||A_over @ x - b_over||^2
A_over = np.array([[1, 1],
                   [1, 2],
                   [1, 3]])
b_over = np.array([1, 2, 2])

lstsq_result = np.linalg.lstsq(A_over, b_over, rcond=None)
x_ls, residuals, rank, s = lstsq_result
print(f"\nLeast squares solution: {x_ls}")
print(f"Residuals: {residuals}")

Solution: [2. 3.]
Verification (A @ x): [9. 8.]

Least squares solution: [0.66666667 0.5       ]
Residuals: [0.16666667]


### Singular Value Decomposition (SVD)

In [6]:
# SVD factorises A into U @ S @ V^T
A = np.arange(1, 7).reshape(2, 3)  # [[1 2 3], [4 5 6]]

U, s, VT = np.linalg.svd(A)
print(f"U shape: {U.shape}")
print(f"Singular values: {s}")
print(f"VT shape: {VT.shape}")

# svd returns the singular values as a 1-D vector. To multiply the factors
# back together they have to sit on the diagonal of a matrix shaped like A.
S = np.zeros(A.shape)
diag_idx = np.arange(s.size)
S[diag_idx, diag_idx] = s
A_reconstructed = U @ S @ VT
print(f"\nReconstructed:\n{A_reconstructed}")

U shape: (2, 2)
Singular values: [9.508032   0.77286964]
VT shape: (3, 3)

Reconstructed:
[[1. 2. 3.]
 [4. 5. 6.]]


### Norms and Distances

In [7]:
# Three common norms of the same vector
v = np.array([3, 4])

norm_l1 = np.linalg.norm(v, ord=1)         # sum of absolute values (Manhattan)
norm_l2 = np.linalg.norm(v)                # default order: Euclidean length
norm_inf = np.linalg.norm(v, ord=np.inf)   # largest absolute entry

print(f"L1 norm: {norm_l1}")
print(f"L2 norm: {norm_l2}")
print(f"L-infinity norm: {norm_inf}")

# Frobenius norm of a matrix
A = np.array([[1, 2], [3, 4]])
norm_fro = np.linalg.norm(A, 'fro')
print(f"\nFrobenius norm: {norm_fro}")

L1 norm: 7.0
L2 norm: 5.0
L-infinity norm: 4.0

Frobenius norm: 5.477225575051661


### Distributions

In [8]:
# Common distributions in ML, drawn from NumPy's global generator after a
# fixed seed so the reported sample means are repeatable.
np.random.seed(42)

# Draw all four samples first; the order of the calls fixes which random
# numbers each distribution receives, so it must not be changed.
binomial = np.random.binomial(n=10, p=0.5, size=1000)    # e.g., coin flips
poisson = np.random.poisson(lam=5, size=1000)            # e.g., event counts
exponential = np.random.exponential(scale=2, size=1000)  # e.g., time between events
beta = np.random.beta(a=2, b=5, size=1000)               # useful for Bayesian methods

# Compare each sample mean with the distribution's theoretical mean
print(f"Binomial mean: {binomial.mean():.2f} (expected: 5.0)")
print(f"Poisson mean: {poisson.mean():.2f} (expected: 5.0)")
print(f"Exponential mean: {exponential.mean():.2f} (expected: 2.0)")
print(f"Beta mean: {beta.mean():.2f} (expected: {2/(2+5):.2f})")

Binomial mean: 4.94 (expected: 5.0)
Poisson mean: 4.98 (expected: 5.0)
Exponential mean: 1.87 (expected: 2.0)
Beta mean: 0.28 (expected: 0.29)
