In [2]:
pip install numpy

Looking in indexes: https://pypi.org/simple, https://raghav.dinesh%40ibm.com:****@na.artifactory.swg-devops.com/artifactory/api/pypi/wcp-auditree-pypi-virtual/simple
Collecting numpy
  Downloading https://na.artifactory.swg-devops.com/artifactory/api/pypi/wcp-auditree-pypi-virtual/packages/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl (6.5 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m547.0 kB/s[0m  [33m0:00:12[0m1.5 kB/s[0m eta [36m0:00:01[0m:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.4.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Block 1: Basic Vector Operations

import numpy as np

# Create vectors
u = np.array([1, 2, 3])
v = np.array([4, 5, 6])

# Addition
sum_vec = u + v  # [5, 7, 9]

# Scalar multiplication
scaled = 2 * u  # [2, 4, 6]

# Dot product
dot = np.dot(u, v)  # 32
# Alternative: u @ v (matrix multiplication operator)

# Norm (magnitude)
norm = np.linalg.norm(u)  # 3.7416...
# Alternative: np.sqrt(np.sum(u**2))

# Verify dot product formula: u·v = ||u|| ||v|| cos(θ)
cos_theta = dot / (np.linalg.norm(u) * np.linalg.norm(v))
theta = np.arccos(cos_theta)  # Angle in radians

In [4]:
# Block 2: Spam Detection with Cosine Similarity
import numpy as np

# Spam detection feature vector:
# Email 1: [word_count=150, link_count=5, caps_ratio=0.2]
# Email 2: [word_count=200, link_count=3, caps_ratio=0.1]

email1 = np.array([150, 5, 0.2])
email2 = np.array([200, 3, 0.1])

# Similarity via dot product:
v1_dot_v2 = np.dot(email1, email2)
print(f"Dot product: {v1_dot_v2}")  # 30,000 + 15 + 0.02 = 30,015.02

# Problem: word_count dominates! Need normalization.

# After normalization (divide by norm):
v1_norm = email1 / np.linalg.norm(email1)
v2_norm = email2 / np.linalg.norm(email2)

print(f"Normalized v1: {v1_norm}")
print(f"Normalized v2: {v2_norm}")

# Now dot product ≈ 1.0 (very similar emails)
similarity = np.dot(v1_norm, v2_norm)
print(f"Cosine similarity: {similarity}")

Dot product: 30015.02
Normalized v1: [0.99944402 0.0333148  0.00133259]
Normalized v2: [9.99887394e-01 1.49983109e-02 4.99943697e-04]
Cosine similarity: 0.9998318081596929


In [5]:
# Block 3: Matrix Operations
import numpy as np

# Create matrices
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix multiplication
C = A @ B  # Modern Python (3.5+)
# Alternative: np.matmul(A, B) or np.dot(A, B)
print(f"A @ B:\n{C}")

# Transpose
A_T = A.T
# Alternative: np.transpose(A)
print(f"A transpose:\n{A_T}")

# Identity matrix
I = np.eye(3)  # 3×3 identity
print(f"Identity matrix:\n{I}")

# Matrix inverse
try:
    A_inv = np.linalg.inv(A)
    # Verify: A @ A_inv should be identity
    print(f"A inverse:\n{A_inv}")
    print(f"A @ A_inv:\n{A @ A_inv}")  # Close to [[1,0],[0,1]]
except np.linalg.LinAlgError:
    print("Matrix is singular (not invertible)")

# Determinant (check if invertible)
det = np.linalg.det(A)
print(f"Determinant of A: {det}")
if abs(det) < 1e-10:
    print("Matrix is effectively singular")

# Element-wise operations (Hadamard product)
elementwise = A * B  # NOT matrix multiplication!
print(f"Element-wise multiply:\n{elementwise}")

# Shape information
print(f"A shape: {A.shape}")  # (2, 2)
print(f"Number of dimensions: {A.ndim}")  # 2

A @ B:
[[19 22]
 [43 50]]
A transpose:
[[1 3]
 [2 4]]
Identity matrix:
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
A inverse:
[[-2.   1. ]
 [ 1.5 -0.5]]
A @ A_inv:
[[1.0000000e+00 0.0000000e+00]
 [8.8817842e-16 1.0000000e+00]]
Determinant of A: -2.0000000000000004
Element-wise multiply:
[[ 5 12]
 [21 32]]
A shape: (2, 2)
Number of dimensions: 2


In [7]:
# Block 4: Neural Network Layer Example
import numpy as np

# Neural network layer with 3 inputs, 2 outputs:
np.random.seed(42)

# Input vector: x = [x₁, x₂, x₃]  (3×1)
x = np.array([1.0, 2.0, 3.0])

# Weight matrix: W (2×3)
W = np.array([[0.5, 0.3, 0.2],
              [0.1, 0.4, 0.6]])

# Bias vector: b = [b₁, b₂]  (2×1)
b = np.array([0.1, 0.2])

# Forward pass: y = Wx + b
y = W @ x + b

print(f"Input x: {x}")
print(f"Weight matrix W:\n{W}")
print(f"Bias b: {b}")
print(f"Output y: {y}")

# Expected output: [1.8, 2.9]
# y₁ = 0.5×1.0 + 0.3×2.0 + 0.2×3.0 + 0.1 = 1.8
# y₂ = 0.1×1.0 + 0.4×2.0 + 0.6×3.0 + 0.2 = 2.9

# Security attack: Adversary wants to maximize y₁. They can perturb x 
# in direction of first row of W: perturbation = ε[0.5, 0.3, 0.2]
epsilon = 0.1
perturbation = epsilon * W[0]  # Direction of first row
x_adversarial = x + perturbation
y_adversarial = W @ x_adversarial + b

print(f"\nAdversarial perturbation: {perturbation}")
print(f"Adversarial input: {x_adversarial}")
print(f"Adversarial output: {y_adversarial}")
print(f"y₁ increased by: {y_adversarial[0] - y[0]}")

Input x: [1. 2. 3.]
Weight matrix W:
[[0.5 0.3 0.2]
 [0.1 0.4 0.6]]
Bias b: [0.1 0.2]
Output y: [1.8 2.9]

Adversarial perturbation: [0.05 0.03 0.02]
Adversarial input: [1.05 2.03 3.02]
Adversarial output: [1.838 2.929]
y₁ increased by: 0.03799999999999981


In [8]:
# Block 5: Dot Product Deep Dive
import numpy as np

# Vectors
u = np.array([1, 2, 3])
v = np.array([4, 5, 6])

# Dot product (multiple ways)
dot1 = np.dot(u, v)
dot2 = u @ v
dot3 = np.sum(u * v)  # Manual: elementwise multiply then sum

print(f"Dot product (np.dot): {dot1}")
print(f"Dot product (@): {dot2}")
print(f"Dot product (manual): {dot3}")

# Verify geometric interpretation: u·v = ||u|| ||v|| cos(θ)
magnitude_u = np.linalg.norm(u)
magnitude_v = np.linalg.norm(v)
cos_theta = dot1 / (magnitude_u * magnitude_v)
theta_radians = np.arccos(cos_theta)
theta_degrees = np.degrees(theta_radians)

print(f"\nAngle between u and v: {theta_degrees:.2f}°")

# Projection of v onto u
proj_v_on_u = (np.dot(v, u) / np.dot(u, u)) * u
print(f"Projection of v onto u: {proj_v_on_u}")

# Check orthogonality
a = np.array([1, 0])
b = np.array([0, 1])
print(f"\na·b = {np.dot(a, b)} (orthogonal)")  # 0 (orthogonal)

# Weighted sum (ML prediction)
weights = np.array([0.5, 0.3, 0.2])
features = np.array([100, 50, 25])
prediction = np.dot(weights, features)  # 50 + 15 + 5 = 70
print(f"\nWeighted sum prediction: {prediction}")

Dot product (np.dot): 32
Dot product (@): 32
Dot product (manual): 32

Angle between u and v: 12.93°
Projection of v onto u: [2.28571429 4.57142857 6.85714286]

a·b = 0 (orthogonal)

Weighted sum prediction: 70.0


In [9]:
# Block 6: Cosine Similarity for Documents
import numpy as np

# Cosine similarity for document comparison (spam detection):
# word_vector = [counts: "buy", "click", "free", "meeting", "money"]

doc1_spam = np.array([10, 5, 20, 0, 30])   # spam
doc2_spam = np.array([8, 4, 18, 0, 25])    # spam
doc3_legit = np.array([1, 0, 2, 10, 1])    # legit

def cosine_similarity(u, v):
    """Cosine similarity = u·v / (||u|| ||v||)"""
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

# Similarity between documents
sim_1_2 = cosine_similarity(doc1_spam, doc2_spam)
sim_1_3 = cosine_similarity(doc1_spam, doc3_legit)

print(f"Similarity (Doc1 vs Doc2 - both spam): {sim_1_2:.3f}")  # ~0.989
print(f"Similarity (Doc1 vs Doc3 - spam vs legit): {sim_1_3:.3f}")  # ~0.202

print("\nInterpretation:")
print("- High similarity (0.989) → both are spam")
print("- Low similarity (0.202) → different document types")

Similarity (Doc1 vs Doc2 - both spam): 0.999
Similarity (Doc1 vs Doc3 - spam vs legit): 0.206

Interpretation:
- High similarity (0.989) → both are spam
- Low similarity (0.202) → different document types


In [11]:
pip install scipy

Looking in indexes: https://pypi.org/simple, https://raghav.dinesh%40ibm.com:****@na.artifactory.swg-devops.com/artifactory/api/pypi/wcp-auditree-pypi-virtual/simple
Collecting scipy
  Downloading https://na.artifactory.swg-devops.com/artifactory/api/pypi/wcp-auditree-pypi-virtual/packages/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl (22.4 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.4/22.4 MB[0m [31m742.2 kB/s[0m  [33m0:00:31[0m[0m eta [36m0:00:01[0m0:02[0m:02[0m
Installing collected packages: scipy
Successfully installed scipy-1.17.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
# Block 7: Curse of Dimensionality
import numpy as np
from scipy.spatial.distance import pdist

# Demonstrate curse of dimensionality
np.random.seed(42)

# Generate random points in various dimensions
n_points = 1000

print("Curse of Dimensionality Demonstration\n")
print("As dimensions increase, distances concentrate:")
print("-" * 50)

for dim in [2, 10, 100, 1000]:
    points = np.random.randn(n_points, dim)
    
    # Compute pairwise distances
    distances = pdist(points)
    
    mean_dist = np.mean(distances)
    std_dist = np.std(distances)
    
    print(f"\nDimension: {dim}")
    print(f"  Mean distance: {mean_dist:.2f}")
    print(f"  Std distance: {std_dist:.2f}")
    print(f"  Relative std: {std_dist/mean_dist:.2f}")

# Output shows: As dim increases, distances concentrate
# (relative std decreases → all points equally far apart)

Curse of Dimensionality Demonstration

As dimensions increase, distances concentrate:
--------------------------------------------------

Dimension: 2
  Mean distance: 1.75
  Std distance: 0.92
  Relative std: 0.52

Dimension: 10
  Mean distance: 4.38
  Std distance: 0.99
  Relative std: 0.23

Dimension: 100
  Mean distance: 14.11
  Std distance: 1.00
  Relative std: 0.07

Dimension: 1000
  Mean distance: 44.74
  Std distance: 0.99
  Relative std: 0.02


In [14]:
# Block 8: Hypersphere Volume
import numpy as np
from math import pi
from scipy.special import gamma

def hypersphere_volume(dim, radius=1):
    """Volume of unit hypersphere in dim dimensions"""
    return (pi**(dim/2) / gamma(dim/2 + 1)) * radius**dim

print("Hypersphere Volume by Dimension\n")
print("-" * 50)

for dim in [2, 5, 10, 50, 100]:
    vol = hypersphere_volume(dim)
    print(f"Dim {dim:3d}: Volume = {vol:.6e}")

print("\nShows: Volume peaks around dim 5-7, then decreases!")

Hypersphere Volume by Dimension

--------------------------------------------------
Dim   2: Volume = 3.141593e+00
Dim   5: Volume = 5.263789e+00
Dim  10: Volume = 2.550164e+00
Dim  50: Volume = 1.730219e-13
Dim 100: Volume = 2.368202e-40

Shows: Volume peaks around dim 5-7, then decreases!


In [15]:
# Block 9: NumPy Array Creation
import numpy as np

# Vectors (1D arrays)
v = np.array([1, 2, 3])
v_zeros = np.zeros(5)
v_ones = np.ones(5)
v_range = np.arange(0, 10, 2)  # [0, 2, 4, 6, 8]
v_linspace = np.linspace(0, 1, 5)  # 5 evenly spaced points

print("Vectors:")
print(f"v: {v}")
print(f"zeros: {v_zeros}")
print(f"ones: {v_ones}")
print(f"range: {v_range}")
print(f"linspace: {v_linspace}")

# Matrices (2D arrays)
M = np.array([[1, 2], [3, 4]])
M_zeros = np.zeros((3, 4))  # 3 rows, 4 columns
M_ones = np.ones((2, 2))
M_identity = np.eye(3)  # 3×3 identity
M_random = np.random.randn(3, 4)  # Normal(0,1)

print("\nMatrices:")
print(f"M:\n{M}")
print(f"zeros (3x4):\n{M_zeros}")
print(f"identity:\n{M_identity}")
print(f"random:\n{M_random}")

Vectors:
v: [1 2 3]
zeros: [0. 0. 0. 0. 0.]
ones: [1. 1. 1. 1. 1.]
range: [0 2 4 6 8]
linspace: [0.   0.25 0.5  0.75 1.  ]

Matrices:
M:
[[1 2]
 [3 4]]
zeros (3x4):
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
identity:
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
random:
[[ 1.08601468  0.86547233 -0.8974248   0.12559306]
 [ 0.46462971 -0.4541347  -0.46967674 -1.59474736]
 [ 1.15433033  1.99228659 -1.55066495 -0.15820592]]


In [16]:
# Block 10: Reshaping and Indexing
import numpy as np

# Reshape arrays
v = np.arange(12)  # [0, 1, ..., 11]
M = v.reshape(3, 4)  # 3×4 matrix

print("Original vector:", v)
print(f"Reshaped to 3x4:\n{M}")

# Flatten matrix to vector
v_flat = M.flatten()
v_ravel = M.ravel()  # Faster, returns view if possible

print(f"Flattened: {v_flat}")

# Transpose
M_T = M.T
print(f"Transposed:\n{M_T}")

# Add dimension
v = np.array([1, 2, 3])
v_col = v[:, np.newaxis]  # Column vector (3, 1)
v_row = v[np.newaxis, :]  # Row vector (1, 3)

print(f"Column vector:\n{v_col}")
print(f"Row vector:\n{v_row}")

# Indexing and slicing
M = np.array([[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9]])

print(f"\nOriginal matrix:\n{M}")
print(f"M[0, 0] = {M[0, 0]}")  # Element at (0, 0)
print(f"First row: {M[0, :]}")
print(f"Second column: {M[:, 1]}")
print(f"Submatrix M[0:2, 1:3]:\n{M[0:2, 1:3]}")

# Boolean indexing
print(f"Elements > 5: {M[M > 5]}")

Original vector: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Reshaped to 3x4:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Flattened: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Transposed:
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
Column vector:
[[1]
 [2]
 [3]]
Row vector:
[[1 2 3]]

Original matrix:
[[1 2 3]
 [4 5 6]
 [7 8 9]]
M[0, 0] = 1
First row: [1 2 3]
Second column: [2 5 8]
Submatrix M[0:2, 1:3]:
[[2 3]
 [5 6]]
Elements > 5: [6 7 8 9]


In [17]:
# Block 11: Broadcasting
import numpy as np

# Scalar + matrix (broadcasts scalar to every element)
M = np.array([[1, 2], [3, 4]])
print(f"M:\n{M}")
print(f"M + 10:\n{M + 10}")

# Vector + matrix (broadcasts along appropriate axis)
v = np.array([10, 20])
print(f"\nv: {v}")
print(f"M + v (adds to each row):\n{M + v}")

# Column vector + matrix
v_col = np.array([[10], [20]])
print(f"\nv_col:\n{v_col}")
print(f"M + v_col (adds to each column):\n{M + v_col}")

M:
[[1 2]
 [3 4]]
M + 10:
[[11 12]
 [13 14]]

v: [10 20]
M + v (adds to each row):
[[11 22]
 [13 24]]

v_col:
[[10]
 [20]]
M + v_col (adds to each column):
[[11 12]
 [23 24]]


In [18]:
# Block 12: Complete Logistic Regression Example
import numpy as np

# Simple logistic regression (forward pass only)

# Generate synthetic data
np.random.seed(42)
n_samples = 100
n_features = 3

# Features (100 samples, 3 features each)
X = np.random.randn(n_samples, n_features)

# Weights (3 features → 1 output)
w = np.random.randn(n_features)
b = 0.5

# Forward pass: z = Xw + b
z = X @ w + b  # Matrix-vector multiply + scalar broadcast
# Shape: (100, 3) @ (3,) + scalar → (100,)

# Sigmoid activation: σ(z) = 1 / (1 + e^(-z))
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

predictions = sigmoid(z)  # Probabilities for each sample

# Classification: threshold at 0.5
classes = (predictions > 0.5).astype(int)

print(f"Predictions shape: {predictions.shape}")  # (100,)
print(f"First 5 predictions: {predictions[:5]}")
print(f"First 5 classes: {classes[:5]}")

print("\n✅ This is logistic regression prediction in 10 lines!")
print("All machine learning is linear algebra operations.")

Predictions shape: (100,)
First 5 predictions: [0.65694127 0.30866138 0.16939837 0.49044651 0.52053288]
First 5 classes: [1 0 0 0 1]

✅ This is logistic regression prediction in 10 lines!
All machine learning is linear algebra operations.


In [19]:
# Block 13: Performance Comparison (Vectorization)
import numpy as np
import time

n = 1000000
x = np.random.randn(n)
y = np.random.randn(n)

# Bad: Python loop
start = time.time()
result_slow = []
for i in range(n):
    result_slow.append(x[i] * y[i] + x[i]**2)
result_slow = np.array(result_slow)
time_slow = time.time() - start

# Good: Vectorized NumPy
start = time.time()
result_fast = x * y + x**2
time_fast = time.time() - start

print(f"Python loop: {time_slow:.3f}s")
print(f"NumPy vectorized: {time_fast:.6f}s")
print(f"Speedup: {time_slow/time_fast:.0f}×")

# Verify results are identical
assert np.allclose(result_slow, result_fast)
print("✅ Results verified identical")

Python loop: 0.634s
NumPy vectorized: 0.010053s
Speedup: 63×
✅ Results verified identical
