In [1]:
import numpy as np

In [2]:
def f1(theta):
    """Evaluate f(θ) = θ₀² + θ₁² independently for every row of ``theta``.

    Parameters
    ----------
    theta : 2-D array
        One parameter vector per row; only columns 0 and 1 are read.

    Returns
    -------
    1-D array with one value per row: the sum of the squares of
    columns 0 and 1 of that row.
    """
    return theta[:, 0] ** 2 + theta[:, 1] ** 2

In [3]:
# Evaluation point θ = (2, 1), stored as a single row so that f1 can index columns.
theta = np.array([2, 1]).reshape(1, -1)

# By definition

In [4]:
h = 1e-5  # finite-difference step size
# Each row of v is one coordinate direction, so row i perturbs only theta[:, i].
v = np.array([
    [1, 0],
    [0, 1],
])
# Forward difference along every row of v: one approximate partial derivative per direction.
(f1(theta + v * h) - f1(theta)) / h

array([4.00001, 2.00001])

# By forward derivative via sampling
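$$
\nabla_{v} f(\theta) = \lim_{h \to 0} \frac{f(\theta + h \cdot \vec{v}) - f(\theta)}{h} \approx \frac{f(\theta + h \cdot \vec{v}) - f(\theta)}{h} \quad \text{for small } h
$$

Averaging $\nabla_{v} f(\theta) \, \vec{v}$ over random directions $\vec{v} \sim \mathcal{N}(0, I)$ recovers the gradient, because $\mathbb{E}[\vec{v} \vec{v}^{\top}] = I$.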

In [5]:
# Fix the global seed so the sampled directions (and the estimate below) are reproducible.
np.random.seed(0)
# Ten million standard-normal direction vectors, each of size 2 (one per row).
v = np.random.randn(10_000_000, 2)
# ∇ᵥf(θ): forward-difference directional derivative, one scalar per sampled direction.
grad_v_f = (f1(theta + v * h) - f1(theta)) / h

In [6]:
# Shape of the perturbed points: theta is combined with every row of h*v.
np.shape(theta + h * v)

(10000000, 2)

In [7]:
# Shape of the evaluation point on its own, for comparison with the perturbed points.
np.shape(theta)

(1, 2)

In [8]:
# Shape of the directional-derivative estimates.
np.shape(grad_v_f)

(10000000,)

In [9]:
# Shape of the sampled direction vectors.
np.shape(v)

(10000000, 2)

In [10]:
# Turn grad_v_f into a column so each scalar scales its own row of v; g has the same shape as v.
g = grad_v_f[:, np.newaxis] * v

In [11]:
# Shape of the per-sample scaled direction vectors.
np.shape(g)

(10000000, 2)

In [12]:
# Average over all samples (axis 0), leaving one value per component of theta.
np.mean(g, axis=0)

array([3.99884249, 2.00074521])