# Estimating f() using g() parameterized by z

In [1]:
import numpy as np

def f(x):
    """Target function to be estimated: the square of ``x``."""
    return pow(x, 2)

def g(x, z):
    """Parameterized estimator: ``x`` raised to the power ``z``."""
    return pow(x, z)

def gradient(fn=None, point=0, samples=None, epsilon=1e-5):
    """Approximate d fn / d point with a forward finite difference.

    Args:
        fn: Callable invoked as ``fn(point, samples)`` that returns a value
            supporting subtraction and division (e.g. a float).
        point: Parameter value at which the derivative is estimated.
        samples: Passed through unchanged as the second argument of ``fn``.
        epsilon: Step size of the finite difference. Defaults to ``1e-5``.

    Returns:
        ``(fn(point + epsilon, samples) - fn(point, samples)) / epsilon``.

    Raises:
        TypeError: If ``fn`` is not callable (including the default ``None``).
        ValueError: If ``epsilon`` is zero.
    """
    if not callable(fn):
        raise TypeError("gradient() requires a callable `fn(point, samples)`")
    if epsilon == 0:
        raise ValueError("epsilon must be non-zero")
    # Forward difference: one extra evaluation of fn at the shifted point.
    grad = (fn(point+epsilon, samples) - fn(point, samples)) / epsilon
    return grad

## As a supervised problem

In [2]:
def mse(z, samples):
    """Mean squared error between ``f(x)`` and ``g(x, z)`` over ``samples``."""
    squared_errors = []
    for x in samples:
        residual = f(x) - g(x, z)
        squared_errors.append(residual**2)
    return np.mean(squared_errors)

# Fixed seed so the 100 sample points are the same on every run.
np.random.seed(0)
samples = np.random.uniform(low=0.1, high=1.0, size=100)

z = 1.0  # initial guess for the exponent
learning_rate = 0.01

# Gradient descent: step against the finite-difference gradient of the MSE.
for idx in range(10000):
    grad = gradient(fn=mse, point=z, samples=samples)
    z = z - learning_rate * grad
    # Report the freshly updated estimate on every 1000th iteration.
    if idx % 1000 == 0:
        print(f"Estimated parameter z: {z}")

Estimated parameter z: 1.0010834738516443
Estimated parameter z: 1.521780168707924
Estimated parameter z: 1.7137839621510382
Estimated parameter z: 1.816582606432618
Estimated parameter z: 1.878497548517794
Estimated parameter z: 1.9179828985019056
Estimated parameter z: 1.943992441664986
Estimated parameter z: 1.9614681830785998
Estimated parameter z: 1.9733601792092224
Estimated parameter z: 1.9815205925441315


## As an RL problem

In [3]:
def compute_reward(z, samples):
    """Reward for parameter ``z``: the negated mean squared error.

    The error is measured between ``f(x)`` and ``g(x, z)`` for every ``x`` in
    ``samples``, so a larger reward means a smaller error.
    """
    squared_errors = []
    for x in samples:
        residual = f(x) - g(x, z)
        squared_errors.append(residual**2)
    mean_squared_error = np.mean(squared_errors)
    return -mean_squared_error

# Fixed seed so the 100 sample points are the same on every run.
np.random.seed(0)
samples = np.random.uniform(low=0.1, high=1.0, size=100)

z = 1.0  # initial guess for the exponent
learning_rate = 0.01

# Gradient ascent: step along the finite-difference gradient of the reward.
for episode in range(10000):
    grad = gradient(fn=compute_reward, point=z, samples=samples)
    z = z + learning_rate * grad
    # Report the freshly updated estimate on every 1000th episode.
    if episode % 1000 == 0:
        print(f"Estimated parameter z: {z}")

Estimated parameter z: 1.0010834738516443
Estimated parameter z: 1.521780168707924
Estimated parameter z: 1.7137839621510382
Estimated parameter z: 1.816582606432618
Estimated parameter z: 1.878497548517794
Estimated parameter z: 1.9179828985019056
Estimated parameter z: 1.943992441664986
Estimated parameter z: 1.9614681830785998
Estimated parameter z: 1.9733601792092224
Estimated parameter z: 1.9815205925441315
