In [None]:
def step(v, direction, step_size):
    """Return the point reached by moving from v along direction.

    Each coordinate of ``v`` is shifted by ``step_size`` times the matching
    coordinate of ``direction``. Pairing is done with ``zip``, so the result
    is as long as the shorter of the two inputs. A new list is returned; the
    inputs are not modified.
    """
    moved = []
    for coordinate, delta in zip(v, direction):
        moved.append(coordinate + step_size * delta)
    return moved


def sum_of_squares_gradient(v):
    """Return the gradient of f(v) = sum(v_i ** 2) evaluated at v.

    The partial derivative with respect to each coordinate is twice that
    coordinate, so the result is a new list with every entry doubled.
    """
    gradient = []
    for coordinate in v:
        gradient.append(2 * coordinate)
    return gradient

In [None]:
import random
import math


# Start from a random point with three integer coordinates in [-10, 10].
v = [random.randint(-10, 10) for _ in range(3)]
tolerance = 1e-7

# Repeatedly take a small step against the gradient of the sum of squares
# until two consecutive points are closer together than `tolerance`.
while True:
    gradient = sum_of_squares_gradient(v)
    next_v = step(v, gradient, -0.01)        # negative size => move downhill
    if not math.dist(next_v, v) < tolerance:
        v = next_v                           # still moving: keep iterating
        continue
    break                                    # the step was negligible: done

In [None]:
# Show the point at which the descent loop in the previous cell stopped.
print(v)

In [None]:
# import random, math

# # Define the sum_of_squares function
# def sum_of_squares(v):
#     return sum(i**2 for i in v)

# # Define the sum_of_squares_gradient function
# def sum_of_squares_gradient(v):
#     return [2 * i for i in v]

# # Define the step function
# def step(v, gradient, step_size):
#     return [v_i - step_size * gradient_i for v_i, gradient_i in zip(v, gradient)]

# v = [random.randint(-10,10) for i in range(3)]
# tolerance = 0.0000001

# while True:
#     gradient = sum_of_squares_gradient(v)   # compute the gradient at v
#     step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001]
#     for step_size in step_sizes:
#         next_v = step(v, gradient, -step_size)
#         if sum_of_squares(next_v) < sum_of_squares(v):
#             v = next_v
#             break
#     if math.dist(next_v, v) < tolerance:     # stop if we're converging
#         break

In [2]:
def safe(f):
    """Return a wrapper around f that yields infinity instead of raising.

    The wrapper forwards all positional and keyword arguments to ``f`` and
    returns its result. If ``f`` raises any ``Exception``, the wrapper
    returns ``float('inf')`` so that a minimiser treats that input as the
    worst possible candidate.

    Only ``Exception`` subclasses are absorbed. ``KeyboardInterrupt`` and
    ``SystemExit`` derive from ``BaseException`` and still propagate, so a
    long-running optimisation loop can be interrupted.
    """
    def safe_f(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            return float('inf')         # this means "infinity" in Python
    return safe_f


def minimize_batch(target_fn, gradient_fn, theta_0, tolerance=0.000001):
    """Search for a theta that minimises target_fn using gradient descent.

    On every iteration the gradient at the current theta is computed, one
    candidate is produced per entry of a fixed list of step sizes (via the
    module-level ``step`` called with a negated step size), and the candidate
    with the smallest target value is selected. The target is wrapped with
    ``safe`` so a candidate on which it raises is scored as infinity.

    The loop ends when the target value changes by less than ``tolerance``
    between the current theta and the selected candidate; the current theta
    (not the candidate) is returned.
    """
    candidate_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
    guarded_target = safe(target_fn)          # errors count as infinity
    theta = theta_0
    value = guarded_target(theta)
    while True:
        gradient = gradient_fn(theta)
        candidates = [step(theta, gradient, -size) for size in candidate_sizes]
        # keep whichever candidate gives the lowest target value
        next_theta = min(candidates, key=guarded_target)
        next_value = guarded_target(next_theta)
        if abs(value - next_value) < tolerance:
            # improvement is negligible: treat as converged
            return theta
        theta = next_theta
        value = next_value
            
def negate(f):
    """Build a function that calls f with the same arguments and returns
    the arithmetic negation of its result."""
    def negated(*args, **kwargs):
        result = f(*args, **kwargs)
        return -result
    return negated
def negate_all(f):
    """Build a function that calls f with the same arguments and returns a
    list holding the negation of every item in f's (iterable) result."""
    def negated_all(*args, **kwargs):
        flipped = []
        for y in f(*args, **kwargs):
            flipped.append(-y)
        return flipped
    return negated_all
def maximize_batch(target_fn, gradient_fn, theta_0, tolerance=0.000001):
    """Search for a theta that maximises target_fn.

    Maximising a function is done here by minimising its negation: the target
    is wrapped with ``negate``, the gradient with ``negate_all``, and both are
    handed to ``minimize_batch`` together with the starting point and
    tolerance. Whatever ``minimize_batch`` returns is returned unchanged.
    """
    flipped_target = negate(target_fn)
    flipped_gradient = negate_all(gradient_fn)
    return minimize_batch(flipped_target, flipped_gradient, theta_0, tolerance)
    



In [3]:
import numpy as np

# Define the target function (mean squared error)
def target_fn(theta, x, y):
    """Return the mean squared error of the linear model x . theta against y.

    Predictions are ``np.dot(x, theta)``; the result is the mean of the
    squared differences between ``y`` and those predictions.
    """
    predictions = np.dot(x, theta)
    residuals = y - predictions
    return np.mean(residuals ** 2)

# Define the gradient of the target function
def gradient_fn(theta, x, y):
    """Return the gradient of the mean squared error with respect to theta.

    Each residual ``y - np.dot(x, theta)`` is multiplied into its own row of
    ``x``, the rows are averaged along axis 0, and the average is scaled by
    -2. As used in this file, ``x`` has one row per sample and ``y`` one
    entry per sample.
    """
    residuals = y - np.dot(x, theta)
    # turn residuals into a column so each one scales its own row of x
    weighted_rows = residuals[:, None] * x
    return -2 * np.mean(weighted_rows, axis=0)

# Define the step function
def step(theta, gradient, step_size):
    """Return theta moved by step_size along gradient.

    The result is ``theta + step_size * gradient``. Callers that want to
    descend pass a negative ``step_size``; ``minimize_batch`` does exactly
    that with ``step(theta, gradient, -step_size)``. Subtracting here would
    cancel that negation and turn descent into ascent.
    """
    return theta + step_size * gradient

# Define the main function
def main():
    """Fit a two-parameter linear model to noisy synthetic data and print
    the true and the estimated parameters."""
    # Synthetic regression problem: 100 random 2-D points, with targets
    # generated from a known theta plus Gaussian noise (scale 0.1).
    x = np.random.rand(100, 2)
    true_theta = np.array([2, -3])
    noise = np.random.normal(scale=0.1, size=100)
    y = np.dot(x, true_theta) + noise

    # Bind the data so the optimiser sees functions of theta only.
    def objective(theta):
        return target_fn(theta, x, y)

    def objective_gradient(theta):
        return gradient_fn(theta, x, y)

    # Start the search from the origin.
    theta_0 = np.array([0, 0])
    theta = minimize_batch(objective, objective_gradient, theta_0)

    print("True theta:", true_theta)
    print("Estimated theta:", theta)

# Run the demo only when this code is executed directly (its module name is
# "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()