Rina Buoy, PhD

## Generate Training Datasets

$y = \theta_0 + \theta_1 x_1 + \epsilon$, where $\epsilon$ is standard-normal noise

$\theta_0 = 4$

$\theta_1 = 3$


In [5]:
import numpy as np
import matplotlib.pyplot as plt

# Generate a synthetic dataset for the demonstration: y = 4 + 3 * x + noise
np.random.seed(0) # fix the seed so the sampled data is the same on every run
X = 2 * np.random.rand(100, 1) # 100 feature values, uniform on [0, 2), as a column
y = 4 + 3 * X + np.random.randn(100, 1) # observed targets: true line (theta_0=4, theta_1=3) plus standard-normal noise epsilon




In [None]:

# Scatter the raw training samples (feature on the x-axis, target on the y-axis)
plt.scatter(X, y)
plt.xlabel('X')
plt.ylabel('y')

## Gradient Descent

In [None]:
# Hyperparameters for full-batch gradient descent
learning_rate = 0.1  # step size applied to each gradient
n_iterations = 10  # number of parameter updates to perform

# Prepend a column of ones to X so that theta[0] acts as the bias (intercept).
# The row count is read from X itself so this stays correct if the number of
# generated samples changes.
X_b = np.c_[np.ones((X.shape[0], 1)), X]

# Function to compute the loss (mean squared error)
def compute_loss(theta, X, y):
    """Return the mean squared error of the linear model ``X.dot(theta)``.

    Args:
        theta: Parameter array multiplied against ``X``.
        X: Design matrix (one row per sample).
        y: Target values compared against the predictions.

    Returns:
        The mean of the squared residuals ``y - X.dot(theta)``.
    """
    predictions = X.dot(theta)
    residuals = y - predictions
    return np.mean(np.square(residuals))

# Function to compute the gradients
def compute_gradients(theta, X, y):
    """Return the gradient of the mean squared error with respect to theta.

    Args:
        theta: Current parameter array.
        X: Design matrix (one row per sample).
        y: Target values.

    Returns:
        ``-2/m * X.T.dot(y - X.dot(theta))`` where ``m`` is the number of
        rows of ``X``.
    """
    n_samples = X.shape[0]
    residuals = y - X.dot(theta)
    scale = -2 / n_samples
    return scale * X.T.dot(residuals)

# Function to perform gradient descent
def gradient_descent(X, y, learning_rate, n_iterations):
    """Fit linear-regression parameters with full-batch gradient descent.

    Starts from randomly initialised parameters and repeatedly steps against
    the MSE gradient computed on the whole dataset. After every step a new
    matplotlib figure is opened showing the data and the current fitted line.

    Args:
        X: 2-D design matrix (one row per sample). Its last column is used as
            the x-axis of the per-iteration plots.
        y: Target values; in this notebook a column vector with one row per
            sample.
        learning_rate: Step size multiplied with the gradient.
        n_iterations: Number of parameter updates to perform.

    Returns:
        A tuple ``(theta, theta_trajectory, losses)``: the final parameters,
        the list of parameters after each update, and the list of full-data
        MSE values after each update.
    """
    # One parameter per column of X (instead of a hard-coded 2), so the
    # routine also works for design matrices with more features.
    theta = np.random.randn(X.shape[1], 1)
    theta_trajectory = []
    losses = []
    for iteration in range(n_iterations):
        gradients = compute_gradients(theta, X, y)
        # Rebinding (not in-place update) keeps each stored theta distinct.
        theta = theta - learning_rate * gradients
        theta_trajectory.append(theta)
        losses.append(compute_loss(theta, X, y))

        # Snapshot of the current fit: data points plus the model's line.
        plt.figure()

        plt.scatter(X[:,-1], y)
        plt.plot(X[:,-1], X.dot(theta), color='red')
        plt.xlabel('X')
        plt.ylabel('y')
        plt.title(f'Gradient Descent Linear Regression : Iter {iteration}')
    return theta,theta_trajectory,losses

# Run full-batch gradient descent on the bias-augmented data. The per-step
# parameter and loss histories are kept for the trajectory plot further down.
theta,theta_trajectory,losses = gradient_descent(X_b, y, learning_rate, n_iterations)

# Print the final parameters; the data was generated with theta_0 = 4, theta_1 = 3
print("Parameters obtained by gradient descent:", theta)

# Plot the data and the linear regression line

#plt.show()


## Loss Surface

In [None]:


# Evaluate the MSE loss on a regular grid of (theta0, theta1) candidates.
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-10, 10, 100)
# With meshgrid's default 'xy' indexing both meshes have shape
# (len(theta1_vals), len(theta0_vals)): rows follow theta1, columns theta0.
theta0_mesh, theta1_mesh = np.meshgrid(theta0_vals, theta1_vals)
loss_surface = np.zeros_like(theta0_mesh)
# Walk the mesh's own shape so the indices stay valid even when the two
# grids are given different resolutions.
for i, j in np.ndindex(theta0_mesh.shape):
    theta_ij = np.array([[theta0_mesh[i, j]], [theta1_mesh[i, j]]])
    loss_surface[i, j] = compute_loss(theta_ij, X_b, y)

# Render the loss as a 3-D surface over the parameter plane.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(theta0_mesh, theta1_mesh, loss_surface, cmap='viridis')
ax.set_xlabel('Theta 0')
ax.set_ylabel('Theta 1')
ax.set_zlabel('Loss')
ax.set_title('Loss Surface')
plt.show()

## Learning Trajectory

In [None]:
# Recompute the loss surface on a regular grid of (theta0, theta1) candidates.
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-10, 10, 100)
# With meshgrid's default 'xy' indexing both meshes have shape
# (len(theta1_vals), len(theta0_vals)): rows follow theta1, columns theta0.
theta0_mesh, theta1_mesh = np.meshgrid(theta0_vals, theta1_vals)
loss_surface = np.zeros_like(theta0_mesh)
# Walk the mesh's own shape so the indices stay valid even when the two
# grids are given different resolutions.
for i, j in np.ndindex(theta0_mesh.shape):
    theta_ij = np.array([[theta0_mesh[i, j]], [theta1_mesh[i, j]]])
    loss_surface[i, j] = compute_loss(theta_ij, X_b, y)

# Stack the recorded (2, 1) parameter vectors into an (n_steps, 2) array.
# reshape (rather than squeeze) keeps the array 2-D even for a single step.
theta_trajectory = np.array(theta_trajectory).reshape(-1, 2)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Semi-transparent surface so the trajectory drawn on top remains visible.
ax.plot_surface(theta0_mesh, theta1_mesh, loss_surface, cmap='viridis', alpha=0.5)
ax.plot(theta_trajectory[:,0], theta_trajectory[:,1], losses, color='red', marker='o')
ax.set_xlabel('Theta 0')
ax.set_ylabel('Theta 1')
ax.set_zlabel('Loss')
ax.set_title('Gradient Descent Trajectory')
plt.show()

## Mini-Batch Training

In [None]:


# Hyperparameters for the mini-batch run
batch_size = 20  # number of samples used for each parameter update
n_iterations = 2  # number of passes over the shuffled dataset
learning_rate = 0.1  # step size applied to each mini-batch gradient

# Function to compute the loss (mean squared error)
def compute_loss(theta, X, y):
    """Mean squared error between the targets ``y`` and ``X.dot(theta)``.

    Args:
        theta: Parameter array multiplied against ``X``.
        X: Design matrix (one row per sample).
        y: Target values.

    Returns:
        The average of the squared differences ``y - X.dot(theta)``.
    """
    squared_error = np.square(y - X.dot(theta))
    return np.mean(squared_error)

# Function to compute the gradients using a mini-batch
def compute_gradients(theta, X_batch, y_batch):
    """Return the MSE gradient with respect to theta on one mini-batch.

    Args:
        theta: Current parameter array.
        X_batch: Rows of the design matrix belonging to the batch.
        y_batch: Target values for the same rows.

    Returns:
        ``-2/b * X_batch.T.dot(y_batch - X_batch.dot(theta))`` where ``b`` is
        the number of rows in the batch.
    """
    batch_rows = X_batch.shape[0]
    batch_residuals = y_batch - X_batch.dot(theta)
    factor = -2 / batch_rows
    return factor * X_batch.T.dot(batch_residuals)

# Function to perform gradient descent with mini-batch training
def gradient_descent(X, y, learning_rate, n_iterations, batch_size):
    """Fit linear-regression parameters with mini-batch gradient descent.

    On every pass the samples are reshuffled and consumed in consecutive
    slices of ``batch_size`` rows (the last slice may be shorter). Each slice
    produces one parameter update, after which the loss on the full dataset
    is recorded and a new matplotlib figure with the current fit is opened.

    Args:
        X: 2-D design matrix (one row per sample). Its last column is used as
            the x-axis of the per-update plots.
        y: Target values, indexable along the same first axis as ``X``.
        learning_rate: Step size multiplied with each mini-batch gradient.
        n_iterations: Number of passes over the shuffled dataset.
        batch_size: Number of rows per mini-batch; must be at least 1.

    Returns:
        A tuple ``(theta_trajectory, losses)``. Both lists start with the
        randomly initialised state and gain one entry per parameter update.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A negative step would make the batch loop empty and silently skip all
    # training, so reject invalid sizes up front.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # One parameter per column of X (instead of a hard-coded 2).
    theta = np.random.randn(X.shape[1], 1)
    theta_trajectory = [theta]
    losses = [compute_loss(theta, X, y)]
    m = len(X)
    for iteration in range(n_iterations):
        # Apply the same permutation to X and y so the pairs stay aligned.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        y_shuffled = y[shuffled_indices]
        for i in range(0, m, batch_size):
            X_batch = X_shuffled[i:i+batch_size]
            y_batch = y_shuffled[i:i+batch_size]
            gradients = compute_gradients(theta, X_batch, y_batch)
            theta = theta - learning_rate * gradients
            # The loss is tracked on the full dataset, not just the batch.
            loss = compute_loss(theta, X, y)
            theta_trajectory.append(theta)
            losses.append(loss)

            # Snapshot of the current fit; `i` is the batch's start offset.
            plt.figure()

            plt.scatter(X[:,-1], y)
            plt.plot(X[:,-1], X.dot(theta), color='red')
            plt.xlabel('X')
            plt.ylabel('y')
            plt.title(f'Gradient Descent Linear Regression : Iteration {iteration} Batch {i}')

    return theta_trajectory, losses

# Run mini-batch gradient descent on the bias-augmented data. This rebinds
# theta_trajectory and losses to the mini-batch histories (initial state included).
theta_trajectory,losses = gradient_descent(X_b, y, learning_rate, n_iterations, batch_size)




In [None]:
# Recompute the loss surface on a regular grid of (theta0, theta1) candidates.
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-10, 10, 100)
# With meshgrid's default 'xy' indexing both meshes have shape
# (len(theta1_vals), len(theta0_vals)): rows follow theta1, columns theta0.
theta0_mesh, theta1_mesh = np.meshgrid(theta0_vals, theta1_vals)
loss_surface = np.zeros_like(theta0_mesh)
# Walk the mesh's own shape so the indices stay valid even when the two
# grids are given different resolutions.
for i, j in np.ndindex(theta0_mesh.shape):
    theta_ij = np.array([[theta0_mesh[i, j]], [theta1_mesh[i, j]]])
    loss_surface[i, j] = compute_loss(theta_ij, X_b, y)

# Stack the recorded (2, 1) parameter vectors into an (n_steps, 2) array.
# reshape (rather than squeeze) keeps the array 2-D even for a single entry.
theta_trajectory = np.array(theta_trajectory).reshape(-1, 2)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Semi-transparent surface so the trajectory drawn on top remains visible.
ax.plot_surface(theta0_mesh, theta1_mesh, loss_surface, cmap='viridis', alpha=0.5)
ax.plot(theta_trajectory[:,0], theta_trajectory[:,1], losses, color='red', marker='o')
ax.set_xlabel('Theta 0')
ax.set_ylabel('Theta 1')
ax.set_zlabel('Loss')
ax.set_title('Gradient Descent Trajectory with Mini-Batch Training')
plt.show()