In [4]:
import torch
from matplotlib import pyplot as plt

# Generate regression data
def regression_data(n=100, w=torch.Tensor([-0.7, 0.5]), x_max=1):
    x = torch.rand(n) * x_max
    y = x * w[1] + w[0] + 0.05 * torch.rand(n)
    return x, y

# Define the empirical risk (mean squared error)
def empirical_risk(x, y, w, b):
    y_pred = w * x + b
    return torch.mean((y_pred - y)**2)

# Stochastic Gradient Descent (SGD) algorithm
def sgd_algorithm(x, y, w_init=torch.rand(2), lr=0.1, epochs=100):
    w = torch.tensor(w_init.clone(), requires_grad=True)
    b = torch.tensor(torch.rand(1), requires_grad=True)
    
    empirical_risks = []
    
    for epoch in range(epochs):
        # Shuffle indices for each epoch
        indices = torch.randperm(x.shape[0])
        
        for idx in indices:
            x_i = x[idx]
            y_i = y[idx]
            
            # Compute gradients of the empirical risk (MSE) w.r.t. w and b
            loss = (w * x_i + b - y_i)**2
            loss.backward()
            
            # Update parameters using gradients
            with torch.no_grad():
                w -= lr * w.grad
                b -= lr * b.grad
                
                # Manually zero the gradients after updating
                w.grad.zero_()
                b.grad.zero_()
        
        # Compute and store empirical risk at the end of each epoch
        loss_epoch = empirical_risk(x, y, w, b)
        empirical_risks.append(loss_epoch.item())
    
    return w, b, empirical_risks

# Generate data
x, y = regression_data()

# Run SGD algorithm
w_hat, b_hat, empirical_risks = sgd_algorithm(x, y)

# Plot empirical risk evolution during a single epoch
plt.figure(fig_size=(10, 5))
plt.plot(empirical_risks, label='Empirical Risk')
plt.xlabel('Iterations')
plt.ylabel('Empirical Risk')
plt.title('Empirical Risk Evolution During a Single Epoch')
plt.legend()
plt.show()

# Plot empirical risk evolution over 100 epochs
_, _, empirical_risks_over_epochs = sgd_algorithm(x, y, epochs=100)

plt.figure(fig_size=(10, 5))
plt.plot(empirical_risks_over_epochs, label='Empirical Risk')
plt.xlabel('Epochs')
plt.ylabel('Empirical Risk')
plt.title('Empirical Risk Evolution Over 100 Epochs')
plt.legend()
plt.show()


  w = torch.tensor(w_init.clone(), requires_grad=True)
  b = torch.tensor(torch.rand(1), requires_grad=True)


RuntimeError: grad can be implicitly created only for scalar outputs