# Linear Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 1. Build a synthetic dataset that follows y = 4 + 3x plus Gaussian noise
np.random.seed(42)  # fixed seed so every run draws the same samples
X = np.random.rand(100, 1) * 2  # 100 samples with a single feature each
y = (4 + 3 * X) + np.random.randn(100, 1)  # true line, then standard-normal noise on top
# plt.scatter(X,y)

In [None]:
# 2. Add a bias term (x0 = 1) to each instance in X
# The column of ones is sized from X itself (not a hard-coded 100), so this
# cell keeps working if the number of samples generated in step 1 is changed.
X_b = np.c_[np.ones((X.shape[0], 1)), X]  # columns: [1, x]


In [None]:
# 3. Set up the training hyperparameters and the starting point for theta
n_iterations = 100  # number of gradient-descent steps to run
learning_rate = 0.05  # step size applied to each gradient
m = X_b.shape[0]  # number of rows (samples) in the design matrix
theta = np.random.randn(2, 1)  # standard-normal start for the two parameters (bias, slope)


In [None]:
# 4. Easier Gradient Calculation function
def compute_gradient(X, y, theta):
    """Return the gradient of the mean squared error with respect to theta.

    Evaluates (2/m) * X^T (X.theta - y), where m = len(y).

    Args:
        X: Feature matrix with one row per sample.
        y: Target values. May be given as a column (m, 1) or flat (m,);
            it is reshaped to match the predictions before subtracting.
        theta: Current model parameters.

    Returns:
        The gradient array produced by X.T.dot(errors), scaled by 2/m.

    Raises:
        ValueError: If y does not hold the same number of elements as the
            predictions X.dot(theta).
    """
    m = len(y)
    predictions = X.dot(theta)  # Predictions: X * theta
    # Align y with the predictions' shape. Without this, a flat (m,) y
    # subtracted from (m, 1) predictions silently broadcasts to an (m, m)
    # matrix and the returned gradient has the wrong shape and values.
    targets = np.asarray(y).reshape(predictions.shape)
    errors = predictions - targets  # Errors: predictions minus actual y
    gradients = (2/m) * X.T.dot(errors)
    return gradients

In [None]:
# 5. Gradient Descent function
def gradient_descent(X, y, theta, learning_rate, n_iterations):
    """Run batch gradient descent and record the MSE after every update.

    Args:
        X: Feature matrix with one row per sample.
        y: Target values, one per row of X.
        theta: Initial parameters. Not modified in place; each step rebinds
            the local name to a new array.
        learning_rate: Multiplier applied to the gradient on every step.
        n_iterations: Number of update steps to perform.

    Returns:
        A tuple (theta, loss_history): the parameters after the final step
        and a list holding one MSE value per iteration.
    """
    # Take the sample count from the data passed in rather than from a
    # module-level variable, so the loss is correct for any dataset and the
    # function does not depend on notebook state.
    m = len(y)
    loss_history = []
    for _ in range(n_iterations):
        gradients = compute_gradient(X, y, theta)  # Compute gradients
        theta = theta - learning_rate * gradients  # Update parameters
        # The loss is measured with the freshly updated theta
        loss = (1/m) * np.sum((X.dot(theta) - y) ** 2)  # Mean squared error (MSE)
        loss_history.append(loss)  # Track loss for plotting
    return theta, loss_history

In [None]:
# 6. Run the optimiser from the randomly initialised theta
theta_best, loss_history = gradient_descent(
    X_b,
    y,
    theta,
    learning_rate=learning_rate,
    n_iterations=n_iterations,
)


In [None]:
# 7. Visualise training progress: MSE per epoch as a line, with a marker on each epoch
plt.plot(loss_history, label="Loss Curve")
plt.scatter(
    range(len(loss_history)),
    loss_history,
    s=10,
    color="red",
    label="Loss Points",
)
# Axis text and grid do not depend on the artists above; the legend picks up both labels
plt.xlabel("Epochs")
plt.ylabel("Mean Squared Error (MSE)")
plt.title("Loss over Epochs")
plt.grid(True)
plt.legend()
plt.show()


# 8. Overlay the fitted line on the original samples
plt.scatter(X, y, label="Data points", color="blue")
plt.plot(X, X_b.dot(theta_best), label="Regression line", color="red")
plt.xlabel("X")
plt.ylabel("y")
plt.title("Linear Regression with Gradient Descent")
plt.legend()
plt.show()

# Report the parameters learned by gradient descent
print(f"Final parameters (theta): \n{theta_best}")

In [2]:
# --- Exercise
# Change the values of:
#   learning_rate
#   n_iterations
# then re-run the cells above and compare the resulting loss curves.