In [50]:
# Simple Linear Regression Using OLS and Gradient Descent



In [50]:
!pip install numpy



In [49]:
import numpy as np

In [51]:
# Find the parameters using the Ordinary Least Squares (OLS) Method
# SimpleLinearRegression involves only a single feature (x) and the target value is a linear estimator
# y_pred = w0 + w1 * x
class SimpleLinearRegression:
    def __init__(self):
        self.slope = None
        self.intercept = None

    def fit(self, x, y):
        """
        Calculates the slope (w1) and intercept (w0) using the least squares method.
        x and y should be 1-dimensional arrays/lists.
        """
        # Calculate the mean of the input (x) and output data (y)
        x_mean = np.mean(x)
        y_mean = np.mean(y)

        # Calculate the terms needed for the slope and intercept using the formulae that we derived
        numerator = np.sum((x - x_mean) * (y - y_mean))
        denominator = np.sum((x - x_mean) ** 2)

        # Calculate the slope (w1) and intercept (w0)
        self.slope = numerator / denominator
        self.intercept = y_mean - self.slope * x_mean

    def predict(self, x):
        """
        Makes predictions using the calculated slope and intercept or in common terms the weights (y = w0 + w1*x).
        """
        if self.slope is None or self.intercept is None:
            raise Exception("Model not trained yet. Call fit() first.")
        return self.intercept + self.slope * x

# --- Example Usage ---
# Sample data: e.g., Years of Experience vs. Salary (in lakhs per annum)
x_data = np.array([1, 2, 3, 4, 5])
y_data = np.array([0.9, 2.5, 3.6, 3.5, 4.6])

# Create and train the model
model = SimpleLinearRegression()
model.fit(x_data, y_data)

# Print the results
print(f"Simple linear equation: y = {model.intercept:.2f} + {model.slope:.2f}x") # Correction for variable names
print(f"Slope (Coefficient): {model.slope}")
print(f"Intercept: {model.intercept}")

# Make a prediction for a new value (e.g., 6 years of experience)
new_x = np.array([6])
prediction = model.predict(new_x)
print(f"Prediction for x={new_x[0]}: {prediction[0]:.2f}")


Simple linear equation: y = 0.50 + 0.84x
Slope (Coefficient): 0.8399999999999999
Intercept: 0.5000000000000004
Prediction for x=6: 5.54


In [2]:
from sklearn.metrics import mean_squared_error

In [9]:
# Measure the mean squared error for the whole training data
# usually it is done on a validation data where the actual target values are present
pred_y_data = model.predict(x_data)
# the above finds the predicted values on the input x_data
# Print the predicted y values
print(pred_y_data)
mean_sq_error_y = mean_squared_error(y_data, pred_y_data)

[1.34 2.18 3.02 3.86 4.7 ]


In [8]:
mean_sq_error_y

0.15440000000000004

In [10]:
# Simple Linear Regression Using Gradient Descent

In [11]:
# We will use the same dataset

In [34]:
# initialization of the parameters
# let us assume w0 = 0, w1 = 0 (any initialization will work, but the algorithm will not converge)
w0, w1 = 0, 0

In [36]:
# Define the learning rate or step size
lr = 0.0001

In [43]:
# Apply gradient descent
# Gradient descent is dependent on the loss function that we want to minimize
# we generally use Mean Squared Error (MSE) or Sum of Squared Error (SSE)
# Let us take the Sum of Squared Error (SSE)
# L = sum_{i=1 to n} (y_i - y_i_pred) ^ 2, apply gradient descent as
# w_new = w_old - lr * ∂L/∂w
# there are two ways to end the gradient descent algorithm
# 1. Upto maximum iterations 2. Until convergence (when the parameters do not change)
# Let us take batch training
max_iterations = 100 # if the number of iteration is very high, then the solution will diverge
w0, w1 = 1, 1 # initializing to 0 will have issues, figure that out
# you can randomly initialize
# w0, w1 = np.random.random(2)
print(w0, w1)
def batch_gradient_descent_with_max_iterations(w0, w1, max_iterations):
    """Find the parameters using the maximum iterations."""
    for i in range(max_iterations):
        total_update_in_w0 = 0
        total_update_in_w1 = 0
        for index in range(y_data.shape[0]):
            pred_y_data = w0 + w1 * x_data[index]
            update_in_w0 = y_data[index] - pred_y_data
            update_in_w1 = (y_data[index] - pred_y_data) * x_data[index]
            total_update_in_w0 += update_in_w0
            total_update_in_w1 += update_in_w1
        w0 -= lr * total_update_in_w0  # collect updates from all samples and update the parameters
        w1 -= lr * total_update_in_w1
    return w0, w1

def predict_target_values(w0, w1, x):
    """Predict target values using a linear regression model or weights."""
    predicted_targets = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        predicted_targets[i] = w0 + w1 * x[i]
    return predicted_targets

w0, w1 = batch_gradient_descent_with_max_iterations(w0, w1, max_iterations)
predicted_targets_using_batch_gd = predict_target_values(w0, w1, x_data)
print(y_data)
print(predicted_targets_using_batch_gd)
mean_squared_error_batch_gd = mean_squared_error(y_data, predicted_targets_using_batch_gd)
print(mean_squared_error_batch_gd)

1 1
[0.9 2.5 3.6 3.5 4.6]
[2.28709235 3.50888415 4.73067596 5.95246776 7.17425956]
3.3723421863806697


In [44]:
def batch_gradient_descent_till_convergence(w0, w1):
    """Find the parameters till convergence i.e. parameters do not change or the change in negligible."""
    prev_w0, prev_w1 = w0, w1
    tolerance = 0.001
    for i in range(max_iterations):
        total_update_in_w0 = 0
        total_update_in_w1 = 0
        for index in range(y_data.shape[0]):
            pred_y_data = w0 + w1 * x_data[index]
            update_in_w0 = y_data[index] - pred_y_data
            update_in_w1 = (y_data[index] - pred_y_data) * x_data[index]
            total_update_in_w0 += update_in_w0
            total_update_in_w1 += update_in_w1
        w0 -= lr * total_update_in_w0  # collect updates from all samples and update the parameters
        w1 -= lr * total_update_in_w1
        if abs(w0 - prev_w0) <= tolerance and abs(w1 - prev_w1) <= tolerance:
            break
        else:
            prev_w0, prev_w1 = w0, w1
    return w0, w1

In [48]:
w0, w1 = 1, 1
w0, w1 = batch_gradient_descent_till_convergence(w0, w1)
predicted_targets_using_batch_gd_converged = predict_target_values(w0, w1, x_data)
print(y_data)
print(predicted_targets_using_batch_gd_converged)
mean_squared_error_batch_gd_converged = mean_squared_error(y_data, predicted_targets_using_batch_gd_converged)
print(mean_squared_error_batch_gd_converged)

[0.9 2.5 3.6 3.5 4.6]
[2.28709235 3.50888415 4.73067596 5.95246776 7.17425956]
3.3723421863806697
