### Write a function to implement linear regression from scratch using gradient descent.

In [None]:
# import numpy as np

# def linear_regression(X, y, learning_rate=0.01, num_iterations=1000):
#     n_samples, n_features = X.shape
#     weights = np.zeros(n_features)
#     bias = 0

#     for _ in range(num_iterations):
#         y_pred = np.dot(X, weights) + bias
#         print(y_pred)
#         d_weights = (1/n_samples) * np.dot(X.T, (y_pred - y))
#         d_bias = (1/n_samples) * np.sum(y_pred - y)
#         weights -= learning_rate * d_weights
#         bias -= learning_rate * d_bias

#     return weights, bias

# # Example usage
# X = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
# y = np.array([3, 5, 7, 9])
# weights, bias = linear_regression(X, y)
# print("weights:", weights)
# print("Bias:", bias)

Converged at iteration 638
Weights: [1.11803122 1.11803122]
Bias: 5.990249192151607
Final cost: 4.850437962270154e-05
Predictions: [2.99025662 4.99025167 6.99024672 8.99024177]


In [None]:
import numpy as np

def linear_regression(X, y, learning_rate=0.01, num_iterations=1000, tolerance=1e-6):
    """Fit ``y ~= X @ weights + bias`` using batch gradient descent.

    The cost being minimized is half the mean squared error,
    ``J = 1 / (2 * n_samples) * sum((y_pred - y) ** 2)``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. Anything ``np.asarray`` can convert to a float
        array is accepted (ndarray, nested lists, ...).
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target values. A single-column 2-D array is flattened to 1-D.
    learning_rate : float, default 0.01
        Step size applied to each gradient update.
    num_iterations : int, default 1000
        Maximum number of gradient steps.
    tolerance : float, default 1e-6
        Training stops early once the absolute change in cost between two
        consecutive iterations falls below this value.

    Returns
    -------
    weights : numpy.ndarray of shape (n_features,)
        Learned coefficient for each feature.
    bias : float
        Learned intercept.
    costs : list
        Cost recorded at every iteration that ran. Each entry is computed
        *before* that iteration's parameter update.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, contains no samples, or ``y`` does not hold
        exactly one target per row of ``X``.
    """
    # Coerce inputs so lists work and integer data is handled as floats
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-D with shape (n_samples, n_features); got ndim={X.ndim}"
        )

    # Get dimensions
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")

    # A column vector would silently broadcast (y_pred - y) to (n, n),
    # so collapse it to 1-D before doing any arithmetic.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y[:, 0]
    if y.shape != (n_samples,):
        raise ValueError(
            f"y must have shape ({n_samples},) to match X; got {y.shape}"
        )

    # Initialize parameters
    weights = np.zeros(n_features)
    bias = 0.0

    # Store costs for monitoring convergence
    costs = []

    for iteration in range(num_iterations):
        # Forward pass
        y_pred = np.dot(X, weights) + bias

        # Prediction error, shared by the cost and both gradients
        residual = y_pred - y

        # Calculate cost (MSE); the extra 1/2 cancels the 2 produced when
        # differentiating the square, which keeps the gradients tidy
        cost = (1/(2*n_samples)) * np.sum(residual ** 2)
        costs.append(cost)

        # Calculate gradients.
        # dJ/dw_j = (1/n) * sum_i residual_i * X[i, j]. Entry j of
        # np.dot(X.T, residual) is exactly that sum over samples, so a single
        # dot product yields the partial derivative for every weight at once.
        d_weights = (1/n_samples) * np.dot(X.T, residual)
        # dJ/db = (1/n) * sum_i residual_i
        d_bias = (1/n_samples) * np.sum(residual)

        # Update parameters (step against the gradient)
        weights -= learning_rate * d_weights
        bias -= learning_rate * d_bias

        # Check convergence: stop once the cost has effectively plateaued
        if iteration > 0 and abs(costs[-1] - costs[-2]) < tolerance:
            print(f"Converged at iteration {iteration}")
            break

    return weights, bias, costs

# Demo: fit the model on a four-sample toy dataset using standardized inputs
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
])
y = np.array([3, 5, 7, 9])

# Standardize every column: subtract its mean, then divide by its standard deviation
X_normalized = np.divide(X - np.mean(X, axis=0), np.std(X, axis=0))

# Run gradient descent with the default hyperparameters
weights, bias, costs = linear_regression(X_normalized, y)

# Report the learned parameters and the last recorded cost
print("Weights:", weights)
print("Bias:", bias)
print("Final cost:", costs[-1])

# Evaluate the fitted model on the training inputs
y_pred = X_normalized.dot(weights) + bias
print("Predictions:", y_pred)

Converged at iteration 638
Weights: [1.11803122 1.11803122]
Bias: 5.990249192151607
Final cost: 4.850437962270154e-05
Predictions: [2.99025662 4.99025167 6.99024672 8.99024177]
