In [1]:
import numpy as np

# Toy dataset: ten (x, y) observations for a one-feature linear fit.
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# Design matrix with a leading column of ones, so the fitted vector is
# ordered [intercept, slope].
X = np.column_stack([np.ones(len(x)), x])

# Solve the least-squares problem directly rather than forming inv(X.T @ X):
# explicitly inverting the Gram matrix squares the condition number and fails
# outright when the columns of X are collinear. lstsq returns the same
# minimiser for a full-rank X.
beta_hat_normal_eq, *_ = np.linalg.lstsq(X, y, rcond=None)
y_hat_normal_eq = X @ beta_hat_normal_eq
SSE_normal_eq = np.sum((y - y_hat_normal_eq)**2)
# R^2 = 1 - SSE / SST, with SST taken around the mean of y.
R_squared_normal_eq = 1 - SSE_normal_eq / np.sum((y - np.mean(y))**2)

print("Analytic Solution:")
print(f"Coefficients: {beta_hat_normal_eq}")
print(f"Sum of Squared Errors: {SSE_normal_eq}")
print(f"R^2 value: {R_squared_normal_eq}")
def full_batch_gradient_descent(x, y, lr=0.01, epochs=1000):
    """Fit ``y ~ m*x + b`` by gradient descent on the mean squared error.

    Each update uses every sample. Both parameters start at 0.0.

    Args:
        x: Array of inputs; must support elementwise arithmetic with scalars.
        y: Array of targets, same length as ``x``.
        lr: Step size applied to each gradient.
        epochs: Number of full-data updates to perform.

    Returns:
        Tuple ``(m, b)``: slope and intercept after ``epochs`` updates.
    """
    slope = 0.0
    intercept = 0.0
    n_samples = len(y)
    for _ in range(epochs):
        # Residuals of the current line against the targets.
        residual = y - (slope * x + intercept)
        # Partial derivatives of mean((y - (m*x + b))**2) w.r.t. m and b.
        grad_slope = (-2 / n_samples) * np.sum(x * residual)
        grad_intercept = (-2 / n_samples) * np.sum(residual)
        slope = slope - lr * grad_slope
        intercept = intercept - lr * grad_intercept
    return slope, intercept
# Fit with the function's default learning rate and epoch count, then score
# the fitted line on the same data it was trained on.
m_gd, b_gd = full_batch_gradient_descent(x, y)
y_pred_gd = m_gd*x + b_gd
sse_gd = np.sum((y - y_pred_gd)**2)
r_squared_gd = 1 - sse_gd / np.sum((y - np.mean(y))**2)

print("\nFull-batch Gradient Descent:")
# The tuple is ordered (slope, intercept). Cast to built-in floats before
# formatting: a tuple is rendered with repr() of its items, and NumPy >= 2
# reprs scalars as "np.float64(...)", which would clutter the output.
print(f"Coefficients: {float(m_gd), float(b_gd)}")
print(f"Sum of Squared Errors: {sse_gd}")
print(f"R^2 value: {r_squared_gd}")
def stochastic_gradient_descent(x, y, lr=0.01, epochs=1000, shuffle=False, random_state=None):
    """Fit ``y ~ m*x + b`` with one parameter update per sample.

    Both parameters start at 0.0. In every epoch each sample is used once to
    take a gradient step on its own squared error.

    Args:
        x: 1-D sequence of inputs (list, NumPy array, pandas Series, ...).
        y: 1-D sequence of targets, same length as ``x``.
        lr: Step size applied to each per-sample gradient.
        epochs: Number of passes over the data.
        shuffle: If True, visit the samples in a fresh random order every
            epoch. The default (False) keeps the fixed order 0..N-1.
        random_state: Seed used for shuffling when ``shuffle`` is True. With
            None the global ``np.random`` state is used.

    Returns:
        Tuple ``(m, b)``: slope and intercept after the last update.
    """
    # Work on plain arrays so x[i] / y[i] are positional; indexing a pandas
    # Series with an integer is label-based and breaks on a non-default index.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    m, b = 0.0, 0.0
    N = len(y)

    rng = None
    if shuffle:
        rng = np.random if random_state is None else np.random.default_rng(random_state)

    for _ in range(epochs):
        order = rng.permutation(N) if shuffle else range(N)
        for i in order:
            error = y[i] - (m*x[i] + b)
            # Gradient of the single-sample squared error w.r.t. m and b.
            dm = -2 * x[i] * error
            db = -2 * error
            m -= lr * dm
            b -= lr * db
    return m, b
# Fit with the function's default learning rate and epoch count, then score
# the fitted line on the same data it was trained on.
m_sgd, b_sgd = stochastic_gradient_descent(x, y)
y_pred_sgd = m_sgd*x + b_sgd
sse_sgd = np.sum((y - y_pred_sgd)**2)
r_squared_sgd = 1 - sse_sgd / np.sum((y - np.mean(y))**2)

print("\nStochastic Gradient Descent:")
# The tuple is ordered (slope, intercept). Cast to built-in floats before
# formatting: a tuple is rendered with repr() of its items, and NumPy >= 2
# reprs scalars as "np.float64(...)", which would clutter the output.
print(f"Coefficients: {float(m_sgd), float(b_sgd)}")
print(f"Sum of Squared Errors: {sse_sgd}")
print(f"R^2 value: {r_squared_sgd}")


Analytic Solution:
Coefficients: [1.23636364 1.16969697]
Sum of Squared Errors: 5.624242424242426
R^2 value: 0.952538038613988

Full-batch Gradient Descent:
Coefficients: (1.170263693076768, 1.2328099487610318)
Sum of Squared Errors: 5.624278989977716
R^2 value: 0.9525377300423822

Stochastic Gradient Descent:
Coefficients: (1.2986755729435908, 0.8967040680508923)
Sum of Squared Errors: 7.576246971879953
R^2 value: 0.9360654263976376


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
# Load the housing table; the path is relative to the notebook's working directory.
boston_data = pd.read_csv("BostonHousing.csv")
print(boston_data.head())

# Every column except "medv" is a feature; "medv" is the regression target.
X = boston_data.drop(columns=["medv"])
y = boston_data["medv"]

# Rank the features by the absolute value of their correlation with the target.
correlation_coeffs = np.abs(boston_data.corr()["medv"]).drop("medv")
best_feature_name = correlation_coeffs.idxmax()

print(f"The attribute that best follows the linear relationship with the output price is: {best_feature_name}")

# Standardise the features, then prepend a column of ones so the first
# coefficient is the intercept.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_with_bias = np.c_[np.ones((X_scaled.shape[0], 1)), X_scaled]

# Solve the least-squares problem directly rather than forming
# inv(X.T @ X): the explicit inverse squares the condition number and raises
# on collinear columns, while lstsq gives the same minimiser for full-rank X.
analytic_solution, *_ = np.linalg.lstsq(X_with_bias, y.to_numpy(), rcond=None)

print("Analytic solution coefficients:", analytic_solution)
class LinearRegressionGradientDescent:
    """Linear regression trained with full-batch gradient descent.

    The loss is the sum of squared residuals over all rows (not the mean), so
    the gradient magnitude grows with the number of rows.

    Attributes set by ``fit``:
        weights: 1-D array; element 0 is the intercept, the rest follow the
            column order of the training matrix.
    """

    def __init__(self, lr=0.0001, max_iter=10000, tolerance=1e-6):
        """Store the hyperparameters.

        Args:
            lr: Step size applied to the gradient.
            max_iter: Upper bound on the number of gradient evaluations.
            tolerance: Training stops once every gradient component is
                smaller than this in absolute value.
        """
        self.lr, self.max_iter, self.tolerance = lr, max_iter, tolerance

    @staticmethod
    def _add_intercept_column(X):
        """Return ``X`` with a leading column of ones."""
        return np.c_[np.ones((X.shape[0], 1)), X]

    def fit(self, X, y):
        """Learn ``weights`` from feature matrix ``X`` and targets ``y``.

        Weights start at zero and are updated in place until the gradient is
        below ``tolerance`` or ``max_iter`` evaluations have been made.
        """
        design = self._add_intercept_column(X)
        self.weights = np.zeros(design.shape[1])

        for _ in range(self.max_iter):
            residual = y - design.dot(self.weights)
            # Gradient of sum(residual**2) with respect to the weights.
            gradient = -2 * design.T.dot(residual)
            converged = np.all(np.abs(gradient) < self.tolerance)
            if converged:
                break
            self.weights -= self.lr * gradient

    def predict(self, X):
        """Return predictions for ``X`` using the weights learned by ``fit``."""
        design = self._add_intercept_column(X)
        return design.dot(self.weights)
# Train the full-batch model on the standardised features and show its weights
# (intercept first, then one weight per feature column).
lr_full_batch_scaled = LinearRegressionGradientDescent(
    lr=0.0001,
    max_iter=10000,
)
lr_full_batch_scaled.fit(X_scaled, y)

print(
    "Gradient descent (Full-batch) coefficients (scaled features):",
    lr_full_batch_scaled.weights,
)
class LinearRegressionStochasticGradientDescent:
    """Linear regression trained with (mini-batch) stochastic gradient descent.

    Each epoch shuffles the rows and takes one gradient step per batch of
    ``batch_size`` rows. The per-batch loss is the sum of squared residuals.

    Attributes set by ``fit``:
        weights: 1-D array; element 0 is the intercept, the rest follow the
            column order of the training matrix.
    """

    def __init__(self, lr=0.0001, max_iter=1000, tolerance=1e-6, batch_size=1, random_state=None):
        """Store the hyperparameters.

        Args:
            lr: Step size applied to each batch gradient.
            max_iter: Maximum number of epochs (passes over the data).
            tolerance: Training stops once every component of the full-data
                gradient is smaller than this in absolute value.
            batch_size: Number of rows per update.
            random_state: Seed for the per-epoch shuffling. With None the
                global ``np.random`` state is used, so ``np.random.seed``
                still controls the run.
        """
        self.lr = lr
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.batch_size = batch_size
        self.random_state = random_state

    def fit(self, X, y):
        """Learn ``weights`` from feature matrix ``X`` and targets ``y``."""
        X_with_bias = np.c_[np.ones((X.shape[0], 1)), X]
        # Plain array so batches are selected by position; indexing a pandas
        # Series with an integer array is label-based and fails (or picks the
        # wrong rows) when the index is not 0..N-1.
        y_values = np.asarray(y, dtype=float)
        n_samples = X_with_bias.shape[0]
        self.weights = np.zeros(X_with_bias.shape[1])

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.default_rng(self.random_state)

        for _ in range(self.max_iter):
            # Convergence is judged once per epoch on the gradient over all
            # rows. A single batch's gradient can be tiny by chance, and a
            # `break` inside the batch loop would only abandon the rest of the
            # epoch without ending training.
            full_gradient = -2 * X_with_bias.T.dot(y_values - X_with_bias.dot(self.weights))
            if np.all(np.abs(full_gradient) < self.tolerance):
                break

            indices = rng.permutation(n_samples)
            for start_idx in range(0, n_samples, self.batch_size):
                batch_indices = indices[start_idx:start_idx + self.batch_size]
                X_batch = X_with_bias[batch_indices]
                y_batch = y_values[batch_indices]
                gradients = -2 * X_batch.T.dot(y_batch - X_batch.dot(self.weights))
                self.weights -= self.lr * gradients

    def predict(self, X):
        """Return predictions for ``X`` using the weights learned by ``fit``."""
        X_with_bias = np.c_[np.ones((X.shape[0], 1)), X]
        return X_with_bias.dot(self.weights)
# Seed NumPy's global generator so the per-epoch shuffling, and therefore the
# printed coefficients, are the same on every run of this cell.
np.random.seed(42)

# NOTE: with batch_size=1 every epoch performs one Python-level update per row,
# so 10000 epochs can take a long time on a table of any size.
lr_stochastic_scaled = LinearRegressionStochasticGradientDescent(lr=0.0001, max_iter=10000, batch_size=1)
lr_stochastic_scaled.fit(X_scaled, y)
print("Gradient descent (Stochastic) coefficients (scaled features):", lr_stochastic_scaled.weights)

      crim    zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        b  lstat  medv  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  
The attribute that best follows the linear relationship with the output price is: lstat
Analytic solution coefficients: [ 2.25328063e+01 -9.28146064e-01  1.08156863e+00  1.40899997e-01
  6.81739725e-01 -2.05671827e+00  2.67423017e+00  1.94660717e-02
 -3.10404426e+00  2.66221764e+00 -2.07678168e+00 -2.06060666e+00
  8.49268418e-01 -3.74362713e+00]
Gradient descent (Fu