In [1]:
# Use the %pip magic rather than !pip: %pip installs into the environment of
# the running kernel, while !pip runs whichever pip is first on the shell PATH.
%pip install numpy matplotlib




In [6]:
import numpy as np

def add_bias(X):
    """
    Prepend a column of ones (the intercept/bias term) to X.

    X: [m x n] feature matrix; a 1-D [m] array is treated as a single
       feature column.
    Returns the design matrix [m x (n+1)] with the ones in column 0.
    """
    n_rows = X.shape[0]
    intercept = np.ones(n_rows)
    # column_stack turns the 1-D intercept (and a 1-D X) into columns
    # before joining them side by side.
    return np.column_stack((intercept, X))

def standardize(X):
    """
    Standardize features to zero mean and unit variance.

    X: [m x n] feature matrix, or a 1-D [m] array for a single feature.
       Array-likes (e.g. nested lists) are accepted.
    Returns transformed X, means, stds (to transform new data later).
    Features with zero standard deviation are given a std of 1.0, so they
    map to 0 instead of dividing by zero.
    """
    X = np.asarray(X)
    mu = X.mean(axis=0)
    sigma = X.std(axis=0, ddof=0)
    # Use np.where rather than masked assignment: for 1-D X the std is a
    # NumPy scalar, which does not support item assignment.
    sigma = np.where(sigma == 0, 1.0, sigma)
    Xs = (X - mu) / sigma
    return Xs, mu, sigma


def compute_cost(Xb, y, theta, l2=0.0):
    """
    Half mean squared error of a linear model, with optional L2 penalty.

    Xb: design matrix with bias column [m x (n+1)]
    y:  targets [m]
    theta: parameters [(n+1)], bias first
    l2: L2 regularization strength (lambda); only applied when > 0.
        The bias theta[0] is never penalized.
    Returns the scalar cost J.
    """
    n_samples = len(y)
    err = Xb @ theta - y
    data_term = (err @ err) / (2 * n_samples)

    # No regularization requested: the cost is the data term alone.
    if not l2 > 0:
        return data_term

    weights = theta[1:]  # skip the bias entry
    penalty = (l2 / (2 * n_samples)) * (weights @ weights)
    return data_term + penalty


def gradient_descent(X, y, alpha=0.1, iters=1000, l2=0.0, tol=None, verbose=False):
    """
    Fit linear regression by batch gradient descent on standardized features.

    X: feature matrix [m x n] (array-like).
    y: targets [m]; an [m x 1] column is flattened to [m].
    alpha: learning rate.
    iters: maximum number of update steps.
    l2: L2 regularization strength (lambda); applied only when > 0.
        Bias not regularized.
    tol: if not None, stop as soon as the cost changes by less than tol
         between two consecutive iterations.
    verbose: print the cost about ten times over the run, plus a message
             on early convergence.

    Returns (theta, scaler, history):
      theta:   parameters [(n+1)], bias first, expressed in the
               standardized feature space.
      scaler:  {"mu": ..., "sigma": ...} to transform new data later.
      history: list with the cost after each update.

    Raises ValueError if X and y disagree on the number of samples.
    """
    X = np.asarray(X)
    # Flatten the targets: an [m x 1] column would otherwise broadcast
    # against the [m] predictions into an [m x m] error matrix.
    y = np.asarray(y).ravel()
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"X has {X.shape[0]} samples but y has {y.shape[0]} targets"
        )

    # feature scale
    Xs, mu, sigma = standardize(X)

    # add bias column
    Xb = add_bias(Xs)
    m, n1 = Xb.shape

    # init params (fixed seed, so repeated runs give the same result)
    rng = np.random.default_rng(0)
    theta = rng.normal(0, 0.01, size=n1)
    history = []

    prev_cost = None
    for t in range(iters):
        # prediction and gradient
        preds = Xb @ theta
        error = preds - y
        grad = (Xb.T @ error) / m
        if l2 > 0:
            reg = np.r_[0.0, l2 * theta[1:]] / m  # don't regularize bias
            grad += reg

        # update
        theta -= alpha * grad

        # track cost (evaluated with the freshly updated theta)
        J = compute_cost(Xb, y, theta, l2=l2)
        history.append(J)
        if verbose and (t % max(1, iters // 10) == 0):
            print(f"iter {t:5d}  cost {J:.6f}")

        if tol is not None and prev_cost is not None and abs(prev_cost - J) < tol:
            if verbose:
                print(f"Converged at iter {t} (ΔJ < tol).")
            break
        prev_cost = J

    # pack scaler for future use
    scaler = {"mu": mu, "sigma": sigma}
    return theta, scaler, history

def predict(X, theta, scaler):
    """
    Predict on new data using learned theta and saved scaler.

    X: raw (unscaled) features for the new samples.
    theta: parameters with the bias first.
    scaler: dict with "mu" and "sigma", applied as (X - mu) / sigma
            before the bias column is added.
    Returns the product of the resulting design matrix with theta.
    """
    centered = X - scaler["mu"]
    scaled = centered / scaler["sigma"]
    design = add_bias(scaled)
    return design @ theta




In [7]:
# Toy dataset: 8 samples (rows) x 3 features (columns).
# NOTE(review): the columns look like square footage, bedrooms, and age in
# years — confirm against the data source.
X = np.array([
    [1500, 3, 10],
    [1800, 4, 5],
    [2400, 4, 20],
    [3000, 5, 8],
    [3500, 5, 12],
    [2200, 3, 15],
    [2600, 4, 7],
    [2800, 4, 9],
], dtype=float)

# One target value per row of X.
y = np.array([400, 500, 600, 650, 700, 520, 590, 620], dtype=float)  # in $1,000s

# gradient_descent standardizes X internally, so theta is expressed in
# standardized-feature units; scaler carries the mu/sigma that predict()
# needs to apply the same transform to new data.
theta, scaler, history = gradient_descent(
    X, y,
    alpha=0.1,
    iters=5000,
    l2=0.0,     # try 0.1 to see L2 effect
    tol=1e-9,   # stop early once the cost changes by less than this
    verbose=False
)

# history[-1] is the cost after the last update that was performed.
print("Final cost:", history[-1])
print("Theta (bias + weights):", theta)


Final cost: 141.2623590845302
Theta (bias + weights): [572.5         65.72145678  24.35612313  10.48054746]
