In [2]:
import copy, math
import numpy as np

In [3]:
def predict(x, w, b):
    """
    single predict using linear regression
    Args:
      x (ndarray): Shape (n,) example with multiple features
      w (ndarray): Shape (n,) model parameters
      b (scalar):             model parameter

    Returns:
      p (scalar):  prediction
    """
    p = np.dot(x, w) + b
    return p

In [4]:
def cost(X, y, w, b):
    """
    compute cost
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter

    Returns:
      cost (scalar): cost
    """
    m = X.shape[0]
    cost = 0
    for i in range(m):
        f = predict(X[i], w, b)
        cost += (f - y[i]) ** 2
    cost /= (2 * m)
    return cost

In [5]:
def gradient(X, y, w, b):
    """
    Computes the gradient for linear regression
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter

    Returns:
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w.
      dj_db (scalar):       The gradient of the cost w.r.t. the parameter b.
    """
    m, n = X.shape
    dj_dw = np.zeros((n, ))
    dj_db = 0
    for i in range(m):
        err = (predict(X[i], w, b) - y[i])
        for j in range(n):
            dj_dw += err * X[i, j]
        dj_db += err

    dj_dw /= m
    dj_db /= m

    return dj_dw, dj_db

In [6]:
def gradient_descent(X, y, w_in, b_in, cost_func, gradient_func, alpha, num_iters):
    """
    Performs batch gradient descent to learn theta. Updates theta by taking
    num_iters gradient steps with learning rate alpha

    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters
      b_in (scalar)       : initial model parameter
      cost_func       : function to compute cost
      gradient_func   : function to compute the gradient
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent

    Returns:
      w (ndarray (n,)) : Updated values of parameters
      b (scalar)       : Updated value of parameter
      """
    J_hist = []
    w = copy.deepcopy(w_in)
    b = b_in

    for i in range(num_iters):
        dj_dw, dj_db = gradient_func(X, y, w, b)
        w -= alpha * dj_dw
        b -= alpha * dj_db

        if i < 100000:
            J_hist.append(cost_func(X, y, w, b))

        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_hist[-1]:8.2f}   ")

        return w, b, J_hist

In [7]:
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])

# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b, cost, gradient, alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  4073.16   
b,w found by gradient descent: 0.00,[0.248112 0.248112 0.248112 0.248112] 
prediction: 534.68, target value: 460
prediction: 362.49, target value: 232
prediction: 220.82, target value: 178


In [8]:
a = np.arange(4, 10, 0.1)
print(f'{a}')
b = np.random.rand(10)
print(f'{b}')

[4.  4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.  5.1 5.2 5.3 5.4 5.5 5.6 5.7
 5.8 5.9 6.  6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7.  7.1 7.2 7.3 7.4 7.5
 7.6 7.7 7.8 7.9 8.  8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9 9.  9.1 9.2 9.3
 9.4 9.5 9.6 9.7 9.8 9.9]
[0.30540693 0.63981301 0.09650967 0.48017494 0.16558147 0.96101865
 0.69147737 0.82936124 0.09019627 0.39290061]


In [9]:
def z_score_normalization(X):
    mean = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)

    normalized = (X - mean) / sigma
    return normalized, mean, sigma

In [10]:
z_score_normalization(X_train)

(array([[ 1.26311506,  1.33630621, -0.70710678,  1.22474487],
        [-0.08073519, -0.26726124,  1.41421356,  0.        ],
        [-1.18237987, -1.06904497, -0.70710678, -1.22474487]]),
 array([1.45733333e+03, 3.33333333e+00, 1.33333333e+00, 4.00000000e+01]),
 array([5.11961804e+02, 1.24721913e+00, 4.71404521e-01, 4.08248290e+00]))

## Scikit-Learn

In [11]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [14]:
scaler = StandardScaler()
X_norm = scaler.fit_transform(X_train)
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train,axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}")

Peak to Peak range by column in Raw        X:[1252    3    1   10]
Peak to Peak range by column in Normalized X:[2.44549494 2.40535118 2.12132034 2.44948974]


In [15]:
sgdr = SGDRegressor(max_iter=1000)
sgdr.fit(X_train, y_train)
print(sgdr)
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

SGDRegressor()
number of iterations completed: 6, number of weight updates: 19.0


In [17]:
b = sgdr.intercept_
w = sgdr.coef_
print(f"model parameters:                   w: {w}, b:{b}")

model parameters:                   w: [-6.04768198e+11  3.37882112e+08 -8.62889125e+09 -1.20429037e+11], b:[-4.01900938e+09]


In [18]:
sgdr.predict(X_norm)

array([-9.08852613e+11,  3.25136679e+10,  8.64281917e+11])