# Optional Lab: Multiple Variable Linear Regression
# 2 Problem Statement

In [1]:
# Training set: each row of X_train is one example and each column one feature;
# y_train[i] is the target value for row i.
X_train = [
    2104 5 1 45
    1416 3 2 40
     852 2 1 35
]
y_train = [460, 232, 178];

## 2.1 Matrix X containing our examples

In [2]:
# X_train and y_train are plain Julia arrays: print each one's dimensions and
# element type, followed by its contents.
println("X Shape: $(size(X_train)), X Type: $(eltype(X_train))")
println(X_train)
println("y Shape: $(size(y_train)), y Type:$(eltype(y_train))")
println(y_train)

X Shape: (3, 4), X Type: Int64


[2104 5 1 45; 1416 3 2 40; 852 2 1 35]


y Shape: (3,), y Type:Int64


[460, 232, 178]


## 2.2 Parameter vector w, b

In [3]:
# Model parameters used throughout: one weight per feature column, plus a scalar bias.
w_init = [
    0.39133535,
    18.75376741,
    -53.36032453,
    -26.42131618,
]
b_init = 785.1811367994083
println("w_init shape: $(size(w_init)), b_init type: $(eltype(b_init))")

w_init shape: (4,), b_init type: Float64


# 3 Model Prediction With Multiple Variables
## 3.1 Single Prediction element by element

In [4]:
# range(start, stop) includes both endpoints; with integer arguments it displays as 0:9.
range(0, 9)

0:9

In [5]:
"""
    predict_single_loop(x, w, b)

Return the linear-regression prediction for a single example, i.e. the sum of
`x[i] * w[i]` over all features plus `b`, accumulated one element at a time.

# Arguments
- `x`: vector of `n` feature values for one example.
- `w`: vector of `n` model weights.
- `b`: scalar bias.

# Returns
- The scalar prediction.

# Throws
- `DimensionMismatch` if `x` and `w` do not have the same length.
"""
function predict_single_loop(x, w, b)
    length(x) == length(w) || throw(
        DimensionMismatch("x has length $(length(x)) but w has length $(length(w))"),
    )
    # Seed the accumulator with a zero of the promoted element type so it keeps
    # a single type for the whole loop.
    p = zero(promote_type(eltype(x), eltype(w)))
    for i in eachindex(x, w)
        p += x[i] * w[i]
    end
    return p + b
end

predict_single_loop (generic function with 1 method)

In [6]:
# Take the first training example: row 1 of X_train, extracted as a vector.
x_vec = X_train[1, :]
println("x_vec shape $(size(x_vec)), x_vec value: $(x_vec)")

# Predict for that example with the element-by-element loop implementation.
f_wb = predict_single_loop(x_vec, w_init, b_init)
println("f_wb shape $(size(f_wb)), prediction: $f_wb")

x_vec shape (4,), x_vec value: [2104, 5, 1, 45]
f_wb shape (), prediction: 459.9999976194083


## 3.2 Single Prediction, vector

In [7]:
using LinearAlgebra

"""
    predict(x, w, b)

Return the linear-regression prediction for a single example, computed as the
dot product of `x` and `w` plus `b`.

# Arguments
- `x`: vector of `n` feature values for one example.
- `w`: vector of `n` model weights.
- `b`: scalar bias.

# Returns
- The scalar prediction.
"""
function predict(x, w, b)
    return x ⋅ w + b
end

predict (generic function with 1 method)

In [8]:
# Take the first training example: row 1 of X_train, extracted as a vector.
x_vec = X_train[1, :]
println("x_vec shape $(size(x_vec)), x_vec value: $x_vec")

# Predict for that example with the dot-product implementation.
f_wb = predict(x_vec, w_init, b_init)
println("f_wb shape $(size(f_wb)), prediction: $f_wb")

x_vec shape (4,), x_vec value: [2104, 5, 1, 45]
f_wb shape (), prediction: 459.9999976194083


# 4 Compute Cost With Multiple Variables

In [14]:
"""
    compute_cost(X, y, w, b)

Return the squared-error cost of the linear model over all rows of `X`: the sum
of `(X[i, :] ⋅ w + b - y[i])^2` over the `m` examples, divided by `2m`.

# Arguments
- `X`: `m`-by-`n` matrix, `m` examples with `n` features each.
- `y`: vector of `m` target values.
- `w`: vector of `n` model weights.
- `b`: scalar bias.

# Returns
- The scalar cost.

# Throws
- `DimensionMismatch` if `y` does not have one entry per row of `X`.
"""
function compute_cost(X, y, w, b)
    m = size(X, 1)
    length(y) == m || throw(
        DimensionMismatch("X has $m rows but y has length $(length(y))"),
    )
    cost = 0.0
    for i in axes(X, 1)
        x_i = @view X[i, :]       # view of row i, avoids copying the row
        f_wb_i = x_i ⋅ w + b      # prediction for example i (scalar)
        cost += (f_wb_i - y[i])^2
    end
    return cost / (2 * m)
end

compute_cost (generic function with 1 method)

In [15]:
# Compute and display the training-set cost at the pre-chosen parameters
# w_init, b_init.
cost = compute_cost(X_train, y_train, w_init, b_init)
println("Cost at optimal w : $cost")

Cost at optimal w : 1.5578904428966628e-12


# 5 Gradient Descent With Multiple Variables
## 5.1 Compute Gradient with Multiple Variables

In [16]:
"""
    compute_gradient(X, y, w, b)

Compute the gradient of the linear-regression squared-error cost with respect
to the parameters `w` and `b`, averaged over the `m` rows of `X`.

# Arguments
- `X`: `m`-by-`n` matrix, `m` examples with `n` features each.
- `y`: vector of `m` target values.
- `w`: vector of `n` model weights.
- `b`: scalar bias.

# Returns
A tuple `(dj_db, dj_dw)`; note that the bias gradient comes first.
- `dj_db`: scalar gradient of the cost with respect to `b`.
- `dj_dw`: length-`n` vector, gradient of the cost with respect to `w`.

# Throws
- `DimensionMismatch` if `y` does not have one entry per row of `X`.
"""
function compute_gradient(X, y, w, b)
    m, n = size(X)    # (number of examples, number of features)
    length(y) == m || throw(
        DimensionMismatch("X has $m rows but y has length $(length(y))"),
    )
    dj_dw = zeros(n)
    dj_db = 0.0

    for i in axes(X, 1)
        x_i = @view X[i, :]           # view of row i, avoids copying the row
        err = (x_i ⋅ w + b) - y[i]    # prediction error for example i
        dj_dw .+= err .* x_i          # accumulate err * X[i, j] for every feature j
        dj_db += err
    end
    dj_dw ./= m

    return dj_db / m, dj_dw
end

compute_gradient (generic function with 1 method)

In [19]:
# Compute and display the gradient at w_init, b_init.
# compute_gradient returns the bias gradient first, then the weight gradient.
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
println("dj_db at initial w,b: $tmp_dj_db")
println("dj_dw at initial w,b: $tmp_dj_dw")

dj_db at initial w,b: -1.6739251501955248e-6


dj_dw at initial w,b: [-0.0027262357719640327, -6.271972627776752e-6, -2.217455782253334e-6, -6.92403390682254e-5]
