In [None]:
import numpy as np
import matplotlib.pyplot as plt
import copy, math

In [None]:
def read_csv_to_array(filename):
    """Load a comma-separated file of numbers into a NumPy float array.

    Args:
        filename: path (or file-like object) handed straight to
            ``np.genfromtxt``.

    Returns:
        The parsed values as a float array, exactly as ``np.genfromtxt``
        produces them.
    """
    return np.genfromtxt(filename, dtype=float, delimiter=',')

In [None]:
def compute_gradient(X, y, w, b):
    """Compute the gradient of the squared-error cost for linear regression.

    Args:
        X: 2-D array of shape (m, n): m examples, n features.
        y: 1-D array of m target values.
        w: 1-D array of n weights.
        b: scalar bias.

    Returns:
        (dj_dw, dj_db): the gradient with respect to ``w`` (shape (n,)) and
        the gradient with respect to ``b`` (scalar), both averaged over the
        m examples.

    Raises:
        ValueError: if ``X`` contains no examples (the average is undefined).
    """
    m, _ = X.shape           # (number of examples, number of features)
    if m == 0:
        raise ValueError("compute_gradient requires at least one example.")
    # Residual of the linear model for every example at once, shape (m,).
    err = X @ w + b - y
    # Summing err[i] * X[i, j] over the examples i is the product X^T . err.
    dj_dw = (X.T @ err) / m
    dj_db = err.sum() / m
    return dj_dw, dj_db

In [None]:
def compute_cost(X, y, w, b):
    """Return the halved mean squared error of the linear model on (X, y).

    The cost is ``sum((X @ w + b - y) ** 2) / (2 * m)`` where ``m`` is the
    number of target values in ``y``.

    Args:
        X: 2-D array of examples, one row per entry of ``y``.
        y: 1-D array of m target values.
        w: 1-D array of weights, one per column of ``X``.
        b: scalar bias.

    Returns:
        The scalar cost.

    Raises:
        ValueError: if ``y`` is empty (the mean is undefined).
    """
    m = y.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one example.")
    # Residuals for all examples at once; dot(err, err) is the sum of squares.
    err = X @ w + b - y
    return np.dot(err, err) / (2 * m)

In [None]:
def gradient_descent(X, y, w_in, b_in=0, alpha=0.01, num_iterations=10000,
                     max_history=100000):
    """Fit linear-regression parameters with batch gradient descent.

    Each iteration computes the gradient with ``compute_gradient``, updates
    ``b`` and ``w`` by one step of size ``alpha``, and evaluates the cost with
    ``compute_cost``. Progress is printed about ten times over the run (every
    iteration when ``num_iterations`` is below 10).

    Args:
        X: training examples, passed through to the gradient/cost helpers.
        y: training targets, passed through to the gradient/cost helpers.
        w_in: initial weights; copied, so the caller's object is not modified.
        b_in: initial bias.
        alpha: learning rate.
        num_iterations: number of update steps to run.
        max_history: maximum number of per-iteration costs kept in the
            returned history, to bound memory on very long runs.

    Returns:
        (w, b, cost_history): the final weights, the final bias, and the list
        of costs recorded for the first ``max_history`` iterations.

    Raises:
        ValueError: if the cost becomes NaN at any iteration.
    """
    cost_history = []
    w = copy.deepcopy(w_in)
    b = b_in
    # Report at roughly 10 evenly spaced iterations (every one if fewer than 10).
    report_every = max(1, math.ceil(num_iterations / 10))
    for i in range(num_iterations):
        dj_dw, dj_db = compute_gradient(X, y, w, b)
        b = b - alpha * dj_db
        w = w - alpha * dj_dw
        # Always evaluate the current cost: reporting and the NaN check must
        # see this iteration's value even after the history stops growing.
        cost = compute_cost(X, y, w, b)
        if i < max_history:  # prevent resource exhaustion
            cost_history.append(cost)
        if i % report_every == 0:
            print(f"Iteration {i:4d}: Cost {cost:.2e}",
                  f"w: {w}, b: {b:.3e}")
        if math.isnan(cost):
            raise ValueError(f"Cost is not a number at iteration {i}.")
    return w, b, cost_history

In [None]:
# Training set: stock data from https://www.marketwatch.com/
# Features and targets live in separate CSV files.
x_train = read_csv_to_array(filename='x_train.csv')
y_train = read_csv_to_array(filename='y_train.csv')

In [None]:
# Print arrays with 3 decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

In [None]:
def z_score_normalize(X):
    """Standardize each column of ``X`` to zero mean and unit standard deviation.

    Args:
        X: array of examples with one feature per column.

    Returns:
        A float array of the same shape as ``X`` holding
        ``(X - column_mean) / column_std``, using the population standard
        deviation. A column with zero standard deviation (a constant feature)
        maps to all zeros rather than dividing by zero.
    """
    X = np.asarray(X, dtype=float)
    col_mean = X.mean(axis=0)
    col_std = X.std(axis=0)
    # A constant column has std 0; divide by 1 instead so it becomes zeros
    # rather than NaN.
    safe_std = np.where(col_std == 0, 1.0, col_std)
    return (X - col_mean) / safe_std

In [None]:
# Standardize the training features, then fit the model starting from
# all-zero weights and a zero bias.
x_train_normalized = z_score_normalize(x_train)
num_features = x_train.shape[1]
w = np.zeros(num_features)
# note alpha is much smaller than exercise 2
w, b, J_history = gradient_descent(
    x_train_normalized, y_train, w_in=w, b_in=0, alpha=0.1
)

In [None]:
def saveModel(w, b, filename='model.csv'):
    """Write the model parameters to a text file, one value per line.

    The weights ``w`` are written first, followed by the bias ``b`` on the
    last line. Any existing file at ``filename`` is overwritten.

    Args:
        w: 1-D array of weights.
        b: scalar bias.
        filename: destination path; defaults to 'model.csv'.
    """
    # One handle is enough: both writes go to the same file in sequence.
    with open(filename, 'wb') as f:
        np.savetxt(f, w, delimiter=',')
        np.savetxt(f, [b], delimiter=',')

In [None]:
# Persist the trained weights and bias to disk via saveModel.
saveModel(w, b)

In [None]:
# Build a human-readable formula for the fitted model, e.g.
# f_wb(x_0, x_1) = <w0> x_0 + <w1> x_1 + <b>.
x_string = ', '.join(f'x_{i}' for i in range(w.shape[0]))
wx_string = ' + '.join(f'{w[i]:.1e} x_{i}' for i in range(w.shape[0]))
print(f'f_wb({x_string}) = {wx_string} + {b:.1e}')

In [None]:
# Model output for every training example: one matrix-vector product
# instead of a Python loop over rows.
fx = x_train_normalized @ w + b

In [None]:
# Draw the fitted values on the same x-axis as the scatter (raw feature 0).
# Plotting fx alone would place it against the row index instead. The points
# are sorted by x so the line is drawn left to right.
train_order = np.argsort(x_train[:, 0])
plt.scatter(x_train[:, 0], y_train)
plt.plot(x_train[train_order, 0], fx[train_order], color='r')
plt.xlabel('x_0')
plt.ylabel('y')
plt.show()

In [None]:
x_predict = read_csv_to_array('x_predict.csv')
y_predict = read_csv_to_array('y_predict.csv')
# The model was fitted on features standardized with the TRAINING mean/std,
# so new data must go through that same transformation. Standardizing
# x_predict with its own statistics would feed the model inputs on a
# different scale and give inconsistent predictions.
x_train_mean = x_train.mean(axis=0)
x_train_std = x_train.std(axis=0)
# Guard constant training columns against division by zero.
x_train_std = np.where(x_train_std == 0, 1.0, x_train_std)
x_predict_normalized = (x_predict - x_train_mean) / x_train_std

In [None]:
# Model output for every prediction example: one matrix-vector product
# instead of a Python loop over rows.
predicted = x_predict_normalized @ w + b

In [None]:
# Show each prediction next to the actual value and their absolute error.
# Rows are labelled with the RAW first feature: the normalized value
# truncates to about -1/0/1 under int() and identifies nothing.
for i in range(x_predict_normalized.shape[0]):
    print(f"{int(x_predict[i, 0])}: |{predicted[i]:.2f} - {y_predict[i]}| = {abs(predicted[i] - y_predict[i]):.2f}")
# The cost is still evaluated on the normalized features the model consumes.
cost_predict = compute_cost(x_predict_normalized, y_predict, w, b)
print(f"Cost of future predictions: {cost_predict:.2f}")

In [None]:
# Training data with its fitted line, plus the held-out data with its
# predictions, all on one x-axis (raw feature 0). The training fit is plotted
# against x_train[:, 0], sorted, rather than against the row index, so it
# lines up with the scatter and with the prediction line.
train_order = np.argsort(x_train[:, 0])
plt.scatter(x_train[:, 0], y_train)
plt.plot(x_train[train_order, 0], fx[train_order], color='r')
plt.scatter(x_predict[:, 0], y_predict)
plt.plot(x_predict[:, 0], predicted, color='r')
plt.xlabel('x_0')
plt.ylabel('y')
plt.show()