In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data_cleaning import x_train, y_train
import math
from copy import deepcopy


- Define the cost function

In [None]:
def cost_function(x, y, w, b):
    '''
    Compute the squared-error cost J(w, b) of a linear model.

    J(w, b) = 1 / (2 * m) * sum_i (dot(x[i], w) + b - y[i]) ** 2

    x: (m,n) array, m examples with n features
    y: (m,) array, target values (a column vector of shape (m,1) is also accepted)
    w: (n,) array, parameters (weights)
    b: scalar, parameter (bias)

    Return value:
    float, the cost over all m examples

    Raises:
    ValueError if x contains no examples (the average would be undefined)
    '''
    x = np.asarray(x, dtype=float)
    # Flatten y and w so that (k,) and (k,1) layouts are treated the same.
    # Without this, a (m,1) target would broadcast against the (m,) predictions
    # into an (m,m) matrix and silently produce a wrong cost.
    y = np.asarray(y, dtype=float).reshape(-1)
    w = np.asarray(w, dtype=float).reshape(-1)

    m = x.shape[0]
    if m == 0:
        raise ValueError("cost_function requires at least one training example")

    # Prediction error for every example at once.
    err = x @ w + b - y
    # dot(err, err) is the sum of squared errors.
    return float(np.dot(err, err) / (2 * m))

- Define the function that calculates the partial derivatives of the cost with respect to w and b

In [None]:
def part_derivatives(x, y, w, b):
    '''
    Compute the gradient of the squared-error cost J(w, b) of a linear model.

    x: (m,n) array, m examples with n features
    y: (m,) array, target values (a column vector of shape (m,1) is also accepted)
    w: (n,) array, parameters (weights)
    b: scalar, parameter (bias)

    Return values (in this order, bias gradient first):
    dj_db: float, partial derivative of the cost with respect to b
    dj_dw: (n,) array, partial derivative of the cost with respect to w

    Raises:
    ValueError if x is not 2-D or contains no examples
    '''
    x = np.asarray(x, dtype=float)
    # Flatten y and w so that (k,) and (k,1) layouts are treated the same and
    # the error below stays a plain (m,) vector.
    y = np.asarray(y, dtype=float).reshape(-1)
    w = np.asarray(w, dtype=float).reshape(-1)

    m, n = x.shape
    if m == 0:
        raise ValueError("part_derivatives requires at least one training example")

    # Prediction error for every example at once.
    err = x @ w + b - y

    # dj_dw[j] = mean_i(err[i] * x[i, j]); dj_db = mean_i(err[i])
    dj_dw = (x.T @ err) / m
    dj_db = float(err.sum() / m)

    return dj_db, dj_dw

- Define the gradient descent function

In [None]:
def gradient_descent(x, y, w_in, b_in, cost_function, partial_derivatives, alpha, iterations):
    '''
    Run batch gradient descent and return the learned parameters.

    x: (m,n) array, m examples with n features
    y: (m,) array, target values
    w_in: (n,) array, initial parameters (not modified)
    b_in: scalar, initial parameter (bias)
    cost_function: function (x, y, w, b) -> cost
    partial_derivatives: function (x, y, w, b) -> (dj_db, dj_dw)
    alpha: float, learning rate
    iterations: integer, number of iterations to run gradient descent

    Return values:
    w: (n,) array, parameters after the last iteration
    b: scalar, bias after the last iteration
    j_values: list with the cost after each of the first 100000 iterations
    '''

    # Upper bound on how many cost values are kept in the history.
    max_cost_history = 100000

    j_values = []
    w = deepcopy(w_in)  # work on a copy so the caller's initial array is untouched
    b = b_in

    for i in range(iterations):
        # Use the gradient function that was passed in, not a global one.
        dj_db, dj_dw = partial_derivatives(x, y, w, b)
        w = w - (alpha * dj_dw)
        b = b - (alpha * dj_db)

        record = i < max_cost_history
        report = i % 100 == 0
        if record or report:
            # Evaluate the cost once and reuse it, so the progress line always
            # shows the current cost even after the history stops growing.
            cost = cost_function(x, y, w, b)
            if record:
                j_values.append(cost)
            if report:
                print(f"Iteration: {i}, Cost: {cost}")

    return w, b, j_values
        
    


In [None]:
def model_predict(x, w, b):
    '''
    Predict the target for a single example with the linear model dot(x, w) + b.

    x: (n,) array, feature values of one example
    w: (n,) array, model weights
    b: scalar, model bias

    Return value:
    the weighted sum of the features (as a Python float) plus b
    '''
    weighted_sum = float(np.dot(x, w))
    return weighted_sum + b

In [None]:
# Starting point and learning rate for the training run.
w_init = np.array([-1.0, -0.3])
b_init = -3.0
alpha = 1e-7

# Bind the results to names so later cells (e.g. model_predict) can reuse the
# trained parameters and the cost history instead of losing them in the output.
w_final, b_final, j_history = gradient_descent(
    x_train, y_train, w_init, b_init, cost_function, part_derivatives, alpha, iterations=100
)

# Show only the learned parameters; the full cost history stays in j_history.
w_final, b_final


