In [136]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data_cleaning import x_train, y_train
import math
from copy import deepcopy


- Define the cost function

In [137]:
def cost_function(x, y, w, b):
    '''
    x: (m,n) array, m example with n features
    y: (m,) array, target values
    w: (n,) array, parameter
    b: scalar, parameter (bias)
    '''
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        dot_prod = np.dot(x[i], w)
        fwb_i = float(dot_prod) + b
        cost = cost + (fwb_i - y[i]) ** 2
    cost = cost / (2 * m)
    final_cost = cost[0]
    return final_cost

- Define the function that calculates the partial derivatives for w and b

In [138]:
def part_derivatives(x, y, w, b):
    '''
    x: (m,n) array, m example with n features
    y: (m,) array, target values
    w: (n,) array, parameter
    b: scalar, parameter (bias)

    Return values:
    dj_dw: (n,) array partial derivative of cost jwb for w
    dj_db: scalar partial derivative of cost jwb for b
    '''

    m,n = x.shape
    dj_dw = np.zeros((n,))
    dj_db = 0

    for i in range(m):
        err = (np.dot(x[i], w) + b) - y[i] 
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err[0] * x[i, j]
        dj_db = dj_db + err[0]
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_db, dj_dw

- Def gradient descent function

In [139]:
def gradient_descent(x, y, w_in, b_in, cost_function, partial_derivatives, alpha, iterations):
    '''
    x: (m,n) array, m example with n features
    y: (m,) array, target values
    w_in: (n,) array, initial parameter
    b_in: scalar, initial parameter (bias)
    cost_function: function to calculate cost
    partial_derivatives: funtion to calculate dj_dw and dj_db
    alpha: float, learning rate
    iterations: intiger, number of iterations to run gradient descent
    '''

    j_values = []
    w = deepcopy(w_in)
    b = b_in

    for i in range(iterations):
        dj_db, dj_dw = part_derivatives(x, y, w, b)
        w = w - (alpha * dj_dw)
        b = b - (alpha * dj_db)

        if i < 100000:  
            j_values.append(cost_function(x, y, w, b))

        if i % 100 == 0:
            print(f"Iteration: {i}, Cost: {j_values[-1]}")

    return w, b, j_values
        
    


In [140]:
w_init = np.array([-1.0, -0.3])
b_init = -3.0
alpha = 1e-7
gradient_descent(x_train, y_train, w_init, b_init, cost_function, part_derivatives, alpha, iterations=100)
#cost_function(x_train, y_train, w_init, b_init)
#part_derivatives(x_train, y_train, w_init, b_init)



Iteration:    0, Cost: 170575314310.36
Iteration:  100, Cost: 117238805311.12
Iteration:  200, Cost: 105830484111.18
Iteration:  300, Cost: 101446130704.71
Iteration:  400, Cost: 99761171586.96


(array([1306.78424288,   82.35761548]),
 -3.0045374338714286,
 [170575314310.35846,
  155021851101.47696,
  149106267092.68015,
  146680319452.81137,
  145519607780.7213,
  144819339830.9462,
  144288323984.24432,
  143821184560.8398,
  143379772516.55377,
  142950260987.43015,
  142527622874.1069,
  142110014981.0348,
  141696746250.60617,
  141287542687.87686,
  140882281415.2948,
  140480894454.14273,
  140083333911.44073,
  139689559382.77908,
  139299533392.67038,
  138913219742.83606,
  138530582912.54532,
  138151587839.63187,
  137776199839.21542,
  137404384572.30122,
  137036108032.41966,
  136671336538.81989,
  136310036732.05067,
  135952175570.42812,
  135597720326.83717,
  135246638585.67732,
  134898898239.8729,
  134554467487.92989,
  134213314831.02492,
  133875409070.12212,
  133540719303.12248,
  133209214922.03175,
  132880865610.1677,
  132555641339.38776,
  132233512367.33394,
  131914449234.72289,
  131598422762.64734,
  131285404049.90921,
  130975364470.37004,
