# Python Implementation of Gradient Descent Algorithm on Multiple Variables
### Gradient descent is an optimization algorithm which is commonly-used to train machine learning models and neural networks

In [1]:
# Importing modules
import pandas as pd
import numpy as np

In [2]:
# Training data
x_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

In [3]:
# Visualizing Training data
df1 = pd.DataFrame(x_train, columns=["x1", "x2", "x3", "x4"])
df2 = pd.DataFrame(y_train, columns=["y"])
pd.concat([df1, df2], axis=1)

Unnamed: 0,x1,x2,x3,x4,y
0,2104,5,1,45,460
1,1416,3,2,40,232
2,852,2,1,35,178


In [4]:
# Function computing cost (J)
def compute_cost(x, y, w, b):
    
    m, n = x.shape
    cost = 0.0
    
    for i in range(0, m):
        fx = np.dot(x[i], w) + b
        cost += (fx - y[i])**2
    total_cost = (1/(2*m))*cost
    return total_cost

In [5]:
# Function computing model parameters (w, b)
def compute_gradient(x, y, w, b, alpha):
    
    m, n = x.shape
    dj_dw = np.zeros((n, ))
    dj_db = 0.0
    
    for i in range(0, m):
        error = (np.dot(x[i], w) + b) -y[i]
        for j in range(0, n):
            dj_dw[j] += error * x[i, j]
        dj_db += error
    
    w = w - alpha*(dj_dw/m)
    b = b - alpha*(dj_db/m)
    
    return w, b

In [6]:
# Initializing parameters
init_w = np.array([0.0, 0.0, 0.0, 0.0])
init_b = 0.0
alpha = 5.0e-7
iterations = 10000

# Initialining variables
history_j = []
history_w = []
history_b = []

# Executing Gradient Descent
w = init_w
b = init_b
for i in range(0, iterations+1):
    cost = compute_cost(x_train, y_train, w, b)
    parameters = compute_gradient(x_train, y_train, w, b, alpha)
    history_j.append(cost)
    history_w.append(parameters[0])
    history_b.append(parameters[1])
    w = parameters[0]
    b = parameters[1]

In [7]:
# Dataframe
pd.DataFrame({'J_wb':history_j, 'w':history_w, 'b':history_b})

Unnamed: 0,J_wb,w,b
0,49518.000000,"[0.24133466666666664, 0.0005586666666666666, 0...",0.000145
1,2529.462952,"[0.19458207261755558, 0.0004543676297777779, 0...",0.000114
2,765.833683,"[0.20364184746067152, 0.00047850772752688816, ...",0.000117
3,699.629028,"[0.20188885485478927, 0.0004777647382371763, 0...",0.000114
4,697.133743,"[0.20223066552788063, 0.0004818424273224672, 0...",0.000111
...,...,...,...
9996,624.844933,"[0.21699546121079472, 0.032398597489196046, -0...",-0.019073
9997,624.840397,"[0.21699663701591032, 0.03240170432469925, -0....",-0.019075
9998,624.835862,"[0.21699781276827326, 0.032404811145154575, -0...",-0.019076
9999,624.831328,"[0.21699898846788582, 0.03240791795056268, -0....",-0.019078


In [8]:
# Make prediction
new_x = [1410, 3, 2, 38]
print(np.dot(history_w[-1], new_x) + history_b[-1])

283.79235133650286
