# Vectorisation 

In [23]:
%matplotlib inline

import numpy as np

# Create input and output data:

In [24]:
# Toy training set: four input values and their four matching targets,
# both stored as 1-D NumPy arrays of shape (4,).
x_train = np.array((56, 48, 66, 72))
y_train = np.array((130000, 118000, 145000, 180000))
x_train  # last expression of the cell -> rendered as its output

array([56, 48, 66, 72])

# Manually define the model parameters:

In [25]:
# Hand-picked (not fitted) parameters of the linear model f(x) = w*x + b.
w, b = 201, 30000
w  # display the chosen weight as the cell output

201

# Model linear regression NON-vectorized

In [26]:
# Non-vectorized prediction: evaluate f(x) = w*x + b one example at a time.
m = x_train.shape[0]  # number of training examples
f_wb = np.zeros(m)    # float buffer with one slot per example
for i, x_i in enumerate(x_train):
    f_wb[i] = w * x_i + b
f_wb

array([41256., 39648., 43266., 44472.])

# Model linear regression Vectorized

## Using np.dot as a matrix product requires 2-D operands:
- x_train - a NumPy array (column vector) of shape (m, 1), where m is the number of training examples
- w - a NumPy array of shape (1, 1)

In [27]:
# Start from a fresh 1-D copy of the inputs so this cell is idempotent:
# reshaping an array that was already reshaped by a previous run would
# otherwise report (4, 1) as the "origin" shape.
x_train = np.array([56, 48, 66, 72])
print(f'x_train shape origin: {x_train.shape}')
x_train = x_train.reshape(-1, 1)  # (4,) -> (4, 1): one row per example
print(f'x_train shape reshaped: {x_train.shape}')

# NOTE: np.array([201]) is 1-D with shape (1,). It is NOT the same as
# np.array([[201]]), which is already 2-D with shape (1, 1); the reshape
# below is what produces the (1, 1) form.
w = np.array([201])
print(f'w shape origin: {w.shape}')
w = w.reshape(-1, 1)  # (1,) -> (1, 1)
print(f'w shape reshaped: {w.shape}')

x_train shape origin: (4,)
x_train shape reshaped: (4, 1)
w shape origin: (1,)
w shape reshaped: (1, 1)


## Vectorised model

In [28]:
# Vectorized prediction for every example in one matrix product; the scalar
# bias b is broadcast onto each row of the result.
# Assumes x_train is (m, 1) and w is (1, 1), as produced by the reshape cell.
f_x = x_train.dot(w) + b
f_x

array([[41256],
       [39648],
       [43266],
       [44472]])

# Cost function

In [29]:
# Reset the toy data set and the model parameters for the cost-function example.
x_train = np.array((56, 48, 66, 72))
y_train = np.array((13, 11, 14, 18))
w, b = 201, 300
m = len(x_train)  # number of training examples

In [30]:
# Squared-error cost J(w, b) = 1/(2m) * sum_i (f(x_i) - y_i)^2,
# accumulated with an explicit loop over the m training examples.
cost_sum = 0
for i in range(m):
    residual = (w * x_train[i] + b) - y_train[i]  # prediction minus target
    cost_sum += residual ** 2
total_cost = (1 / (2.0 * m)) * cost_sum

print(total_cost)

79165254.75


## Vectorized cost function

In [31]:
# Same toy data, now stored as 2-D arrays so the matrix product applies.
x_train = np.array([56, 48, 66, 72]).reshape(-1, 1)  # (4,) -> (4, 1)
y_train = np.array([13, 11, 14, 18]).reshape(-1, 1)  # (4,) -> (4, 1)
w = np.array([201]).reshape(-1, 1)                   # (1,) -> (1, 1)
b = 300
m = len(x_train)  # number of training examples (rows)

In [32]:
# Vectorized cost: predict all examples at once, then square the residuals
# and reduce them with a single sum.
f_wb = x_train.dot(w) + b
cost_sum = ((f_wb - y_train) ** 2).sum()
total_cost = (1 / (2.0 * m)) * cost_sum

print(total_cost)

79165254.75


## Gradient Descent

In [33]:
# Reset data and parameters to plain 1-D arrays / scalars for the loop-based gradient.
x_train = np.array((56, 48, 66, 72))
y_train = np.array((13, 11, 14, 18))
w, b = 201, 300
m = len(x_train)  # number of training examples

In [34]:
# Gradient of the squared-error cost, accumulated one example at a time:
#   dJ/dw = 1/m * sum_i (f(x_i) - y_i) * x_i
#   dJ/db = 1/m * sum_i (f(x_i) - y_i)
dJ_dw = 0
dJ_db = 0

for i in range(m):
    error_i = (w * x_train[i] + b) - y_train[i]  # prediction minus target
    dJ_dw += error_i * x_train[i]
    dJ_db += error_i
# Turn the accumulated sums into averages over the m examples.
dJ_dw /= m
dJ_db /= m

print(dJ_dw, dJ_db)

770026.0 12446.5


## Vectorized

In [35]:
# Column-vector versions of the data and weight for the vectorized gradient.
x_train = np.array([56, 48, 66, 72]).reshape(-1, 1)  # (4,) -> (4, 1)
y_train = np.array([13, 11, 14, 18]).reshape(-1, 1)  # (4,) -> (4, 1)
w = np.array([201]).reshape(-1, 1)                   # (1,) -> (1, 1)
b = 300
m = len(x_train)  # number of training examples (rows)

In [36]:
# Vectorized gradient of the squared-error cost.
f_wb = np.dot(x_train, w) + b

# Residuals for all examples, computed once and reused for both gradients.
# Same shape as f_wb and y_train, i.e. a column vector (4, 1) here.
error = f_wb - y_train

# x_train is a column vector (4, 1):  []
#                                     []
#                                     []
#                                     []
# error is also (4, 1), but to multiply it with x_train it has to be a
# row vector, so transpose it to (1, 4):  [][][][]
# The product (1, 4) x (4, 1) then sums error_i * x_i over all examples
# and yields a (1, 1) array.
dJ_dw = np.dot(error.T, x_train) / m
dJ_db = np.sum(error) / m

# dJ_dw is a (1, 1) array, so pick out its single element for printing.
print(dJ_dw[0][0], dJ_db)

770026.0 12446.5
