In [1]:
from sklearn.datasets import fetch_california_housing
import math, copy
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from lab_utils_uni import plt_intuition, plt_stationary, plt_update_onclick, soup_bowl
import time
# Apply the custom matplotlib style sheet; the path is resolved relative to the
# current working directory, so the notebook must be run from its own folder.
# NOTE(review): the warnings recorded for this cell show values such as
# '#4f4f4f' being read as empty strings. This looks like an unquoted '#' being
# treated as a comment inside the .mplstyle file - confirm, and quote the hex
# colours in that file if so.
plt.style.use('./deeplearning.mplstyle')

Bad value in file './deeplearning.mplstyle', line 6 ('axes.edgecolor : #4f4f4f'): Key axes.edgecolor: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 7 ('axes.labelcolor : #4f4f4f'): Key axes.labelcolor: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 17 ('xtick.color : #4f4f4f'): Key xtick.color: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 19 ('ytick.color : #4f4f4f'): Key ytick.color: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 6 ('axes.edgecolor : #4f4f4f'): Key axes.edgecolor: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 7 ('axes.labelcolor : #4f4f4f'): Key axes.labelcolor: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 17 ('xtick.color : #4f4f4f'): Key xtick.color: '' does not look like a color arg
Bad value in file './deeplearning.mplstyle', line 19 ('ytic

In [None]:
def predict(x, w, b):
    """
    Linear-regression prediction for one example.

    Args:
      x (ndarray): Shape (n,) feature vector of a single example
      w (ndarray): Shape (n,) weight vector
      b (scalar):  bias term

    Returns:
      scalar: dot product of x and w, plus b
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b


def compute_cost(X, y, w, b):
    """
    Squared-error cost of a linear model, accumulated one example at a time.

    This is the explicit-loop version; it visits each row of X in turn.

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      cost (scalar): sum of squared residuals divided by 2*m
    """
    m = X.shape[0]
    squared_error_total = 0.0
    for i, x_i in enumerate(X):
        # Residual of the model's prediction for example i.
        residual = (np.dot(x_i, w) + b) - y[i]
        squared_error_total = squared_error_total + residual ** 2
    return squared_error_total / (2 * m)


def compute_cost_vectorized(X, y, w, b):
    """
    Squared-error cost of a linear model, computed with array operations.

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias
    Returns:
      cost (scalar): sum of squared residuals divided by 2*m
    """
    n_examples = X.shape[0]
    # Predictions for every example at once, then their residuals.
    residuals = (np.dot(X, w) + b) - y
    return np.sum(residuals ** 2) / (2 * n_examples)



def compute_gradient(X, y, w, b):
    """
    Gradient of the squared-error cost for linear regression, using explicit loops.

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      dj_db (scalar)      : gradient of the cost with respect to b
      dj_dw (ndarray (n,)): gradient of the cost with respect to w
    """
    m, n = X.shape
    grad_w = np.zeros((n,))
    grad_b = 0.
    for i, x_i in enumerate(X):                    # loop over the training examples
        # 1. Prediction error for this example (model output minus target).
        residual = (np.dot(x_i, w) + b) - y[i]
        # 2. Accumulate this error's contribution to each weight w_j.
        for j in range(n):                         # loop over the features
            grad_w[j] += residual * x_i[j]
        # 3. The bias term's contribution does not involve any feature value.
        grad_b += residual
    # Turn the accumulated sums into averages over the m examples.
    grad_w = grad_w / m
    grad_b = grad_b / m

    return grad_b, grad_w


def compute_gradient_vectorized(X, y, w, b):
    """
    Gradient of the squared-error cost for linear regression, using array operations.

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      dj_db (scalar)      : gradient of the cost with respect to b
      dj_dw (ndarray (n,)): gradient of the cost with respect to w
    """
    m, _ = X.shape

    # 1. Residuals for every row at once (vector of length m).
    residuals = (X @ w + b) - y

    # 2. Gradient with respect to w (vector of length n):
    #    X transposed times the residuals, scaled by 1/m.
    scale = 1 / m
    grad_w = scale * (X.T @ residuals)

    # 3. Gradient with respect to b (scalar): mean of the residuals.
    grad_b = np.sum(residuals) / m

    return grad_b, grad_w

    
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Batch gradient descent: repeatedly step w and b against the gradient.

    Args:
      X                 : training data, passed through to the callbacks
      y                 : target values, passed through to the callbacks
      w_in              : initial weights (deep-copied, so the caller's object
                          is never modified)
      b_in (scalar)     : initial bias
      cost_function     : callable (X, y, w, b) -> scalar cost
      gradient_function : callable (X, y, w, b) -> (dj_db, dj_dw)
      alpha (float)     : learning rate
      num_iters (int)   : number of update steps to perform

    Returns:
      w         : weights after the final step
      b         : bias after the final step
      J_history : list with the cost after each of the first 100000 steps
    """
    # Cap on stored costs so very long runs do not grow the list without bound.
    max_history = 100000
    # Report roughly ten times over the run; hoisted because it is loop-invariant.
    # (Evaluates to 0 only when num_iters is 0, in which case the loop never runs.)
    report_every = math.ceil(num_iters / 10)

    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in

    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % report_every == 0
        if record or report:
            # Evaluate the cost once and share it between history and report.
            # Reporting the freshly computed value (rather than J_history[-1])
            # keeps the printed cost correct once the history cap is reached.
            cost = cost_function(X, y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f}")

    return w, b, J_history

In [5]:
housing = fetch_california_housing()
x_train = housing.data[:, :4]  # keep only the first four feature columns (0-3)
y_train = housing.target
print(f"Tamanho dos dados de treinamento: {x_train.shape}")


# Both runs start from the same all-zero parameters; gradient_descent copies
# the initial weights, so initial_w can safely be reused for the second run.
initial_w = np.zeros(x_train.shape[1])
initial_b = 0.

# Gradient-descent settings
iterations = 1000
alpha = 1.0e-7


print("Gradient descent sem vetorização")
# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed time, unlike the wall-clock time.time().
start = time.perf_counter()
# Gradient descent with the explicit-loop cost and gradient functions
w_final, b_final, J_hist = gradient_descent(x_train, y_train, initial_w, initial_b,
                                            compute_cost, compute_gradient,
                                            alpha, iterations)
end = time.perf_counter()
print(f"[FOR LOOP] Tempo: {end - start:.4f} segundos")

print("Gradient descent com vetorização")
start = time.perf_counter()
# Gradient descent with the vectorized cost and gradient functions
w_final, b_final, J_hist = gradient_descent(x_train, y_train, initial_w, initial_b,
                                            compute_cost_vectorized, compute_gradient_vectorized,
                                            alpha, iterations)
end = time.perf_counter()
# This timing belongs to the vectorized run, so it is labelled accordingly.
print(f"[VETORIZADO] Tempo: {end - start:.4f} segundos")

# Reports the parameters from the vectorized run (the last one executed).
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

# m,_ = x_train.shape
#for i in range(m):
#   print(f"prediction: {np.dot(x_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}") 

Tamanho dos dados de treinamento: (20640, 4)
Gradient descent sem vetorização
Iteration    0: Cost     2.80
Iteration  100: Cost     2.77
Iteration  200: Cost     2.73
Iteration  300: Cost     2.69
Iteration  400: Cost     2.65
Iteration  500: Cost     2.62
Iteration  600: Cost     2.58
Iteration  700: Cost     2.55
Iteration  800: Cost     2.51
Iteration  900: Cost     2.48
[FOR LOOP] Tempo: 101.3458 segundos
Gradient descent com vetorização
Iteration    0: Cost     2.80
Iteration  100: Cost     2.77
Iteration  200: Cost     2.73
Iteration  300: Cost     2.69
Iteration  400: Cost     2.65
Iteration  500: Cost     2.62
Iteration  600: Cost     2.58
Iteration  700: Cost     2.55
Iteration  800: Cost     2.51
Iteration  900: Cost     2.48
[FOR LOOP] Tempo: 0.0879 segundos
b,w found by gradient descent: 0.00,[0.00091759 0.00577627 0.00111898 0.0002146 ] 
