In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

class RegressaoLinear:
    """Univariate-output linear regression fitted with batch gradient descent.

    The model is ``pred = X @ w + b``. Training data is stored on the
    instance and used by ``gradiente``, ``MSE`` and ``GD``.

    Parameters
    ----------
    X : array-like of shape (m, n) or (m,)
        Training features. A 1-D input is treated as a single feature column.
    y : array-like of shape (m, 1) or (m,)
        Training targets. Always stored as an (m, 1) column so that
        ``pred - y`` is element-wise instead of broadcasting to (m, m).
    lr : float
        Learning rate (step size) of gradient descent.
    epochs : int
        Number of full-batch updates performed by ``GD``.

    Raises
    ------
    ValueError
        If ``X`` is not 1-D/2-D, is empty, or its row count differs from ``y``.
    """

    def __init__(self, X, y, lr, epochs):
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError("X must be a 1-D or 2-D array")
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        self.X = X
        self.y = y
        self.lr = lr

        # m = number of samples, n = number of features.
        self.m, self.n = self.X.shape
        self.epochs = epochs
        self.w = np.zeros((self.n, 1))
        self.b = 0.0

    # Prediction
    def predict(self, X):
        """Return the model output ``X @ w + b`` for the given features."""
        return np.dot(X, self.w) + self.b

    # Gradient
    def gradiente(self, pred):
        """Return ``(dw, db)``, the cost gradients w.r.t. weights and bias.

        The gradients are averaged over the ``m`` samples (not the ``n``
        features), so the step size does not grow with the dataset size.
        The constant factor 2 of the MSE derivative is left to the learning
        rate.
        """
        erro = pred - self.y
        dw = (1 / self.m) * np.dot(self.X.T, erro)
        db = (1 / self.m) * np.sum(erro)
        return dw, db

    # Cost function
    def MSE(self, pred):
        """Return the mean squared error between ``pred`` and the targets."""
        return np.sum((pred - self.y) ** 2) / self.m

    # Gradient descent
    def GD(self):
        """Run ``epochs`` full-batch updates of ``w`` and ``b``.

        Returns
        -------
        list of float
            The cost of each epoch, computed from the predictions made
            *before* that epoch's parameter update.
        """
        custo_total = []
        for _ in range(self.epochs):
            pred = self.predict(self.X)
            dw, db = self.gradiente(pred)
            self.w = self.w - self.lr * dw
            self.b = self.b - self.lr * db

            custo_total.append(self.MSE(pred))
        return custo_total

In [3]:

# Read the headerless CSV, then label its two columns.
column_labels = ['X', 'y']
dataset = pd.read_csv('dataset.csv', header=None)
dataset.columns = column_labels
# Last expression of the cell: renders the summary statistics.
dataset.describe()

Unnamed: 0,X,y
count,47.0,47.0
mean,2000.680851,340412.659574
std,794.702354,125039.899586
min,852.0,169900.0
25%,1432.0,249900.0
50%,1888.0,299900.0
75%,2269.0,384450.0
max,4478.0,699900.0


In [16]:
# Build (m, 1) column matrices from the 'X' and 'y' columns.
X_raw = dataset['X'].values.reshape(-1, 1)
y_raw = dataset['y'].values.reshape(-1, 1)

# Split BEFORE scaling so the held-out rows never influence the scaling
# statistics; standardizing on the full dataset leaks test information
# into training. With no stratification the shuffle depends only on the row
# count and random_state, so the same rows land in each partition as before.
X_train, X_test, y_train, y_test = train_test_split(
    X_raw, y_raw, test_size=0.2, random_state=42
)

# Z-score standardization, (value - mean) / std, using training statistics only.
X_mean, X_std = X_train.mean(), X_train.std()
y_mean, y_std = y_train.mean(), y_train.std()

X_train = (X_train - X_mean) / X_std
X_test = (X_test - X_mean) / X_std
y_train = (y_train - y_mean) / y_std
y_test = (y_test - y_mean) / y_std

# Full dataset on the same scale as the training split.
X = (X_raw - X_mean) / X_std
y = (y_raw - y_mean) / y_std

assert len(X_train) + len(X_test) == len(X)


37 47
