In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Cria o dataset
dataset = pd.read_csv('dataset.csv', header=None)
dataset.columns = ['X', 'y']
dataset.describe()

Unnamed: 0,X,y
count,47.0,47.0
mean,2000.680851,340412.659574
std,794.702354,125039.899586
min,852.0,169900.0
25%,1432.0,249900.0
50%,1888.0,299900.0
75%,2269.0,384450.0
max,4478.0,699900.0


In [11]:
class LinearRegression:
    def __init__(self, X, y, learning_rate, epochs) -> None:
        # Taxa de aprendizado (Learning Rate) 
        self.learning_rate = learning_rate
        # Numero de iterações
        self.epochs = epochs
        # Dados de treino
        self.X = X
        self.y = y
        # Descompacta a tupla de linhas-colunas
        self.m, self.n = self.X.shape
        # Define a matriz de pesos com a qtd linhas igual qtd de colunas dos dados de treino
        self.weights = np.zeros((self.n, 1))
        # Cria o bias igual a zero
        self.bias = 0


    # Predição
    def predict(self, X):
        '''Cria o modelo linear'''
        return np.dot(X, self.weights) + self.bias


    # Gradiente
    def gradient(self, y_predicted):
        '''Calcula as derivadas e a taxa de erro que será usada na MSE'''
        error = y_predicted - self.y
        dw = (1/ self.n) * np.dot(self.X.T, error)
        db = (1/ self.n) * np.sum(y_predicted - self.y)

        return dw, db


    # Função de custo
    def MSE(self, y_predicted):
        '''Eleva os erros ao quadrado e faz o somatório'''
        return np.sum((y_predicted - self.y) ** 2) / self.m


    # Gradiente Descendente
    def GD(self):
        '''Atualiza os pesos e o bias'''
        for _ in range(self.epochs):
            y_predicted = self.predict(self.X)
            dw, db = self.gradient(y_predicted)
            
            self.weights = self.weights - (self.learning_rate * dw)
            self.bias = self.bias - (self.learning_rate * db)

In [15]:
# Cria matrizes (1,1) das colunas 'X' e 'y' 
X, y = dataset['X'].values.reshape(-1, 1), dataset['y'].values.reshape(-1, 1)

# Faz a normalização dos dados (formula da distribuição normal)
X = (X - X.mean()) / X.std()
y = (y - y.mean()) / y.std()

# Separa os dados do dataset em subdatasets de treino-teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
model = LinearRegression(X=X_train, y=y_train, learning_rate=0.001, epochs=1000)
model.GD()

In [19]:
print(model.weights * 2100 + model.bias)

[[1793.13795017]]
