In [27]:
import numpy as np
import matplotlib.pyplot as plt

In [28]:
def lr_schadule(t, a=1, b=1000):
    return a/(b+t)

def linear_predict(X, w):
    return X@w

def mse_loss(y_hat, y):
    error = y_hat-y
    loss = np.sum(error*error)/y.shape[0]
    return error, loss

def calc_gradient(X, error):
    return X.T@error

def update_weights(w, lr, gradient):
    return w-(lr*gradient)

def MGD(X, y, w, lr, n_epochs, batch_size):
    m = len(X)
    
    for epoch in range(n_epochs):
        shuffled_indexes = np.random.permutation(m)
        X_shuffled = X[shuffled_indexes]
        y_shuffled = y[shuffled_indexes]
        for batch in range(0,m, batch_size):
            xi = X_shuffled[batch:batch+batch_size]
            yi = y_shuffled[batch:batch+batch_size]
            y_hat = linear_predict(xi, w)
            error, loss = mse_loss(y_hat, yi)
            gradient = calc_gradient(xi, error)
            lr = lr_schadule(epoch*batch)
            w = update_weights(w, lr, gradient)
    return w


In [29]:
from sklearn.model_selection import train_test_split
import pandas as pd

In [30]:
df = pd.read_csv('../data/fuelConsumption.csv')
y = df[['CO2EMISSIONS']].values
x = df[['FUELCONSUMPTION_CITY']].values

In [31]:
np.random.seed(7)
n_epochs = 100
lr = 0.001
batch_size = 32
x0 = np.ones((x.shape[0],1))
X = np.concatenate((x0,x), axis=1)
w= np.random.rand(X.shape[1],1)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
w = MGD(x_train, y_train, w, lr, n_epochs, batch_size)
y_pred = linear_predict(x_test,w)
_, test_loss_2 = mse_loss(y_pred, y_test)
test_loss_2
        

np.float64(8.345821796081594e+106)