In [3]:
import numpy as np

class SGD:
    def __init__(self, lr=0.01, epochs=1000, batch_size=32, tol=1e-3):
        self.learning_rate = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.tolerance = tol
        self.weights = None
        self.bias = None

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias

    def mean_squared_error(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)

    def gradient(self, X_batch, y_batch):
        y_pred = self.predict(X_batch)
        error = y_pred - y_batch
        gradient_weights = np.dot(X_batch.T, error) / X_batch.shape[0]
        gradient_bias = np.mean(error)
        return gradient_weights, gradient_bias

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.random.randn(n_features)
        self.bias = np.random.randn()

        for epoch in range(self.epochs):
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i+self.batch_size]
                y_batch = y_shuffled[i:i+self.batch_size]

                gradient_weights, gradient_bias = self.gradient(X_batch, y_batch)
                self.weights -= self.learning_rate * gradient_weights
                self.bias -= self.learning_rate * gradient_bias

            if epoch % 100 == 0:
                y_pred = self.predict(X)
                loss = self.mean_squared_error(y, y_pred)
                print(f"Epoch {epoch}: Loss {loss}")

            if np.linalg.norm(gradient_weights) < self.tolerance:
                print("Convergence reached.")
                break

        return self.weights, self.bias


In [4]:
# Create random dataset with 100 rows and 5 columns
X = np.random.randn(100, 5)
# create corresponding target value by adding random
# noise in the dataset
y = np.dot(X, np.array([1, 2, 3, 4, 5]))\
    + np.random.randn(100) * 0.1
# Create an instance of the SGD class
model = SGD(lr=0.01, epochs=1000,
            batch_size=32, tol=1e-3)
w,b=model.fit(X,y)
# Predict using predict method from model
y_pred = w*X+b
#y_pred


Epoch 0: Loss 81.45788324264794
Epoch 100: Loss 0.030059514958443407
Epoch 200: Loss 0.009356858402668993
Epoch 300: Loss 0.009254091033209351
Epoch 400: Loss 0.009260495108998753
Epoch 500: Loss 0.009288844748674127
Epoch 600: Loss 0.009253827185581471
Epoch 700: Loss 0.009263200760247685
Epoch 800: Loss 0.009258167465576787
Epoch 900: Loss 0.00925870539632353


In [5]:
import tensorflow as tf
import numpy as np

class SGD:
    def __init__(self, lr=0.001, epochs=2000, batch_size=32, tol=1e-3):
        self.learning_rate = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.tolerance = tol
        self.weights = None
        self.bias = None

    def predict(self, X):
        return tf.matmul(X, self.weights) + self.bias

    def mean_squared_error(self, y_true, y_pred):
        return tf.reduce_mean(tf.square(y_true - y_pred))

    def gradient(self, X_batch, y_batch):
        with tf.GradientTape() as tape:
            y_pred = self.predict(X_batch)
            loss = self.mean_squared_error(y_batch, y_pred)
        gradient_weights, gradient_bias = tape.gradient(loss, [self.weights, self.bias])
        return gradient_weights, gradient_bias

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = tf.Variable(tf.random.normal((n_features, 1)))
        self.bias = tf.Variable(tf.random.normal(()))

        for epoch in range(self.epochs):
            indices = tf.random.shuffle(tf.range(n_samples))
            X_shuffled = tf.gather(X, indices)
            y_shuffled = tf.gather(y, indices)

            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i+self.batch_size]
                y_batch = y_shuffled[i:i+self.batch_size]

                gradient_weights, gradient_bias = self.gradient(X_batch, y_batch)
                # Gradient clipping
                gradient_weights = tf.clip_by_value(gradient_weights, -1, 1)
                gradient_bias = tf.clip_by_value(gradient_bias, -1, 1)

                self.weights.assign_sub(self.learning_rate * gradient_weights)
                self.bias.assign_sub(self.learning_rate * gradient_bias)

            if epoch % 100 == 0:
                y_pred = self.predict(X)
                loss = self.mean_squared_error(y, y_pred)
                print(f"Epoch {epoch}: Loss {loss}")

            if tf.norm(gradient_weights) < self.tolerance:
                print("Convergence reached.")
                break

        return self.weights.numpy(), self.bias.numpy()

# Create random dataset with 100 rows and 5 columns
X = np.random.randn(100, 5).astype(np.float32)
# Create corresponding target value by adding random
# noise in the dataset
y = np.dot(X, np.array([1, 2, 3, 4, 5], dtype=np.float32)) + np.random.randn(100).astype(np.float32) * 0.1

# Create an instance of the SGD class
model = SGD(lr=0.005, epochs=1000, batch_size=12, tol=1e-3)
w, b = model.fit(X, y)

# Predict using predict method from model
y_pred = np.dot(X, w) + b


Epoch 0: Loss 68.17742156982422
Epoch 100: Loss 65.45679473876953
Epoch 200: Loss 65.3834457397461
Epoch 300: Loss 65.32938385009766
Epoch 400: Loss 65.40409851074219
Epoch 500: Loss 65.42749786376953
Epoch 600: Loss 65.39996337890625
Epoch 700: Loss 65.32992553710938
Epoch 800: Loss 65.40864562988281
Epoch 900: Loss 65.41107940673828
