In [1]:
#Apply any of the following learning algorithms to learn the parameters of the supervised single layer feed forward neural network.
#a. Stochastic Gradient Descent
#b. Mini Batch Gradient Descent


In [2]:
#Stochastic Gradient Descent
import numpy as np

In [3]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

In [4]:
def single_layer_neural_network(X,W,b):
  return sigmoid(np.dot(X,W)+b)

In [5]:
def SGD(X,y,learning_rate=0.01,epochs=100):
  num_samples, num_features = X.shape
  W = np.random.randn(num_features, 1)
  b = np.random.randn()

  for epoch in range(epochs):
    for i in range(num_samples):
      xi = X[i].reshape(-1,1)
      yi = y[i]

      z = np.dot(xi.T,W) + b
      a = sigmoid(z)

      #Calculate gradients
      dW = xi * (a-yi)
      db = a - yi

      #Update weights and bias
      W -= learning_rate * dW
      b -= learning_rate * db

  return W,b

In [6]:
X_train = np.array([[0,0],[0,1],[1,0],[1,1]])
y_train = np.array([0,1,1,0])
W_optimal, b_optimal = SGD(X_train,y_train)

In [7]:
print("Optimal weights: ",W_optimal)
print("Optimal bias: ",b_optimal)

Optimal weights:  [[-0.08144589]
 [-0.14707741]]
Optimal bias:  [[0.16365817]]


In [8]:
#Mini Batch Gradient Descent
import numpy as np

In [14]:
#Features
X = np.array([[0,0],[0,1],[1,0],[1,1]])

#Labels
y = np.array([0,1,1,0])

#Initialize parameters
input_size = 2
output_size = 1
learning_rate = 0.01
batch_size = 4
epochs = 100

#Initialize weights and bias
weights = np.random.rand(input_size)
bias = np.random.rand()

In [16]:
#Mini batch gradient descent
for epoch in range(epochs):
  for i in range(0, len(X), batch_size):
    X_batch = X[i:i+batch_size]
    y_batch = y[i:i+batch_size]

    #Forward pass
    z = np.dot(X_batch, weights) + bias
    predictions = 1/(1+np.exp(-z))

    #Compute gradients
    error = predictions - y_batch
    gradient_weights = np.dot(X_batch.T, error)/batch_size
    gradient_bias = np.sum(error)/batch_size

    #Update weight and bias
    weights -= learning_rate * gradient_weights
    bias -= learning_rate * gradient_bias

  if epoch % 1000 == 0:
    loss = np.mean(-y_batch * np.log(predictions) - (1 - y_batch) * np.log(1 - predictions))
    print(f"Epoch {epoch}/{epochs}, Loss: {loss: .4f}")


print("Training completed")
print("Weight: ",weights)
print("Bias: ",bias)

Epoch 0/100, Loss:  0.7173
Training completed
Weight:  [-0.00425667 -0.07186401]
Bias:  0.34289831193050585
