In [1]:
import numpy as np

import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torchviz import make_dot

def print_network_parameters(network):
    # Print weights and biases
    for name, param in network.named_parameters():
        if param.requires_grad:
            print(f"{name}:")
            print(param.data)

def create_torch_XOR_dataset():
    x1 = np.array ([0., 0., 1., 1.], dtype = np.float32)
    x2 = np.array ([0., 1., 0., 1.], dtype = np.float32)
    y  = np.array ([0., 1., 1., 0.],dtype = np.float32)

    x1 = np.repeat(x1, 50)
    x2 = np.repeat(x2, 50)  
    y =  np.repeat(y,  50)
    
    # Add noise
    x1 = x1 + np.random.rand(x1.shape[0])*0.05
    x2 = x2 + np.random.rand(x2.shape[0])*0.05

    # Shuffle the data
    index_shuffle = np.arange(x1.shape[0])
    np.random.shuffle(index_shuffle)

    x1 = x1.astype(np.float32)
    x2 = x2.astype(np.float32)
    y = y.astype(np.float32)

    x1 = x1[index_shuffle]
    x2 = x2[index_shuffle]
    y = y[index_shuffle]

    # Convert data to pytorch tensors
    x1_torch = torch.from_numpy(x1).clone().view(-1,1)
    x2_torch = torch.from_numpy(x2).clone().view(-1,1)
    y_torch = torch.from_numpy(y).clone().view(-1,1)

    X = torch.hstack([x1_torch, x2_torch])

    X_train = X[:150,:]
    X_test  = X[150:,:]
    y_train = y_torch[:150,:]
    y_test  = y_torch[150:,:]

    return X_train, y_train, X_test, y_test

def train(model, loss_function, optimizer, x, y, no_of_epochs):
    # store loss for each epoch
    all_loss = []

    for epoch_index in range(no_of_epochs):
        # forward pass
        y_hat = model(x)

        # calculate the loss
        loss = loss_function(y_hat, y)
        all_loss.append(loss.item())
        loss.backward()

        # optimize the weights and bias

        # takes a step in the parameter step opposite to the gradient, peforms the update rule
        optimizer.step()

        # clears out the old gradients from the previous step
        optimizer.zero_grad()

        print(all_loss[epoch_index])

    return all_loss

def train_with_rectified_L2(model, loss_function, optimizer, x, y, no_of_epochs):
    all_loss = []

    for epoch_index in range(no_of_epochs):
        y_hat = model(x)
        # Regular loss calculation
        loss = loss_function(y_hat, y)

        

class TwoLayerMLP(nn.Module):
    def __init__(self):
        super(TwoLayerMLP, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.output_layer = nn.Linear(2, 1)
    
    def forward(self, x):
        x = torch.sigmoid(self.layer1(x))
        x = torch.sigmoid(self.output_layer(x))
        return x

In [1]:
X_train, y_train, X_test, y_test = create_torch_XOR_dataset()
# plt.scatter(X_train[:,0], X_train[:,1], c = y_train)
# plt.show()

model_XOR = TwoLayerMLP()

# define the loss
loss_function = torch.nn.BCELoss()

optimizer = torch.optim.SGD(model_XOR.parameters(), lr=0.1)
all_loss = train(model_XOR, loss_function, optimizer, X_train, y_train, 90000)

NameError: name 'create_torch_XOR_dataset' is not defined

In [8]:
# Example input: a batch of 1 sample with 2 features
input1 = torch.tensor([[0.0, 0.0]])
input2 = torch.tensor([[0.0, 1.0]])
input3 = torch.tensor([[1.0, 0.0]])
input4 = torch.tensor([[1.0, 1.0]])
# Forward pass to get the output
output1 = model_XOR(input1)
output2 = model_XOR(input2)
output3 = model_XOR(input3)
output4 = model_XOR(input4)

print("Output0-0:", output1)
print("Output0-1:", output2)
print("Output1-0:", output3)
print("Output1-1:", output4)

Output0-0: tensor([[0.0010]], grad_fn=<SigmoidBackward0>)
Output0-1: tensor([[0.9989]], grad_fn=<SigmoidBackward0>)
Output1-0: tensor([[0.9986]], grad_fn=<SigmoidBackward0>)
Output1-1: tensor([[0.0009]], grad_fn=<SigmoidBackward0>)


In [13]:
def print_network_parameters_for_neurons(network):
    # Print weights and biases for each neuron
    for name, param in network.named_parameters():
        if param.requires_grad:
            print(f"{name}:")
            if name.endswith('weight'):  # If the parameter is a weight
                for idx, weights in enumerate(param):
                    print(f"  Neuron {idx + 1} weights: {weights.data.numpy()}")
            elif name.endswith('bias'):  # If the parameter is a bias
                for idx, bias in enumerate(param):
                    print(f"  Neuron {idx + 1} bias: {bias.data.item()}")


print_network_parameters_for_neurons(model_XOR)

layer1.weight:
  Neuron 1 weights: [-7.3517265  7.0466037]
  Neuron 2 weights: [-7.790935   7.9490924]
layer1.bias:
  Neuron 1 bias: -3.7243492603302
  Neuron 2 bias: 3.8839123249053955
output_layer.weight:
  Neuron 1 weights: [ 14.864945 -14.461322]
output_layer.bias:
  Neuron 1 bias: 6.887348175048828
