# Mathematical Circuits

In [120]:
import torch
from torch import nn

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
# Neural_Net() moves every model it builds onto this device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {DEVICE}")

Using cpu


## Helpers

In [121]:
def Neural_Net(layer_sizes, bias=True, verbose=True):
    """Build a fully connected network with ReLU between consecutive layers.

    Args:
        layer_sizes: Sequence of layer widths, input size first and output
            size last (at least two entries). Each consecutive pair becomes
            one ``nn.Linear`` layer; every layer except the output layer is
            followed by ``nn.ReLU``.
        bias: Whether the linear layers carry a bias term.
        verbose: If true, print the module structure and its initial
            parameters.

    Returns:
        A ``NeuralNetwork`` module living on ``DEVICE``. Besides ``forward``
        it exposes ``print_model_parameters`` and the private set-up hooks
        used by ``init_Neural_Net``.

    Raises:
        AssertionError: If fewer than two layer sizes are given.
    """
    assert len(layer_sizes) >= 2, "layer_sizes needs at least an input and an output size"

    # Linear layers are created in order so that RNG consumption (and hence
    # the initial weights for a given seed) is deterministic.
    layers = []
    output_index = len(layer_sizes) - 2
    for i in range(len(layer_sizes) - 1):
        layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1], bias=bias))
        if i != output_index:  # hidden layers get a ReLU activation
            layers.append(nn.ReLU())

    # Model specification:
    class NeuralNetwork(nn.Module):
        """Sequential MLP plus the bookkeeping needed by ``train``."""

        def __init__(self):
            super().__init__()
            self.NN = nn.Sequential(*layers)

        def forward(self, x):
            """Return the raw (un-activated) output of the last linear layer."""
            return self.NN(x)

        def print_model_parameters(self):
            """Print every linear layer's weights (transposed) and bias, if any.

            Layers are labelled by linear-layer index, not by position in
            ``self.parameters()``, so bias vectors no longer shift the
            numbering or get reported as layers of their own.
            """
            print("Weights:")
            print("--------")

            linear_layers = [m for m in self.NN if isinstance(m, nn.Linear)]
            last = len(linear_layers) - 1
            for i, layer in enumerate(linear_layers):
                label = "output_layer" if i == last else f"hidden_layer_{i}"
                print(f"{label}:")
                # Transposed so that rows are inputs and columns are units.
                print(layer.weight.T)
                if layer.bias is not None:
                    # Bias is 1-D; print it as-is (``.T`` on 1-D is deprecated).
                    print(f"{label}_bias:")
                    print(layer.bias)
                print("")

        def _set_data_generators(self, generate_Xs, get_Ys):
            """Store the input sampler ``generate_Xs(n=...)`` and labeller ``get_Ys(Xs)``."""
            self.generate_Xs = generate_Xs
            self.get_Ys = get_Ys

        def _init_loss_optimizer(self, lr=1e-1):
            """Attach an MSE loss and a plain SGD optimizer with learning rate ``lr``."""
            self.loss_fn = nn.MSELoss()
            self.optimizer = torch.optim.SGD(self.parameters(), lr=lr)

        def _init_test_data(self, test_n=1000, verbose=True):
            """Sample a fixed test set of ``test_n`` points and report the starting loss.

            Requires ``_set_data_generators`` and ``_init_loss_optimizer`` to
            have been called first.
            """
            # Keep the test set on the same device as the parameters; the
            # generators return CPU tensors while the model may be on CUDA.
            device = next(self.parameters()).device
            self.test_X = self.generate_Xs(n=test_n).to(device)
            self.test_y = self.get_Ys(self.test_X).to(device)

            # Pure evaluation: no autograd graph is needed for the report.
            with torch.no_grad():
                loss = self.loss_fn(self(self.test_X), self.test_y).item()
            if verbose:
                print("Starting Loss =", loss)

    # Model initialization:
    model = NeuralNetwork().to(DEVICE)
    if verbose:
        print(model)
        print("")
        model.print_model_parameters()
    return model

def init_Neural_Net(layer_sizes, generate_Xs, get_Ys, bias=True, lr=1e-1, test_n=1000, verbose=True):
    """Create a network and wire up everything ``train`` needs.

    Steps, in order: build the model via ``Neural_Net``, register the data
    generators, attach the loss/optimizer, then sample the fixed test set
    (which also reports the starting loss when ``verbose`` is true).

    Args:
        layer_sizes: Layer widths, forwarded to ``Neural_Net``.
        generate_Xs: Callable ``generate_Xs(n=...)`` producing input batches.
        get_Ys: Callable mapping a batch of inputs to its targets.
        bias: Whether linear layers use a bias term.
        lr: Learning rate for the optimizer.
        test_n: Number of points in the fixed test set.
        verbose: Forwarded to model construction and test-set set-up.

    Returns:
        The fully initialised model.
    """
    net = Neural_Net(layer_sizes, bias=bias, verbose=verbose)
    net._set_data_generators(generate_Xs=generate_Xs, get_Ys=get_Ys)
    net._init_loss_optimizer(lr=lr)
    net._init_test_data(test_n=test_n, verbose=verbose)
    return net

def train(model, batch_size=1000, batches_to_run=500, print_every=100):
    """Train ``model`` with freshly sampled batches and report test loss.

    The model must carry the attributes set up by ``init_Neural_Net``:
    ``generate_Xs``, ``get_Ys``, ``loss_fn``, ``optimizer``, ``test_X``,
    ``test_y`` and ``print_model_parameters``.

    Args:
        model: The network to optimise in place.
        batch_size: Number of samples drawn for every optimisation step.
        batches_to_run: Number of optimisation steps.
        print_every: Print the test loss on every step whose index is a
            multiple of this value (step 0 included).

    Returns:
        None. Prints the periodic test loss and, at the end, the model
        parameters.
    """
    # The data generators produce CPU tensors; move everything to wherever
    # the parameters live so the loop also works when the model is on CUDA.
    device = next(model.parameters()).device

    for step in range(batches_to_run):
        # Put model into training mode:
        model.train()

        # Get current batch data
        train_X = model.generate_Xs(n=batch_size).to(device)
        train_y = model.get_Ys(train_X).to(device)

        # Do forward pass and evaluate loss
        predicted = model(train_X)
        loss = model.loss_fn(predicted, train_y)

        # Backpropagation
        model.optimizer.zero_grad()
        loss.backward()
        model.optimizer.step()

        # Reporting
        if step % print_every == 0:
            model.eval()
            # Evaluation only: skip building an autograd graph.
            with torch.no_grad():
                test_X = model.test_X.to(device)
                test_y = model.test_y.to(device)
                test_loss = model.loss_fn(model(test_X), test_y)
            print("loss =", test_loss.item())
            print("")

    model.print_model_parameters()

## Identity Function: f(x) = x

In [122]:
# Seed PyTorch's global RNG so the initial weights and sampled data in this section are repeatable.
torch.manual_seed(0)  # Random seed

def generate_Xs(n):
    """Draw ``n`` scalar inputs, shape ``(n, 1)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 1))  # uniform on [0, 1)
    return unit_samples * 2 - 1  # stretch and shift to [-1, 1)

def get_Ys(Xs):
    """Target function f(x) = x: the labels are the inputs themselves."""
    targets = Xs  # identity map; no copy is made
    return targets

# 1 input -> 2 hidden ReLU units -> 1 output, with no bias terms.
# Two units are enough for an exact solution: x = relu(x) - relu(-x).
# lr, test_n and verbose keep init_Neural_Net's defaults.
model = init_Neural_Net(
    layer_sizes=(1, 2, 1),
    bias=False,
    generate_Xs=generate_Xs,
    get_Ys=get_Ys
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0075,  0.5364]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.5820],
        [-0.5204]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.45063677430152893


In [123]:
# Run 500 optimisation steps; batch size and reporting interval use train()'s defaults.
train(
    model,
    batches_to_run=500
)

loss = 0.4366095960140228

loss = 0.15338784456253052

loss = 5.638498259941116e-05

loss = 6.232232091507584e-11

loss = 1.6763340390281434e-13

Weights:
--------
hidden_layer_0:
tensor([[-0.9212,  1.0036]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-1.0855],
        [ 0.9964]], grad_fn=<PermuteBackward0>)



## Negative Identity Function: f(x) = -x

In [124]:
# NOTE: With random seed 0, training gets stuck in a local optimum: the network fits positive inputs
# very well but always returns 0 for negative inputs. Seed 1 is used here instead:
torch.manual_seed(1)  # Random seed

def generate_Xs(n):
    """Return an ``(n, 1)`` tensor of inputs sampled uniformly from [-1, 1)."""
    return torch.rand((n, 1)).mul(2).sub(1)

def get_Ys(Xs):
    """Target function f(x) = -x: flip the sign of every input."""
    negated = -1 * Xs
    return negated

# 1 input -> 2 hidden ReLU units -> 1 output, with no bias terms.
# An exact solution exists with two units: -x = relu(-x) - relu(x).
# lr, test_n and verbose keep init_Neural_Net's defaults.
model = init_Neural_Net(
    layer_sizes=(1, 2, 1),
    bias=False,
    generate_Xs=generate_Xs,
    get_Ys=get_Ys
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[ 0.5153, -0.4414]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.1371],
        [ 0.3319]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.26709434390068054


In [125]:
# Run 500 optimisation steps; batch size and reporting interval use train()'s defaults.
train(
    model,
    batches_to_run=500
)

loss = 0.2619011104106903

loss = 1.8015611203736626e-05

loss = 1.723086777427607e-11

loss = 1.9885521452146088e-13

loss = 1.8276527481101562e-13

Weights:
--------
hidden_layer_0:
tensor([[ 1.0619, -1.0210]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.9417],
        [ 0.9794]], grad_fn=<PermuteBackward0>)



## Absolute Value: f(x) = |x|

In [126]:
# NOTE: With random seed 0, training gets stuck in a local optimum where the weight of one of the
# hidden nodes gets zeroed out. As a result, the network ends up working only for positive inputs
# and always returns 0 for negative inputs. Seed 1 is used here instead:
torch.manual_seed(1)  # Random seed

def generate_Xs(n):
    """Sample ``n`` one-dimensional inputs uniformly from [-1, 1)."""
    shape = (n, 1)
    scaled = torch.rand(shape) * 2  # uniform on [0, 2)
    return scaled - 1

def get_Ys(Xs):
    """Target function f(x) = |x|, applied element-wise."""
    magnitudes = Xs.abs()
    return magnitudes

# 1 input -> 2 hidden ReLU units -> 1 output, with no bias terms.
# An exact solution exists with two units: |x| = relu(x) + relu(-x).
# lr, test_n and verbose keep init_Neural_Net's defaults.
model = init_Neural_Net(
    layer_sizes=(1, 2, 1),
    bias=False,
    generate_Xs=generate_Xs,
    get_Ys=get_Ys
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[ 0.5153, -0.4414]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.1371],
        [ 0.3319]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.31527942419052124


In [127]:
# Run 500 optimisation steps; batch size and reporting interval use train()'s defaults.
train(
    model,
    batches_to_run=500
)

loss = 0.3092348575592041

loss = 0.00012211436114739627

loss = 1.1456475351723228e-10

loss = 1.989102106766602e-13

loss = 1.828202574136878e-13

Weights:
--------
hidden_layer_0:
tensor([[ 1.0608, -1.0210]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[0.9427],
        [0.9794]], grad_fn=<PermuteBackward0>)



## Addition: f(x1, x2) = x1 + x2

In [128]:
# Seed PyTorch's global RNG so the initial weights and sampled data in this section are repeatable.
torch.manual_seed(0)  # Random seed

def generate_Xs(n):
    """Draw ``n`` input pairs (x1, x2), shape ``(n, 2)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 2))  # uniform on [0, 1)
    return unit_samples * 2 - 1  # stretch and shift to [-1, 1)

def get_Ys(Xs):
    """Target function f(x1, x2) = x1 + x2, returned with shape ``(n, 1)``."""
    row_totals = torch.sum(Xs, dim=1, keepdim=True)
    return row_totals

# 2 inputs -> 2 hidden ReLU units -> 1 output, with no bias terms.
# An exact solution exists with two units: with s = x1 + x2, s = relu(s) - relu(-s).
# lr, test_n and verbose keep init_Neural_Net's defaults.
model = init_Neural_Net(
    layer_sizes=(2, 2, 1),
    bias=False,
    generate_Xs=generate_Xs,
    get_Ys=get_Ys
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0053, -0.5820],
        [ 0.3793, -0.5204]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.2723],
        [ 0.1896]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.804374635219574


In [129]:
# Run 500 optimisation steps; batch size and reporting interval use train()'s defaults.
train(
    model,
    batches_to_run=500
)

loss = 0.7832100987434387

loss = 0.22691687941551208

loss = 2.4388882593484595e-05

loss = 3.2461384691373496e-09

loss = 8.1084677817575e-13

Weights:
--------
hidden_layer_0:
tensor([[ 0.8516, -0.9257],
        [ 0.8516, -0.9257]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[ 1.1742],
        [-1.0802]], grad_fn=<PermuteBackward0>)



## Subtraction: f(x1, x2) = x1 - x2

In [130]:
# Seed PyTorch's global RNG so the initial weights and sampled data in this section are repeatable.
torch.manual_seed(0)  # Random seed

def generate_Xs(n):
    """Return an ``(n, 2)`` tensor of input pairs sampled uniformly from [-1, 1)."""
    return torch.rand((n, 2)).mul(2).sub(1)

def get_Ys(Xs):
    """Target function f(x1, x2) = x1 - x2, returned with shape ``(n, 1)``."""
    # Width-1 slices keep the column axis, so no unsqueeze is needed afterwards.
    first = Xs[:, 0:1]
    second = Xs[:, 1:2]
    return first - second

# 2 inputs -> 2 hidden ReLU units -> 1 output, with no bias terms.
# An exact solution exists with two units: with d = x1 - x2, d = relu(d) - relu(-d).
# lr, test_n and verbose keep init_Neural_Net's defaults.
model = init_Neural_Net(
    layer_sizes=(2, 2, 1),
    bias=False,
    generate_Xs=generate_Xs,
    get_Ys=get_Ys
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0053, -0.5820],
        [ 0.3793, -0.5204]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.2723],
        [ 0.1896]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.640518069267273


In [131]:
# Run 500 optimisation steps; batch size and reporting interval use train()'s defaults.
train(
    model,
    batches_to_run=500
)

loss = 0.6367942094802856

loss = 0.033380623906850815

loss = 4.164157508057542e-05

loss = 1.5468325287315565e-08

loss = 6.022274259137594e-12

Weights:
--------
hidden_layer_0:
tensor([[-0.8518,  0.9320],
        [ 0.8518, -0.9320]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-1.1740],
        [ 1.0730]], grad_fn=<PermuteBackward0>)



## Minimum Function: f(x1, x2) = min(x1, x2)

In [132]:
# Seed PyTorch's global RNG so the initial weights and sampled data in this section are repeatable.
torch.manual_seed(0)  # Random seed

def generate_Xs(n):
    """Sample ``n`` input pairs (x1, x2) uniformly from [-1, 1)."""
    shape = (n, 2)
    scaled = torch.rand(shape) * 2  # uniform on [0, 2)
    return scaled - 1

def get_Ys(Xs):
    """Target function f(x1, x2) = min(x1, x2), returned with shape ``(n, 1)``."""
    # torch.min over a dim yields (values, indices); only the values are targets.
    smallest = torch.min(Xs, dim=1, keepdim=True)[0]
    return smallest

# 2 inputs -> 3 hidden ReLU units -> 1 output, with no bias terms.
# Three units allow an exact solution:
#   min(x1, x2) = relu(x2) - relu(-x2) - relu(x2 - x1)
# lr, test_n and verbose keep init_Neural_Net's defaults.
model = init_Neural_Net(
    layer_sizes=(2, 3, 1),
    bias=False,
    generate_Xs=generate_Xs,
    get_Ys=get_Ys
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=False)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0053, -0.5820, -0.2723],
        [ 0.3793, -0.5204,  0.1896]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.0114],
        [ 0.4578],
        [-0.0512]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.4853660464286804


In [133]:
# This section trains for 10,000 steps (the earlier sections use 500) and
# reports the test loss every 1,000 steps; batch size stays at train()'s default.
train(
    model,
    batches_to_run=10000,  # 10k
    print_every=1000  # 1k
)

loss = 0.4657897651195526

loss = 0.00038561291876249015

loss = 1.2959922059962992e-05

loss = 5.170457484382496e-07

loss = 2.097290519031958e-08

loss = 8.531301221026411e-10

loss = 3.610790139108033e-11

loss = 7.4095920371553e-12

loss = 4.984128127577536e-12

loss = 4.303819453599367e-12

Weights:
--------
hidden_layer_0:
tensor([[ 9.5740e-06, -4.7060e-08, -8.5670e-01],
        [ 1.0363e+00, -1.1006e+00,  8.5669e-01]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[ 0.9650],
        [-0.9086],
        [-1.1673]], grad_fn=<PermuteBackward0>)

