# Mathematical Circuits

In [1]:
import torch
from torch import nn

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using {DEVICE}")

Using cpu


## Helpers

In [2]:
def Neural_Net(layer_sizes, bias=True, verbose=True):
    """Build a fully connected ReLU network and move it to ``DEVICE``.

    Args:
        layer_sizes: Sequence of at least two ints ``(n_in, hidden..., n_out)``.
            One ``nn.Linear`` is created per consecutive pair of sizes.
        bias: Whether every linear layer gets a bias term.
        verbose: If True, print the module structure and its initial parameters.

    Returns:
        A ``NeuralNetwork`` (``nn.Module``) instance placed on ``DEVICE``.
    """
    assert len(layer_sizes) >= 2, "layer_sizes needs at least an input and an output size"

    # Every layer is linear; every layer except the output one is followed by ReLU.
    layers = []
    n_linear = len(layer_sizes) - 1
    for i in range(n_linear):
        layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1], bias=bias))
        if i < n_linear - 1:
            layers.append(nn.ReLU())

    # Model specification:
    class NeuralNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.NN = nn.Sequential(*layers)

        def forward(self, x):
            """Return the raw (un-activated) output of the last linear layer."""
            return self.NN(x)

        def print_model_parameters(self):
            """Print the transposed weight matrix (and bias, if any) of each linear layer."""
            print("Weights:")
            print("--------")

            # Walk the linear layers rather than the flat parameter list:
            # with bias=True the parameter list interleaves weights and biases,
            # which would otherwise be mislabelled as additional layers.
            linear_layers = [m for m in self.NN if isinstance(m, nn.Linear)]
            last = len(linear_layers) - 1
            for i, layer in enumerate(linear_layers):
                if i != last:
                    print(f"hidden_layer_{i}:")
                else:
                    print("output_layer:")

                print(layer.weight.T)
                if layer.bias is not None:
                    print("bias:")
                    print(layer.bias)
                print("")

        def _set_data_generators(self, generate_Xs, get_Ys):
            """Store the input sampler ``generate_Xs(n=...)`` and the labelling function ``get_Ys(Xs)``."""
            self.generate_Xs = generate_Xs
            self.get_Ys = get_Ys

        def _init_loss_optimizer(self, lr=1e-4):
            """Attach an MSE loss and a plain SGD optimizer with learning rate ``lr``."""
            self.loss_fn = nn.MSELoss()
            self.optimizer = torch.optim.SGD(self.parameters(), lr=lr)

        def _init_test_data(self, test_n=1000, verbose=True):
            """Sample a fixed test set of ``test_n`` points and optionally print the initial loss."""
            # Keep the test set on the same device as the weights so the
            # forward pass also works when the model lives on a GPU.
            device = next(self.parameters()).device
            self.test_X = self.generate_Xs(n=test_n).to(device)
            self.test_y = self.get_Ys(self.test_X).to(device)

            # The loss is only reported here, so no autograd graph is needed.
            with torch.no_grad():
                predicted = self(self.test_X)
                loss = self.loss_fn(predicted, self.test_y).item()
            if verbose: print("Starting Loss =", loss)

    # Model initialization:
    model = NeuralNetwork().to(DEVICE)
    if verbose:
        print(model)
        print("")
        model.print_model_parameters()
    return model

def init_Neural_Net(layer_sizes, generate_Xs, get_Ys, bias=True, lr=1e-4, test_n=1000, verbose=True):
    """Create a network and attach its data generators, loss/optimizer and test set.

    Args:
        layer_sizes: Layer widths forwarded to ``Neural_Net``.
        generate_Xs: Callable ``generate_Xs(n=...)`` producing a batch of inputs.
        get_Ys: Callable mapping a batch of inputs to its target values.
        bias: Forwarded to ``Neural_Net``.
        lr: Learning rate handed to the model's optimizer setup.
        test_n: Number of samples in the fixed test set.
        verbose: Forwarded to both the model construction and the test-set setup.

    Returns:
        The fully initialised model.
    """
    net = Neural_Net(layer_sizes, bias=bias, verbose=verbose)
    net._set_data_generators(generate_Xs=generate_Xs, get_Ys=get_Ys)
    net._init_loss_optimizer(lr=lr)
    net._init_test_data(test_n=test_n, verbose=verbose)
    return net

def train(model, batch_size=1000, batches_to_run=300000, report_every=30000):
    """Run SGD on freshly sampled batches and periodically print the test loss.

    Args:
        model: Network exposing ``generate_Xs``, ``get_Ys``, ``loss_fn``,
            ``optimizer``, ``test_X``, ``test_y`` and ``print_model_parameters``.
        batch_size: Number of samples drawn for every optimisation step.
        batches_to_run: Total number of optimisation steps.
        report_every: Print the test loss every this many steps (starting at
            step 0). ``None`` or a non-positive value disables reporting.
    """
    # Keep all data on the same device as the weights so the loop also works
    # when the model lives on a GPU but the generators produce CPU tensors.
    device = next(model.parameters()).device
    reporting = report_every is not None and report_every > 0

    for i in range(batches_to_run):
        # Reporting below switches to eval mode, so re-enter training mode each step.
        model.train()

        # Get current batch data
        train_X = model.generate_Xs(n=batch_size).to(device)
        train_y = model.get_Ys(train_X).to(device)

        # Do forward pass and evaluate loss
        predicted = model(train_X)
        loss = model.loss_fn(predicted, train_y)

        # Backpropagation
        model.optimizer.zero_grad()
        loss.backward()
        model.optimizer.step()

        # Reporting
        if reporting and i % report_every == 0:
            model.eval()
            # Evaluation only reads the loss value; skip building an autograd graph.
            with torch.no_grad():
                test_predicted = model(model.test_X.to(device))
                test_loss = model.loss_fn(test_predicted, model.test_y.to(device))
            print("loss =", test_loss.item())
            print("")

    model.print_model_parameters()

## Identity Function: f(x) = x

In [3]:
torch.manual_seed(0)  # Fix the RNG so weight init and sampled data are reproducible

def generate_Xs(n):
    """Draw ``n`` scalar inputs, shape ``(n, 1)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 1))
    return unit_samples.mul(2).sub(1)

def get_Ys(Xs):
    """Identity target: each label is the input itself."""
    targets = Xs
    return targets

# 1 input -> 2 hidden ReLU units -> 1 output, no bias terms.
model = init_Neural_Net(
    (1, 2, 1),
    generate_Xs,
    get_Ys,
    bias=False,
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0075,  0.5364]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.5820],
        [-0.5204]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.45063677430152893


In [4]:
# 300k optimisation steps
train(model, batches_to_run=300_000)

loss = 0.45062240958213806

loss = 0.2400665134191513

loss = 0.1759452372789383

loss = 0.16171278059482574

loss = 0.11106877028942108

loss = 0.021715493872761726

loss = 0.0008291789563372731

loss = 1.7237687643500976e-05

loss = 4.432341427218489e-07

loss = 1.9709477783180773e-07

Weights:
--------
hidden_layer_0:
tensor([[-0.9192,  1.0035]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-1.0872],
        [ 0.9957]], grad_fn=<PermuteBackward0>)



## Negative Identity Function: f(x) = -x

In [5]:
# NOTE: With random seed 0 training gets stuck in a local optimum: the network
# works very well for positive inputs but always returns 0 for negative inputs.
torch.manual_seed(1)  # Seed chosen to avoid that optimum

def generate_Xs(n):
    """Draw ``n`` scalar inputs, shape ``(n, 1)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 1))
    return unit_samples.mul(2).sub(1)

def get_Ys(Xs):
    """Negation target: each label is the input multiplied by -1."""
    return Xs.mul(-1)

# 1 input -> 2 hidden ReLU units -> 1 output, no bias terms.
model = init_Neural_Net(
    (1, 2, 1),
    generate_Xs,
    get_Ys,
    bias=False,
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[ 0.5153, -0.4414]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.1371],
        [ 0.3319]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.26709434390068054


In [6]:
# 300k optimisation steps
train(model, batches_to_run=300_000)

loss = 0.2670891582965851

loss = 0.07879319787025452

loss = 0.004247117787599564

loss = 9.394864173373207e-05

loss = 1.610331992196734e-06

loss = 2.0403335554419755e-07

loss = 1.9260137662513444e-07

loss = 1.8912245991486998e-07

loss = 1.8678449009712494e-07

loss = 1.849424222655216e-07

Weights:
--------
hidden_layer_0:
tensor([[ 1.0627, -1.0206]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.9403],
        [ 0.9790]], grad_fn=<PermuteBackward0>)



## Absolute Value: f(x) = |x|

In [7]:
# NOTE: With random seed 0 training gets stuck in a local optimum where the weight
# of one of the hidden nodes gets zeroed out. As a result the network ends up working
# only for positive inputs and always returns 0 for negative inputs.
torch.manual_seed(1)  # Seed chosen to avoid that optimum

def generate_Xs(n):
    """Draw ``n`` scalar inputs, shape ``(n, 1)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 1))
    return unit_samples.mul(2).sub(1)

def get_Ys(Xs):
    """Absolute-value target: each label is the elementwise |x| of the input."""
    return Xs.abs()

# 1 input -> 2 hidden ReLU units -> 1 output, no bias terms.
model = init_Neural_Net(
    (1, 2, 1),
    generate_Xs,
    get_Ys,
    bias=False,
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=1, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[ 0.5153, -0.4414]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.1371],
        [ 0.3319]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.31527942419052124


In [8]:
# 300k optimisation steps
train(model, batches_to_run=300_000)

loss = 0.31527337431907654

loss = 0.1359097808599472

loss = 0.017552969977259636

loss = 0.0005560762365348637

loss = 1.0874651707126759e-05

loss = 3.6069971542929125e-07

loss = 1.9506188664308866e-07

loss = 1.9009961249594198e-07

loss = 1.8731030593244213e-07

loss = 1.8525956591020076e-07

Weights:
--------
hidden_layer_0:
tensor([[ 1.0627, -1.0206]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[0.9403],
        [0.9790]], grad_fn=<PermuteBackward0>)



## Addition: f(x1, x2) = x1 + x2

In [9]:
torch.manual_seed(0)  # Fix the RNG so weight init and sampled data are reproducible

def generate_Xs(n):
    """Draw ``n`` input pairs, shape ``(n, 2)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 2))
    return unit_samples.mul(2).sub(1)

def get_Ys(Xs):
    """Addition target: sum along dim 1, kept as a column."""
    return torch.sum(Xs, dim=1, keepdim=True)

# 2 inputs -> 2 hidden ReLU units -> 1 output, no bias terms.
model = init_Neural_Net(
    (2, 2, 1),
    generate_Xs,
    get_Ys,
    bias=False,
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0053, -0.5820],
        [ 0.3793, -0.5204]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.2723],
        [ 0.1896]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.804374635219574


In [10]:
# 300k optimisation steps
train(model, batches_to_run=300_000)

loss = 0.8043529391288757

loss = 0.4202917516231537

loss = 0.345365971326828

loss = 0.2909817397594452

loss = 0.06650708615779877

loss = 0.0024126311764121056

loss = 0.0001487390254624188

loss = 1.0628493328113109e-05

loss = 8.306866448037908e-07

loss = 3.158412766879337e-07

Weights:
--------
hidden_layer_0:
tensor([[ 0.8511, -0.9304],
        [ 0.8522, -0.9291]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[ 1.1742],
        [-1.0755]], grad_fn=<PermuteBackward0>)



## Subtraction: f(x1, x2) = x1 - x2

In [11]:
torch.manual_seed(0)  # Fix the RNG so weight init and sampled data are reproducible

def generate_Xs(n):
    """Draw ``n`` input pairs, shape ``(n, 2)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 2))
    return unit_samples.mul(2).sub(1)

def get_Ys(Xs):
    """Subtraction target: first column minus second column, as a column."""
    first = Xs[:, 0]
    second = Xs[:, 1]
    difference = first - second
    return difference[:, None]

# 2 inputs -> 2 hidden ReLU units -> 1 output, no bias terms.
model = init_Neural_Net(
    (2, 2, 1),
    generate_Xs,
    get_Ys,
    bias=False,
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=False)
    (1): ReLU()
    (2): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[-0.0053, -0.5820],
        [ 0.3793, -0.5204]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-0.2723],
        [ 0.1896]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.640518069267273


In [12]:
# 300k optimisation steps
train(model, batches_to_run=300_000)

loss = 0.6405144333839417

loss = 0.42583832144737244

loss = 0.23183923959732056

loss = 0.05851614475250244

loss = 0.01078260038048029

loss = 0.0016846817452460527

loss = 0.0001997139333980158

loss = 2.0616595065803267e-05

loss = 2.022708258664352e-06

loss = 3.452058763286914e-07

Weights:
--------
hidden_layer_0:
tensor([[-0.8519,  0.9296],
        [ 0.8508, -0.9309]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[-1.1746],
        [ 1.0750]], grad_fn=<PermuteBackward0>)



## Minimum Function: f(x1, x2) = min(x1, x2)

(Currently gets stuck in a local minimum: the test loss plateaus at about 0.053 instead of approaching zero.)

In [15]:
torch.manual_seed(0)  # Fix the RNG so weight init and sampled data are reproducible

def generate_Xs(n):
    """Draw ``n`` input pairs, shape ``(n, 2)``, uniformly from [-1, 1)."""
    unit_samples = torch.rand((n, 2))
    return unit_samples.mul(2).sub(1)

def get_Ys(Xs):
    """Minimum target: the smallest value along dim 1, kept as a column."""
    smallest, _positions = torch.min(Xs, dim=1, keepdim=True)
    return smallest

# 2 inputs -> 3 hidden ReLU units -> 2 hidden ReLU units -> 1 output, no bias terms.
model = init_Neural_Net(
    (2, 3, 2, 1),
    generate_Xs,
    get_Ys,
    bias=False,
)

NeuralNetwork(
  (NN): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=False)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=2, bias=False)
    (3): ReLU()
    (4): Linear(in_features=2, out_features=1, bias=False)
  )
)

Weights:
--------
hidden_layer_0:
tensor([[ 0.3643, -0.1371, -0.6657],
        [-0.3121,  0.3319,  0.4241]], grad_fn=<PermuteBackward0>)

hidden_layer_1:
tensor([[-0.1188, -0.0707],
        [ 0.2937,  0.1601],
        [ 0.0803,  0.0285]], grad_fn=<PermuteBackward0>)

output_layer:
tensor([[ 0.2583],
        [-0.2756]], grad_fn=<PermuteBackward0>)

Starting Loss = 0.33890464901924133


In [16]:
# 3M optimisation steps
train(model, batches_to_run=3_000_000)

loss = 0.33890366554260254

loss = 0.3093772232532501

loss = 0.24279135465621948

loss = 0.16426093876361847

loss = 0.14048132300376892

loss = 0.13215050101280212

loss = 0.12342052906751633

loss = 0.09989515691995621

loss = 0.0739549994468689

loss = 0.06282414495944977

loss = 0.05912776291370392

loss = 0.05712759494781494

loss = 0.0558374784886837

loss = 0.05497528240084648

loss = 0.05437467247247696

loss = 0.05397646501660347

loss = 0.05370553582906723

loss = 0.05352906510233879

loss = 0.053397610783576965

loss = 0.053293194621801376

loss = 0.05321613699197769

loss = 0.0531618557870388

loss = 0.05312013253569603

loss = 0.05308714136481285

loss = 0.05306166037917137

loss = 0.05304386094212532

loss = 0.05303012579679489

loss = 0.05302044004201889

loss = 0.05301380902528763

loss = 0.053008321672677994

loss = 0.053004421293735504

loss = 0.05300185829401016

loss = 0.052999347448349

loss = 0.052997689694166183

loss = 0.052995797246694565

loss = 0.05299407616