# Transformer Implementation

## Create equations to mlp datasets from trained networks

We create quadratic equations in the form +/- ax^2 +/- bx +/- c as our targets to the input values as our network weights.

In [45]:
def parse_equation_json(dataset_file_path):
    with open(dataset_file_path) as json_file:
        data = load(json_file)
        #print(data)
        x_values = list(data.keys())
        y_values = list(data.values())
        assert len(x_values) == len(y_values)
    return x_values, y_values


create_dataset()
x_values, y_values = parse_equation_json(f"{root_dir}/equations_to_mlp_weights.json")

train_values_x, train_values_y = x_values[:4000], y_values[:4000] 
test_values_x, test_values_y = x_values[4000:], x_values[4000:] 

In [46]:
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

X_train, X_val, y_train, y_val = train_test_split(train_values_x, train_values_y, test_size=0.33, random_state=42)

train_data = []
for i in range(len(X_train)):
    train_data.append([X_train[i], torch.FloatTensor(y_train[i])])

val_data = []
for i in range(len(X_val)):
    val_data.append([X_val[i], torch.FloatTensor(y_val[i])])

train_loader = DataLoader(train_data, batch_size=1, shuffle=True)

val_loader = DataLoader(val_data, batch_size=1, shuffle=True)

## MLP to Decoder Architecture in our implementation

### We process the network weights through a MLP and then use the learned representation as input to our decoder model

In [47]:
import numpy as np
import torch.nn as nn
import torch.optim as optim

['-1.2*x*x+1.3*x-1',
 tensor([-5.6723e-01, -4.2020e-01,  1.3257e+00,  7.5389e-01,  8.2234e-02,
         -5.8438e-01, -9.7138e-01,  4.4880e-01,  2.2155e-01, -3.1306e-01,
         -9.2360e-04, -3.4064e-01, -1.6338e-02, -3.2545e-01, -4.2002e-03,
         -1.5795e-01, -2.0448e-02, -3.3488e-01,  3.3168e-01, -2.7872e-02,
          3.0185e-01,  2.3806e-02, -2.9166e-01, -2.7286e-01, -1.5448e-02,
         -3.4943e-02, -3.2462e-01,  6.1753e-01,  3.4095e-02, -1.2253e-01,
         -6.8272e-02,  5.5982e-01,  4.1066e-03,  1.9089e-01, -2.6787e-01,
         -3.3988e-01, -2.7152e-01, -9.2338e-02, -1.2238e-01,  3.3914e-01,
          4.4633e-01,  2.8089e-01,  3.9832e-02, -2.3664e-01, -1.4197e-01,
          7.1590e-01,  5.9507e-01, -2.1987e-01, -2.0779e-01, -5.6828e-02,
          3.5495e-01,  3.3513e-01, -1.5567e-02,  2.0529e-01, -2.1851e-01,
          3.1577e-01, -2.2781e-01,  1.4184e-01,  1.7817e-01, -1.5977e-03,
         -1.5645e-01,  3.2901e-01, -3.1186e-01, -1.8062e-01,  2.5690e-01,
          2.6784e

In [54]:
class Encoder_MLP(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        # call constructor from superclass
        super(Encoder_MLP, self).__init__()
        # define network layers
        self.input_size = input_size
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size*2)
        self.fc4 = nn.Linear(hidden_size*2, output_size)
        self.relu = nn.ReLU()
        self.layerNorm = nn.LayerNorm(input_size)
        
    def forward(self, x):
        # define forward pass
        output = self.fc1(x)
        output = self.relu(output)
        #output = self.layerNorm(output)
        output = self.fc2(output)
        #output = self.layerNorm(output)
        output = self.relu(output)
        output = self.fc3(output)
        #output = self.layerNorm(output)
        output = self.relu(output)
        output = self.fc4(output)
        return output

In [56]:
def train_model(train_dataloader:DataLoader, validation_dataloader:DataLoader, epochs, model:nn.Module, optimizer, scheduler, criterion):
    train_losses = []
    validation_losses = []

    #train-validation loop
    for epoch in range(epochs):
        batch_losses = []
        training_loss = 0.0
        #training loop
        for _idx , data in enumerate(train_dataloader):
            equation, mlp_weights = data
            print(equation)
            print(mlp_weights)
            optimizer.zero_grad()
            model.train()
            outputs = model(mlp_weights.float())
            print(outputs)
            print(outputs.shape)
            loss = criterion(outputs.float(), labels.float())
            loss.backward()
            batch_losses.append(loss.item())
            optimizer.step()
        training_loss = np.mean(batch_losses)
        train_losses.append(training_loss)
        scheduler.step()

        #validation loop
        with torch.no_grad():
            val_losses = []
            validation_loss = 0.0
            for _idx, data in enumerate(validation_dataloader):
                inputs, labels = data
                model.eval()
                outputs = model(inputs.float())
                loss = criterion(outputs.float(), labels.float())
                val_losses.append(loss.item())
            validation_loss = np.mean(val_losses)
            validation_losses.append(validation_loss)

        print(f"[{epoch+1}] Training loss: {training_loss:.7f}\t Validation loss: {validation_loss:.7f}")
        print(f"\t Label value: {labels.float().item()}\t Predicted Output: {outputs.float().item()}")
    #torch.save(model.state_dict(), MODEL_PATH)
    return model.state_dict()

def eval_model(test_dataloader: DataLoader, model: nn.Module, criterion):
    test_losses = []
    with torch.no_grad():
        for _idx, data in enumerate(test_dataloader):
            inputs, labels = data
            model.eval()
            outputs = model(inputs)
            #print("outputs, ", outputs.shape)
            #rescaled_outputs = inverse_scaler(outputs, method="minmax")
            #print("rescaled_outputs: ",rescaled_outputs.shape)
            loss = criterion(outputs, labels)
            test_losses.append(loss.item())
        test_loss = np.mean(test_losses)
        print(f"Final test loss: {test_loss:.4f}")    
    return test_losses

In [57]:
model = Encoder_MLP(128, 256, 512)
print(model)

epochs = 1
optimizer  = optim.Adam(model.parameters(), lr=1e-05)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=20, eta_min=1e-05)
criterion = nn.MSELoss()

mf_dict =train_model(train_loader, val_loader, epochs, model, optimizer, scheduler, criterion)


Encoder_MLP(
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=512, bias=True)
  (fc4): Linear(in_features=512, out_features=512, bias=True)
  (relu): ReLU()
  (layerNorm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)
('+0.4*x*x-3*x-4',)
tensor([[ 0.4607, -0.6348, -1.1444, -0.2364,  1.0517, -0.4547,  0.7143, -0.8074,
          0.5914,  0.0328, -0.3927,  0.4257,  0.6577,  0.3466,  0.4597, -0.0125,
          0.0529, -0.0317,  0.0373, -0.1681, -0.0171, -0.1467, -0.1179,  0.3150,
          0.6529,  0.2568, -0.2176, -0.0025,  0.2332,  0.0062,  0.7766, -0.2544,
         -0.1316, -0.3324,  0.3309,  0.4621, -0.3207,  0.3432,  0.0976, -0.0318,
         -0.3379,  0.1935, -0.0147,  0.2849, -0.3320,  0.1327,  0.1421, -0.2154,
         -0.1293, -0.3491,  0.2319, -0.2571, -0.2726, -0.2270, -0.2554, -0.0509,
         -0.1492,  0.2148,  0.6068,  0.3771,  0.0486,  0.5961, -

UnboundLocalError: local variable 'labels' referenced before assignment