# Symbolic regression via neural network weights

## Create equations and respective datasets

We create random quadratic equations of the form $\pm ax^2 \pm bx \pm c$ and generate $(X, y)$ pairs for each equation using the following routine.

In [7]:
from random_quadratics import RandomQuadratic

In [3]:
# Sampling configuration handed to RandomQuadratic for every equation.
lower_bound, upper_bound = -5, 5
round_digits = 3
number_of_equations = 10

# Each RandomQuadratic instance is called once and the value returned by that
# call is collected as one entry of `equations`.
equations = []
for _ in range(number_of_equations):
    equation = RandomQuadratic(
        lower_bound=lower_bound,
        upper_bound=upper_bound,
        round_digits=round_digits,
    )
    equations.append(equation())


In [8]:
from numpy.random import uniform
from generate_datasets import GenerateDatasets
from json import dumps
from os import mkdir, path

# Directory handed to write_dataset_to_file for the generated datasets.
root_dir = "./datasets"

# 5000 x-values drawn uniformly from [-1, 1); the single array is passed to
# the generator together with all equations.
X_values = uniform(low=-1, high=1, size=5000).astype("float64", copy=False)

dataset_generator = GenerateDatasets(equations=equations, X_values=X_values)
datesets_dict = dataset_generator.generate_xy_datasets()

# Author's note: dumps turns the float X values used as keys into strings.
print(dumps(datesets_dict, indent=4))
GenerateDatasets.write_dataset_to_file(datesets_dict, root_dir=root_dir)

NameError: name 'equations' is not defined

Load the saved X, y pairs for the respective equations into a PyTorch `DataLoader`.

In [9]:
from equations_dataset import EquationsDataset
from json import load
from torch.utils.data import Dataset, DataLoader, random_split
from torch import set_printoptions

# Only the dataset stored for "Equation1" is loaded in this cell.
root_dir = "./datasets"
equation_data = EquationsDataset(
    dataset_file_path=f"{root_dir}/Equation1.json",
)

In [22]:
from sklearn.model_selection import train_test_split

# The first `split_index` samples feed training/validation; the remainder is
# held out for testing.
split_index = 4000
train_values_x = equation_data.x_values[:split_index]
train_values_y = equation_data.y_values[:split_index]
test_values_x = equation_data.x_values[split_index:]
# Fixed: the held-out targets were previously sliced from x_values, which
# paired every test x with itself instead of with its y value.
test_values_y = equation_data.y_values[split_index:]

# A fixed random_state keeps the train/validation split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    train_values_x, train_values_y, test_size=0.33, random_state=42
)

# One [x, y] pair per sample, in the form consumed by DataLoader below.
train_data = [[x, y] for x, y in zip(X_train, y_train)]
val_data = [[x, y] for x, y in zip(X_val, y_val)]

2680
0.6399736243380976
2.016488932252981
1320
-0.6630430210937348
7.27321978043593


In [25]:
# Both loaders yield one sample per batch and reshuffle on every pass.
train_loader = DataLoader(dataset=train_data, batch_size=1, shuffle=True)

val_loader = DataLoader(dataset=val_data, batch_size=1, shuffle=True)

## Simple MLP for regression on our tasks

In [26]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [45]:
def train_model(train_dataloader:DataLoader, validation_dataloader:DataLoader, epochs, model:nn.Module, optimizer, scheduler, criterion):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Each batch yielded by the dataloaders is unpacked as ``(inputs, labels)``
    and both are cast to float32 before being passed to the model/criterion.

    Args:
        train_dataloader: Iterable of ``(inputs, labels)`` training batches.
        validation_dataloader: Iterable of ``(inputs, labels)`` validation batches.
        epochs: Number of passes over ``train_dataloader``.
        model: Network to optimise; switched between train and eval mode here.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(model.state_dict(), train_losses, validation_losses)`` where
        the two lists hold the mean batch loss of every epoch (``nan`` for an
        epoch whose dataloader yielded no batches).
    """
    train_losses = []
    validation_losses = []
    # Most recent (labels, outputs) pair, used only for the per-epoch sample print.
    last_labels = last_outputs = None

    for epoch in range(epochs):
        # ---- training pass ----
        # The mode only needs to be set once per pass, not once per batch.
        model.train()
        batch_losses = []
        for inputs, labels in train_dataloader:
            optimizer.zero_grad()
            outputs = model(inputs.float())
            loss = criterion(outputs.float(), labels.float())
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())
            last_labels, last_outputs = labels, outputs
        # Guard the empty case so np.mean does not emit a RuntimeWarning.
        training_loss = np.mean(batch_losses) if batch_losses else float("nan")
        train_losses.append(training_loss)
        scheduler.step()

        # ---- validation pass ----
        model.eval()
        val_losses = []
        with torch.no_grad():
            for inputs, labels in validation_dataloader:
                outputs = model(inputs.float())
                loss = criterion(outputs.float(), labels.float())
                val_losses.append(loss.item())
                last_labels, last_outputs = labels, outputs
        validation_loss = np.mean(val_losses) if val_losses else float("nan")
        validation_losses.append(validation_loss)

        print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")
        if last_labels is not None:
            # Show the final sample of the last batch. Indexing the flattened
            # tensor (instead of calling .item() on the whole batch) keeps this
            # working when a batch holds more than one sample.
            sample_label = last_labels.float().flatten()[-1].item()
            sample_output = last_outputs.float().flatten()[-1].item()
            print(f"\t Label value: {sample_label}\t Predicted Output: {sample_output}")
    return model.state_dict(), train_losses, validation_losses

def eval_model(test_dataloader: DataLoader, model: nn.Module, criterion):
    """Evaluate ``model`` on every batch of ``test_dataloader``.

    Args:
        test_dataloader: Iterable of ``(inputs, labels)`` batches.
        model: Network to evaluate; put into eval mode here.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        List with the loss of every batch. The mean of that list is printed
        as the final test loss (``nan`` when no batch was yielded).
    """
    test_losses = []
    model.eval()
    with torch.no_grad():
        for inputs, labels in test_dataloader:
            # Cast to float32 exactly as train_model does; batches collated
            # from Python floats arrive as float64 and would otherwise clash
            # with float32 model weights.
            outputs = model(inputs.float())
            loss = criterion(outputs.float(), labels.float())
            test_losses.append(loss.item())
    # Guard the empty case so np.mean does not emit a RuntimeWarning.
    test_loss = np.mean(test_losses) if test_losses else float("nan")
    print(f"Final test loss: {test_loss:.4f}")
    return test_losses

In [46]:
class Test_MLP(nn.Module):
    """Fully connected network: three linear layers with sigmoid activations
    after the first two.

    Args:
        input_size: Number of input features of the first linear layer.
        hidden_size: Width of both hidden layers.
        output_size: Number of output features of the last linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        # Zero-argument super() avoids hard-coding a class name; the previous
        # call named a class that is not defined here and raised NameError.
        super().__init__()
        # define network layers
        self.input_size = input_size
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.sigmoid1 = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.sigmoid2 = nn.Sigmoid()
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through fc1 -> sigmoid -> fc2 -> sigmoid -> fc3.

        The last layer has no activation, so the output is unbounded.
        """
        output = self.fc1(x)
        output = self.sigmoid1(output)
        output = self.fc2(output)
        output = self.sigmoid2(output)
        output = self.fc3(output)
        return output

In [None]:
# 1 input feature -> two hidden layers of 35 units -> 1 output value.
model = Test_MLP(input_size=1, hidden_size=35, output_size=1)
print(model)

epochs = 100
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-05)
# NOTE(review): eta_min is the same value as the optimizer's base learning
# rate, so the cosine schedule presumably has no range to anneal over and the
# learning rate stays constant; confirm this is intended.
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0=20,
    eta_min=1e-05,
)

train_model(
    train_dataloader=train_loader,
    validation_dataloader=val_loader,
    epochs=epochs,
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
)

FNN_WO_HPO_3Layer(
  (fc1): Linear(in_features=1, out_features=35, bias=True)
  (sigmoid1): Sigmoid()
  (fc2): Linear(in_features=35, out_features=35, bias=True)
  (sigmoid2): Sigmoid()
  (fc3): Linear(in_features=35, out_features=1, bias=True)
)
[1] Training loss: 24.5478	 Validation loss: 21.3039
	 Label value: 5.026393413543701	 Predicted Output: 0.48561739921569824
[2] Training loss: 19.0194	 Validation loss: 16.5044
	 Label value: 2.187842607498169	 Predicted Output: 1.1526610851287842
[3] Training loss: 14.8895	 Validation loss: 12.9721
	 Label value: 6.9093804359436035	 Predicted Output: 1.798307180404663
[4] Training loss: 11.8893	 Validation loss: 10.4810
	 Label value: 2.1449997425079346	 Predicted Output: 2.301635503768921
[5] Training loss: 9.8673	 Validation loss: 8.8607
	 Label value: 4.082357883453369	 Predicted Output: 2.779758930206299
[6] Training loss: 8.5345	 Validation loss: 7.8040
	 Label value: 9.092066764831543	 Predicted Output: 3.19685697555542
[7] Training lo

[66] Training loss: 1.5642	 Validation loss: 1.4352
	 Label value: 3.304640769958496	 Predicted Output: 4.57495641708374
[67] Training loss: 1.5372	 Validation loss: 1.4098
	 Label value: 2.7142317295074463	 Predicted Output: 0.9303058981895447
[68] Training loss: 1.5113	 Validation loss: 1.3854
	 Label value: 3.5728516578674316	 Predicted Output: 4.7899041175842285
[69] Training loss: 1.4860	 Validation loss: 1.3623
	 Label value: 2.275174379348755	 Predicted Output: 1.2542731761932373


## Use Auto-PyTorch to search for MLP architectures

In [1]:
#%pip list
#from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch import AutoNetRegression
from autoPyTorch.data_management.data_manager import DataManager


# Note: You can write your own DataManager! Call fit with train and valid data (numpy matrices).
#dm = DataManager()
#dm.generate_regression(num_features=21, num_samples=1500)

# Note: every parameter has a default value, you do not have to specify anything. The given parameters allow a fast test.
#autonet = AutoNetRegression(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info')

#res = autonet.fit(X_train=X_train, Y_train=y_train, X_valid=X_val, Y_valid=y_val)
# `res` is only assigned by the disabled fit call above, so printing it while
# the search is commented out raises NameError. Re-enable the AutoNetRegression
# construction, the fit call and this print together.
#print(res)

ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject