In [1]:
# The star import stays first so that the explicit imports below take precedence
# over any same-named objects it may re-export.
from bounce import *

import os
import pickle
import time
from inspect import signature

import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from utils import pretty_print, functions
from utils.symbolic_network import SymbolicNet
from utils.regularization import L12Smooth

# Build a DataLoader over the bouncing-ball dataset and pull one batch to inspect.
# The 0.05 argument is forwarded verbatim to BouncyBallsDataBounceRatio; its meaning is
# defined in `bounce` (presumably a bounce ratio -- TODO confirm).
# Alternative dataset that was swapped out: BouncyBallsData()
loader = DataLoader(dataset=BouncyBallsDataBounceRatio(0.05), batch_size=4096, drop_last=True, pin_memory=False)

# First batch only: x holds the inputs and y the targets yielded by the dataset.
x, y = next(iter(loader))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Dataset sizes -----------------------------------------------------------
N_TRAIN = 256       # number of training samples
N_VAL = 100         # number of validation samples
N_TEST = 100        # number of test samples

# --- Sampling domains --------------------------------------------------------
# Training inputs are drawn from DOMAIN; test inputs from the wider DOMAIN_TEST so that
# the test error measures extrapolation.
DOMAIN = (-1, 1)
# DOMAIN = np.array([[0, -1, -1], [1, 1, 1]])   # Use this format if each input variable has a different domain
DOMAIN_TEST = (-2, 2)

NOISE_SD = 0        # standard deviation of the noise for the training dataset

# Names used for the input variables when an expression is pretty-printed.
var_names = ["x", "y", "z"]

# --- Weight initialisation ---------------------------------------------------
# Standard deviation of the random normal used to initialise the first, middle and last
# weight matrices. Alternative settings kept for reference: 0.5 for all three, or 0.1 for
# all three.
init_sd_first = 0.1
init_sd_middle = 0.5
init_sd_last = 1.0


def generate_data(func, N, range_min=DOMAIN[0], range_max=DOMAIN[1]):
    """Sample N random inputs and evaluate `func` on each of them.

    Arguments:
        func: callable taking one positional argument per input dimension; the number of
            parameters in its signature determines the dimensionality of x.
        N: number of samples to draw.
        range_min: lower bound of the sampling range (defaults to DOMAIN[0]).
        range_max: upper bound of the sampling range (defaults to DOMAIN[1]).

    Returns:
        (x, y): x has shape (N, x_dim) with entries drawn uniformly from
        [range_min, range_max); y has shape (N, 1), shares x's dtype and holds
        func(*x[i]) in row i.
    """
    x_dim = len(signature(func).parameters)     # Number of inputs to the function, or, dimensionality of x
    x = (range_max - range_min) * torch.rand([N, x_dim]) + range_min
    # func is evaluated row by row. Forcing x's dtype keeps the targets floating point even
    # when func returns Python ints (e.g. a constant function), and the reshape keeps the
    # (N, 1) shape when N == 0.
    y = torch.tensor([[func(*x_i)] for x_i in x], dtype=x.dtype).reshape(N, 1)
    return x, y


class Benchmark:
    """Holds a results directory (results_dir) and one set of hyper-parameters.

    All results saved under results_dir are assumed to share the same hyper-parameters, which is
    useful for benchmarking multiple functions with identical settings.
    """

    def __init__(self, results_dir, n_layers=2, reg_weight=5e-3, learning_rate=1e-2,
                 n_epochs1=10001, n_epochs2=10001):
        """Set the hyper-parameters and save them to <results_dir>/params.pickle.

        Arguments:
            results_dir: directory that receives every output file; created if it does not exist.
            n_layers: number of hidden layers, forwarded to SymbolicNet.
            reg_weight: multiplier applied to the L12Smooth regularization term in the loss.
            learning_rate: base learning rate (train() describes how it is scheduled).
            n_epochs1: epochs in the first training stage (train() adds 2000 warm-up epochs).
            n_epochs2: epochs in the second training stage.
        """
        self.activation_funcs = [
            *[functions.Constant()] * 2,
            *[functions.Identity()] * 4,
            *[functions.Square()] * 4,
            *[functions.Sin()] * 2,
            *[functions.Exp()] * 2,
            *[functions.Sigmoid()] * 2,
            *[functions.Product()] * 2
        ]

        self.n_layers = n_layers                # Number of hidden layers
        self.reg_weight = reg_weight            # Regularization weight
        self.learning_rate = learning_rate
        self.summary_step = 1000                # Number of iterations at which to print to screen
        self.n_epochs1 = n_epochs1
        self.n_epochs2 = n_epochs2

        os.makedirs(results_dir, exist_ok=True)
        self.results_dir = results_dir

        # Save hyperparameters to file
        result = {
            "learning_rate": self.learning_rate,
            "summary_step": self.summary_step,
            "n_epochs1": self.n_epochs1,
            "n_epochs2": self.n_epochs2,
            "activation_funcs_name": [func.name for func in self.activation_funcs],
            "n_layers": self.n_layers,
            "reg_weight": self.reg_weight,
        }
        with open(os.path.join(self.results_dir, 'params.pickle'), "wb") as f:
            pickle.dump(result, f)

    def benchmark(self, func, func_name, trials):
        """Benchmark the EQL network on data generated by the given function.

        Trains `trials` networks and appends the resulting expressions, ordered by increasing
        test error, to <results_dir>/eq_summary.txt.

        Arguments:
            func: lambda function to generate dataset
            func_name: string that describes the function - this will be the directory name
            trials: number of trials to train from scratch. Will save the results for each trial.
        """

        print("Starting benchmark for function:\t%s" % func_name)
        print("==============================================")

        # Create a new sub-directory just for the specific function
        func_dir = os.path.join(self.results_dir, func_name)
        os.makedirs(func_dir, exist_ok=True)

        # Train network!
        expr_list, error_test_list = self.train(func, func_name, trials, func_dir)

        # Sort by test error only (increasing, NaN last). Sorting the raw (error, expr) tuples
        # would fall back to comparing the expressions whenever two errors tie, and expressions
        # are not guaranteed to be orderable. The sort is stable, so ties keep trial order.
        ranked = sorted(zip(error_test_list, expr_list),
                        key=lambda pair: (bool(np.isnan(pair[0])), pair[0]))

        # This allows us to easily count how many times it fit correctly.
        with open(os.path.join(self.results_dir, 'eq_summary.txt'), 'a') as fi:
            fi.write("\n{}\n".format(func_name))
            for error_test, expr in ranked:
                fi.write("[%f]\t\t%s\n" % (error_test, str(expr)))

    def _build_network(self, x_dim, width, n_double, device):
        """Create a freshly initialised SymbolicNet and move it to `device`."""
        # NOTE(review): four weight matrices are always supplied, independent of n_layers --
        # confirm against SymbolicNet how many of them it consumes.
        return SymbolicNet(self.n_layers,
                           funcs=self.activation_funcs,
                           initial_weights=[
                               # kind of a hack for truncated normal
                               torch.fmod(torch.normal(0, init_sd_first, size=(x_dim, width + n_double)), 2),
                               torch.fmod(torch.normal(0, init_sd_middle, size=(width, width + n_double)), 2),
                               torch.fmod(torch.normal(0, init_sd_middle, size=(width, width + n_double)), 2),
                               torch.fmod(torch.normal(0, init_sd_last, size=(width, 1)), 2)
                           ]).to(device)

    def _run_stage(self, net, optimizer, criterion, regularization, data, target,
                   test_data, test_target, n_epochs, history, loss_val,
                   scheduler=None, lr_drop_epoch=None):
        """Run one training stage and return the most recently logged total loss.

        Every `summary_step` epochs the training loss, MSE, regularization term and test MSE are
        appended to `history` and printed. The stage stops early when the logged loss is NaN or
        exceeds 1000. If `lr_drop_epoch` is given, `scheduler.step()` is called once at the end
        of that epoch. `loss_val` is returned unchanged when nothing was logged.
        """
        for epoch in range(n_epochs):
            optimizer.zero_grad()  # zero the parameter gradients
            outputs = net(data)  # forward pass
            mse_loss = criterion(outputs, target)
            reg_loss = regularization(net.get_weights_tensor())
            loss = mse_loss + self.reg_weight * reg_loss
            loss.backward()
            optimizer.step()

            if epoch % self.summary_step == 0:
                loss_val = loss.item()
                history["error_list"].append(mse_loss.item())
                history["reg_list"].append(reg_loss.item())
                history["loss_list"].append(loss_val)

                with torch.no_grad():  # test error
                    error_test_val = F.mse_loss(net(test_data), test_target).item()
                history["error_test"].append(error_test_val)

                print("Epoch: %d\tTotal training loss: %f\tTest error: %f" % (epoch, loss_val, error_test_val))

                if np.isnan(loss_val) or loss_val > 1000:  # diverged: stop this stage
                    break

            if epoch == lr_drop_epoch:
                scheduler.step()  # lr /= 10

        return loss_val

    def train(self, func, func_name='', trials=1, func_dir='results/test'):
        """Train the network to find a given function.

        Each trial trains a new network in two stages. Stage one runs n_epochs1 + 2000 epochs:
        the first 2001 at 10x the base learning rate (warm-up), the rest at the base rate.
        Stage two runs n_epochs2 epochs at a tenth of the base rate. Both stages use the same
        loss, MSE + reg_weight * L12Smooth. If the logged loss becomes NaN, the network is
        re-initialised and the trial restarts from scratch; restarts are not bounded.

        Arguments:
            func: target function; its number of parameters sets the input dimension.
            func_name: label used in progress messages.
            trials: number of independent networks to train.
            func_dir: directory that receives one trial<k>.pickle file per trial.

        Returns:
            (eq_list, error_test_final): the expression found in each trial and the last logged
            test error of each trial, in trial order.
        """

        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda:0" if use_cuda else "cpu")
        print("Use cuda:", use_cuda, "Device:", device)

        x, y = generate_data(func, N_TRAIN)
        data, target = x.to(device), y.to(device)
        # x_val, y_val = generate_data(func, N_VAL)
        x_test, y_test = generate_data(func, N_TEST, range_min=DOMAIN_TEST[0], range_max=DOMAIN_TEST[1])
        test_data, test_target = x_test.to(device), y_test.to(device)

        # Setting up the symbolic regression network
        x_dim = len(signature(func).parameters)  # Number of input arguments to the function
        width = len(self.activation_funcs)
        n_double = functions.count_double(self.activation_funcs)

        error_test_final = []
        eq_list = []

        for trial in range(trials):
            print("Training on function " + func_name + " Trial " + str(trial+1) + " out of " + str(trials))

            loss_val = np.nan
            while np.isnan(loss_val):
                # Training restarts if gradients blow up. The network must be rebuilt on every
                # attempt: once the weights are NaN, re-running the optimizer cannot recover.
                net = self._build_network(x_dim, width, n_double, device)

                # Quantities tracked as a function of epoch. They are reset per attempt so the
                # saved curves describe exactly the network that is saved.
                history = {
                    "loss_list": [],    # Total loss (MSE + regularization)
                    "error_list": [],   # MSE
                    "reg_list": [],     # Regularization
                    "error_test": [],   # Test error
                }

                criterion = nn.MSELoss()
                regularization = L12Smooth()
                optimizer = optim.RMSprop(net.parameters(),
                                          lr=self.learning_rate * 10,
                                          alpha=0.9,  # smoothing constant
                                          eps=1e-10,
                                          momentum=0.0,
                                          centered=False)

                # adaptive learning rate: every scheduler.step() multiplies the rate by 0.1
                scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 0.1)

                t0 = time.time()

                # First stage of training, preceded by 0th warmup stage
                loss_val = self._run_stage(net, optimizer, criterion, regularization,
                                           data, target, test_data, test_target,
                                           self.n_epochs1 + 2000, history, loss_val,
                                           scheduler=scheduler, lr_drop_epoch=2000)
                if np.isnan(loss_val):
                    print("Loss is NaN; re-initializing the network and restarting training")
                    continue

                scheduler.step()  # lr /= 10 again

                # Second stage of training
                loss_val = self._run_stage(net, optimizer, criterion, regularization,
                                           data, target, test_data, test_target,
                                           self.n_epochs2, history, loss_val)

                t1 = time.time()

            tot_time = t1-t0
            print(tot_time)

            # Print the expressions
            with torch.no_grad():
                weights = net.get_weights()
                expr = pretty_print.network(weights, self.activation_funcs, var_names[:x_dim])
                print(expr)

            # Save results
            trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)
            results = {
                "weights": weights,
                "loss_list": history["loss_list"],
                "error_list": history["error_list"],
                "reg_list": history["reg_list"],
                "error_test": history["error_test"],
                "expr": expr,
                "runtime": tot_time
            }
            with open(trial_file, "wb") as f:
                pickle.dump(results, f)

            error_test_final.append(history["error_test"][-1])
            eq_list.append(expr)

        return eq_list, error_test_final

In [None]:
We use two phases of training, where the first phase uses
a learning rate of $10^{-2}$
and regularization weight $\lambda = 0.05$.
The second phase uses a learning rate of $10^{-4}$
and no
regularization. The small weights are frozen between the first
and second phase with a threshold of $\alpha = 0.01$. Each phase
is trained for 10000 iterations.

In [3]:
from inspect import signature
import time
# Target: x + 0.016667033*y. The lambda also accepts z (unused), so the input is 3-dimensional.
# Results are written under test_results/speed.
bench = Benchmark(results_dir='test_results')
bench.benchmark(
    func=lambda x, y, z: x + 0.016667033 * y,
    func_name="speed",
    trials=1,
)

Starting benchmark for function:	speed
Use cuda: True Device: cuda:0
Training on function speed Trial 1 out of 1
Epoch: 0	Total training loss: 269.628571	Test error: 98177.359375
Epoch: 1000	Total training loss: 0.710146	Test error: 0.098058
Epoch: 2000	Total training loss: 0.893777	Test error: 23.095833
Epoch: 3000	Total training loss: 0.319213	Test error: 0.001261
Epoch: 4000	Total training loss: 0.317061	Test error: 0.001940
Epoch: 5000	Total training loss: 0.319367	Test error: 0.001573
Epoch: 6000	Total training loss: 0.316014	Test error: 0.000301
Epoch: 7000	Total training loss: 0.316480	Test error: 0.000579
Epoch: 8000	Total training loss: 0.316431	Test error: 0.000621
Epoch: 9000	Total training loss: 0.317137	Test error: 0.001095
Epoch: 10000	Total training loss: 0.316542	Test error: 0.000487
Epoch: 11000	Total training loss: 0.318005	Test error: 0.001293
Epoch: 12000	Total training loss: 0.316277	Test error: 0.000589
Epoch: 0	Total training loss: 0.316336	Test error: 0.000369
E

In [4]:
from inspect import signature
import time
# Target: 0.016667033*y only. x and z are accepted but unused, so the input is still 3-dimensional.
# Results are written under test_results/speed.
bench = Benchmark(results_dir='test_results')
bench.benchmark(
    func=lambda x, y, z: 0.016667033 * y,
    func_name="speed",
    trials=1,
)

Starting benchmark for function:	speed
Use cuda: True Device: cuda:0
Training on function speed Trial 1 out of 1
Epoch: 0	Total training loss: 63.937401	Test error: 92487.304688
Epoch: 1000	Total training loss: 0.482008	Test error: 0.002076
Epoch: 2000	Total training loss: 0.542434	Test error: 0.005263
Epoch: 3000	Total training loss: 0.302932	Test error: 0.000328
Epoch: 4000	Total training loss: 0.303226	Test error: 0.000432
Epoch: 5000	Total training loss: 0.302859	Test error: 0.000314
Epoch: 6000	Total training loss: 0.303039	Test error: 0.000336
Epoch: 7000	Total training loss: 0.302851	Test error: 0.000313
Epoch: 8000	Total training loss: 0.303774	Test error: 0.000395
Epoch: 9000	Total training loss: 0.303601	Test error: 0.000445
Epoch: 10000	Total training loss: 0.302887	Test error: 0.000317
Epoch: 11000	Total training loss: 0.302873	Test error: 0.000314
Epoch: 12000	Total training loss: 0.302854	Test error: 0.000313
Epoch: 0	Total training loss: 0.302850	Test error: 0.000313
Epo

In [7]:
from inspect import signature
import time
# Target: the identity on y. x and z are accepted but unused, so the input is still 3-dimensional.
# Results are written under test_results/speed.
bench = Benchmark(results_dir='test_results')
bench.benchmark(
    func=lambda x, y, z: y,
    func_name="speed",
    trials=1,
)

Starting benchmark for function:	speed
Use cuda: True Device: cuda:0
Training on function speed Trial 1 out of 1
Epoch: 0	Total training loss: 420.839264	Test error: 21892716869252546560.000000
Epoch: 1000	Total training loss: 0.544444	Test error: 0.140753
Epoch: 2000	Total training loss: 0.847831	Test error: 0.141156
Epoch: 3000	Total training loss: 0.318065	Test error: 0.001854
Epoch: 4000	Total training loss: 0.316454	Test error: 0.000406
Epoch: 5000	Total training loss: 0.316955	Test error: 0.001926
Epoch: 6000	Total training loss: 0.316154	Test error: 0.000767
Epoch: 7000	Total training loss: 0.315882	Test error: 0.000060
Epoch: 8000	Total training loss: 0.315769	Test error: 0.000017
Epoch: 9000	Total training loss: 0.315777	Test error: 0.000020
Epoch: 10000	Total training loss: 0.316067	Test error: 0.000146
Epoch: 11000	Total training loss: 0.316498	Test error: 0.001048
Epoch: 12000	Total training loss: 0.316235	Test error: 0.000398
Epoch: 0	Total training loss: 0.316170	Test err

In [8]:
from inspect import signature
import time
# Target: x + y with a 2-dimensional input (this lambda takes no z).
# Results are written under test_results/speed.
bench = Benchmark(results_dir='test_results')
bench.benchmark(
    func=lambda x, y: x + y,
    func_name="speed",
    trials=1,
)

Starting benchmark for function:	speed
Use cuda: True Device: cuda:0
Training on function speed Trial 1 out of 1
Epoch: 0	Total training loss: 9.287493	Test error: 5521.826172
Epoch: 1000	Total training loss: 0.814877	Test error: 0.681931
Epoch: 2000	Total training loss: 0.645772	Test error: 0.376138
Epoch: 3000	Total training loss: 0.328241	Test error: 0.001390
Epoch: 4000	Total training loss: 0.321130	Test error: 0.001399
Epoch: 5000	Total training loss: 0.314526	Test error: 0.000514
Epoch: 6000	Total training loss: 0.316415	Test error: 0.002403
Epoch: 7000	Total training loss: 0.315625	Test error: 0.002531
Epoch: 8000	Total training loss: 0.315473	Test error: 0.001007
Epoch: 9000	Total training loss: 0.315182	Test error: 0.001077
Epoch: 10000	Total training loss: 0.315293	Test error: 0.001676
Epoch: 11000	Total training loss: 0.315422	Test error: 0.000728
Epoch: 12000	Total training loss: 0.315419	Test error: 0.001798
Epoch: 0	Total training loss: 0.316061	Test error: 0.001027
Epoch

In [None]:
# TODO: run separately for each of the different dimensions to get the eqnet result with the current setup.
# TODO: run on real data (or, initially, just on data with some noise added).
# Open question: what happens if we run it on the remainder? On the bounce cases?