diff --git a/src/pymor/models/neural_network.py b/src/pymor/models/neural_network.py index e833c0ad32..8da256d0c3 100644 --- a/src/pymor/models/neural_network.py +++ b/src/pymor/models/neural_network.py @@ -67,7 +67,7 @@ def __init__(self, neural_network, parameters={}, output_functional=None, def _compute_solution(self, mu=None, **kwargs): # convert the parameter `mu` into a form that is usable in PyTorch - converted_input = torch.from_numpy(mu.to_numpy()).double() + converted_input = torch.DoubleTensor(mu.to_numpy()) # obtain (reduced) coordinates by forward pass of the parameter values # through the neural network U = self.neural_network(converted_input).data.numpy() @@ -141,7 +141,7 @@ def _compute_solution(self, mu=None, **kwargs): for i in range(self.nt): mu = mu.with_(t=t) # convert the parameter `mu` into a form that is usable in PyTorch - converted_input = torch.from_numpy(mu.to_numpy()).double() + converted_input = torch.DoubleTensor(mu.to_numpy()) # obtain (reduced) coordinates by forward pass of the parameter values # through the neural network result_neural_network = self.neural_network(converted_input).data.numpy() @@ -160,24 +160,24 @@ class FullyConnectedNN(nn.Module, BasicObject): Parameters ---------- - layers_sizes + layer_sizes List of sizes (i.e. number of neurons) for the layers of the neural network. activation_function Function to use as activation function between the single layers. """ - def __init__(self, layers_sizes, activation_function=torch.tanh): + def __init__(self, layer_sizes, activation_function=torch.tanh): super().__init__() - if layers_sizes is None or not len(layers_sizes) > 1 or not all(size >= 1 for size in layers_sizes): + if layer_sizes is None or not len(layer_sizes) > 1 or not all(size >= 1 for size in layer_sizes): raise ValueError - self.input_dimension = layers_sizes[0] - self.output_dimension = layers_sizes[-1] + self.input_dimension = layer_sizes[0] + self.output_dimension = layer_sizes[-1] self.layers = nn.ModuleList() - self.layers.extend([nn.Linear(int(layers_sizes[i]), int(layers_sizes[i+1])) - for i in range(len(layers_sizes) - 1)]) + self.layers.extend([nn.Linear(int(layer_sizes[i]), int(layer_sizes[i+1])) + for i in range(len(layer_sizes) - 1)]) self.activation_function = activation_function diff --git a/src/pymor/reductors/neural_network.py b/src/pymor/reductors/neural_network.py index 78a55b41b3..c9d594b2c1 100644 --- a/src/pymor/reductors/neural_network.py +++ b/src/pymor/reductors/neural_network.py @@ -18,6 +18,7 @@ from pymor.algorithms.pod import pod from pymor.core.base import BasicObject from pymor.core.exceptions import NeuralNetworkTrainingFailed + from pymor.core.logger import getLogger from pymor.models.neural_network import FullyConnectedNN, NeuralNetworkModel, NeuralNetworkInstationaryModel class NeuralNetworkReductor(BasicObject): @@ -126,7 +127,7 @@ def reduce(self, hidden_layers='[(N+P)*3, (N+P)*3]', activation_function=torch.t # input and output size of the neural network are prescribed by the # dimension of the parameter space and the reduced basis size assert isinstance(hidden_layers, list) - layers = self._compute_layers_sizes(hidden_layers) + layer_sizes = self._compute_layer_sizes(hidden_layers) # compute validation data if not hasattr(self, 'validation_data'): @@ -146,35 +147,45 @@ def reduce(self, hidden_layers='[(N+P)*3, (N+P)*3]', activation_function=torch.t self.training_data = self.training_data[number_validation_snapshots+1:] # run the actual training of the neural network - with 
self.logger.block(f'Performing {restarts} restarts for training ...'): - - for run in range(restarts): - neural_network, current_losses = self._train(layers, activation_function, optimizer, - epochs, batch_size, learning_rate) - if not hasattr(self, 'losses') or current_losses['val'] < self.losses['val']: - self.losses = current_losses - self.neural_network = neural_network - - # check if neural network is sufficient to guarantee certain error bounds - with self.logger.block('Checking tolerances for error of neural network ...'): - - if isinstance(self.ann_mse, Number) and self.losses['full'] <= self.ann_mse: - self.logger.info(f'Aborting training after {run} restarts ...') - return self._build_rom() - elif self.ann_mse == 'like_basis' and self.losses['full'] <= self.mse_basis: - self.logger.info(f'Aborting training after {run} restarts ...') - return self._build_rom() + with self.logger.block('Training of neural network ...'): + # set target loss depending on value of ann_mse + target_loss = None + if isinstance(self.ann_mse, Number): + target_loss = self.ann_mse + elif self.ann_mse == 'like_basis': + target_loss = self.mse_basis + + # set parameters for neural network and training + neural_network_parameters = {'layer_sizes': layer_sizes, + 'activation_function': activation_function} + training_parameters = {'optimizer': optimizer, 'epochs': epochs, + 'batch_size': batch_size, 'learning_rate': learning_rate} + + self.logger.info('Initializing neural network ...') + # initialize the neural network + neural_network = FullyConnectedNN(**neural_network_parameters).double() + # run training algorithm with multiple restarts + self.neural_network, self.losses = multiple_restarts_training(self.training_data, self.validation_data, + neural_network, target_loss, restarts, + training_parameters, seed) # check if neural network is sufficient to guarantee certain error bounds with self.logger.block('Checking tolerances for error of neural network ...'): - if isinstance(self.ann_mse, Number) and self.losses['full'] > self.ann_mse: - raise NeuralNetworkTrainingFailed('Could not train a neural network that ' - 'guarantees prescribed tolerance!') - elif self.ann_mse == 'like_basis' and self.losses['full'] > self.mse_basis: - raise NeuralNetworkTrainingFailed('Could not train a neural network with an error as small as the ' - 'reduced basis error! Maybe you can try a different neural ' - 'network architecture or change the value of `ann_mse`.') + if isinstance(self.ann_mse, Number): + if self.losses['full'] > self.ann_mse: + raise NeuralNetworkTrainingFailed('Could not train a neural network that ' + 'guarantees prescribed tolerance!') + else: + return self._build_rom() + elif self.ann_mse == 'like_basis': + if self.losses['full'] > self.mse_basis: + raise NeuralNetworkTrainingFailed('Could not train a neural network with an error as small as ' + 'the reduced basis error! 
Maybe you can try a different ' + 'neural network architecture or change the value of ' + '`ann_mse`.') + else: + return self._build_rom() elif self.ann_mse is None: self.logger.info('Using neural network with smallest validation error ...') self.logger.info(f'Finished training with a validation loss of {self.losses["val"]} ...') @@ -182,9 +193,9 @@ def reduce(self, hidden_layers='[(N+P)*3, (N+P)*3]', activation_function=torch.t else: raise ValueError('Unknown value for mean squared error of neural network') - def _compute_layers_sizes(self, hidden_layers): + def _compute_layer_sizes(self, hidden_layers): """Compute the number of neurons in the layers of the neural network.""" - return [len(self.fom.parameters), ] + hidden_layers + [len(self.reduced_basis), ] + return [self.fom.parameters.dim, ] + hidden_layers + [len(self.reduced_basis), ] def _build_rom(self): """Construct the reduced order model.""" @@ -193,95 +204,6 @@ def _build_rom(self): return rom - def _train(self, layers, activation_function, optimizer, epochs, batch_size, learning_rate): - """Perform a single training iteration and return the resulting neural network.""" - assert hasattr(self, 'training_data') - assert hasattr(self, 'validation_data') - - # LBFGS-optimizer does not support mini-batching, so the batch size needs to be adjusted - if optimizer == optim.LBFGS: - batch_size = max(len(self.training_data), len(self.validation_data)) - - with self.logger.block('Training the neural network ...'): - - # initialize the neural network - neural_network = FullyConnectedNN(layers, - activation_function=activation_function).double() - - # initialize the optimizer - optimizer = optimizer(neural_network.parameters(), - lr=learning_rate) - - loss_function = nn.MSELoss() - early_stopping_scheduler = EarlyStoppingScheduler(len(self.training_data) + len(self.validation_data)) - - # create the training and validation sets as well as the respective data loaders - training_dataset = CustomDataset(self.training_data) - validation_dataset = CustomDataset(self.validation_data) - phases = ['train', 'val'] - training_loader = utils.data.DataLoader(training_dataset, - batch_size=batch_size) - validation_loader = utils.data.DataLoader(validation_dataset, - batch_size=batch_size) - dataloaders = {'train': training_loader, 'val': validation_loader} - - self.logger.info('Starting optimization procedure ...') - - # perform optimization procedure - for epoch in range(epochs): - losses = {'full': 0.} - - # alternate between training and validation phase - for phase in phases: - if phase == 'train': - neural_network.train() - else: - neural_network.eval() - - running_loss = 0.0 - - # iterate over batches - for batch in dataloaders[phase]: - inputs = batch[0] - targets = batch[1] - - with torch.set_grad_enabled(phase == 'train'): - def closure(): - if torch.is_grad_enabled(): - optimizer.zero_grad() - outputs = neural_network(inputs) - loss = loss_function(outputs, targets) - if loss.requires_grad: - loss.backward() - return loss - - # perform optimization step - if phase == 'train': - optimizer.step(closure) - - # compute loss of current batch - loss = closure() - - # update overall absolute loss - running_loss += loss.item() * len(batch[0]) - - # compute average loss - epoch_loss = running_loss / len(dataloaders[phase].dataset) - - losses[phase] = epoch_loss - - losses['full'] += running_loss - - # check for early stopping - if phase == 'val' and early_stopping_scheduler(losses, neural_network): - if not self.logging_disabled: - 
self.logger.info(f'Early stopping training process after {epoch + 1} epochs ...') - self.logger.info('Minimum validation loss: ' - f'{early_stopping_scheduler.best_losses["val"]}') - return early_stopping_scheduler.best_neural_network, early_stopping_scheduler.best_losses - - return early_stopping_scheduler.best_neural_network, early_stopping_scheduler.best_losses - def build_basis(self): """Compute a reduced basis using proper orthogonal decomposition.""" with self.logger.block('Building reduced basis ...'): @@ -308,12 +230,10 @@ def build_basis(self): return reduced_basis, mean_square_loss def _compute_sample(self, mu, u, reduced_basis): - """Transform parameter and corresponding solution to tensors.""" + """Transform parameter and corresponding solution to |NumPy arrays|.""" # determine the coefficients of the full-order solutions in the reduced basis to obtain - # the training data; convert everything into tensors that are compatible with PyTorch - mu_tensor = torch.DoubleTensor(mu.to_numpy()) - u_tensor = torch.DoubleTensor(reduced_basis.inner(u)[:, 0]) - return [(mu_tensor, u_tensor)] + # the training data + return [(mu, reduced_basis.inner(u)[:, 0])] def reconstruct(self, u): """Reconstruct high-dimensional vector from reduced vector `u`.""" @@ -371,11 +291,11 @@ def __init__(self, fom, training_set, validation_set=None, validation_ratio=0.1, assert 0 < validation_ratio < 1 or validation_set self.__auto_init(locals()) - def _compute_layers_sizes(self, hidden_layers): + def _compute_layer_sizes(self, hidden_layers): """Compute the number of neurons in the layers of the neural network (make sure to increase the input dimension to account for the time). """ - return [len(self.fom.parameters) + 1] + hidden_layers + [len(self.reduced_basis)] + return [self.fom.parameters.dim + 1, ] + hidden_layers + [len(self.reduced_basis), ] def _build_rom(self): """Construct the reduced order model.""" @@ -416,12 +336,13 @@ def build_basis(self): return reduced_basis, mean_square_loss def _compute_sample(self, mu, u, reduced_basis): - """Transform parameter and corresponding solution to tensors - (make sure to include the time instances in the inputs). + """Transform parameter and corresponding solution to |NumPy arrays|. + + This function takes care of including the time instances in the inputs. """ parameters_with_time = [mu.with_(t=t) for t in np.linspace(0, self.fom.T, self.nt)] - samples = [(torch.DoubleTensor(mu.to_numpy()), torch.DoubleTensor(reduced_basis.inner(u_t)[:, 0])) + samples = [(mu, reduced_basis.inner(u_t)[:, 0]) for mu, u_t in zip(parameters_with_time, u)] return samples @@ -504,3 +425,248 @@ def __len__(self): def __getitem__(self, idx): t = self.training_data[idx] return t + + def train_neural_network(training_data, validation_data, neural_network, + training_parameters={}): + """Training algorithm for artificial neural networks. + + Trains a single neural network using the given training and validation data. + + Parameters + ---------- + training_data + Data to use during the training phase. Has to be a list of tuples, + where each tuple consists of two elements that are either + PyTorch-tensors (`torch.DoubleTensor`) or |NumPy arrays| or pyMOR data + structures that have `to_numpy()` implemented. + The first element contains the input data, the second element contains + the target values. + validation_data + Data to use during the validation phase. 
Has to be a list of tuples, + where each tuple consists of two elements that are either + PyTorch-tensors (`torch.DoubleTensor`) or |NumPy arrays| or pyMOR data + structures that have `to_numpy()` implemented. + The first element contains the input data, the second element contains + the target values. + neural_network + The neural network to train (can also be a pre-trained model). + Has to be a PyTorch-Module. + training_parameters + Dictionary with additional parameters for the training routine like + the type of the optimizer, the (maximum) number of epochs, the batch + size, the learning rate or the loss function to use. + Possible keys are `'optimizer'` (an optimizer from the PyTorch `optim` + package; if not provided, the LBFGS-optimizer is taken as default), + `'epochs'` (an integer that determines the number of epochs to use + for training the neural network (if training is not interrupted + prematurely due to early stopping); if not provided, 1000 is taken as + default value), `'batch_size'` (an integer that determines the number + of samples to pass to the optimizer at once; if not provided, 20 is + taken as default value; not used in the case of the LBFGS-optimizer + since LBFGS does not support mini-batching), `'learning_rate'` (a + positive real number used as the (initial) step size of the optimizer; + if not provided, 1 is taken as default value; thus far, no learning + rate schedulers are supported in this implementation), and + `'loss_function'` (a loss function from PyTorch; if not provided, the + MSE loss is taken as default). + + Returns + ------- + best_neural_network + The best trained neural network with respect to validation loss. + losses + The corresponding losses as a dictionary with keys `'full'` (for the + full loss containing the training and the validation average loss), + `'train'` (for the average loss on the training set), and `'val'` + (for the average loss on the validation set). + """ + assert isinstance(neural_network, nn.Module) + + for data in training_data, validation_data: + assert isinstance(data, list) + assert all(isinstance(datum, tuple) and len(datum) == 2 for datum in data) + + def prepare_datum(datum): + if not (isinstance(datum, torch.DoubleTensor) or isinstance(datum, np.ndarray)): + return datum.to_numpy() + return datum + + training_data = [(prepare_datum(datum[0]), prepare_datum(datum[1])) for datum in training_data] + validation_data = [(prepare_datum(datum[0]), prepare_datum(datum[1])) for datum in validation_data] + + optimizer = optim.LBFGS if 'optimizer' not in training_parameters else training_parameters['optimizer'] + epochs = 1000 if 'epochs' not in training_parameters else training_parameters['epochs'] + assert isinstance(epochs, int) and epochs > 0 + batch_size = 20 if 'batch_size' not in training_parameters else training_parameters['batch_size'] + assert isinstance(batch_size, int) and batch_size > 0 + learning_rate = 1. if 'learning_rate' not in training_parameters else training_parameters['learning_rate'] + assert learning_rate > 0. 
+ loss_function = (nn.MSELoss() if 'loss_function' not in training_parameters + else training_parameters['loss_function']) + + logger = getLogger('pymor.algorithms.neural_network.train_neural_network') + + # LBFGS-optimizer does not support mini-batching, so the batch size needs to be adjusted + if optimizer == optim.LBFGS: + batch_size = max(len(training_data), len(validation_data)) + + # initialize optimizer and early stopping scheduler + optimizer = optimizer(neural_network.parameters(), lr=learning_rate) + early_stopping_scheduler = EarlyStoppingScheduler(len(training_data) + len(validation_data)) + + # create the training and validation sets as well as the respective data loaders + training_dataset = CustomDataset(training_data) + validation_dataset = CustomDataset(validation_data) + training_loader = utils.data.DataLoader(training_dataset, batch_size=batch_size) + validation_loader = utils.data.DataLoader(validation_dataset, batch_size=batch_size) + dataloaders = {'train': training_loader, 'val': validation_loader} + + phases = ['train', 'val'] + + logger.info('Starting optimization procedure ...') + + # perform optimization procedure + for epoch in range(epochs): + losses = {'full': 0.} + + # alternate between training and validation phase + for phase in phases: + if phase == 'train': + neural_network.train() + else: + neural_network.eval() + + running_loss = 0.0 + + # iterate over batches + for batch in dataloaders[phase]: + inputs = batch[0] + targets = batch[1] + + with torch.set_grad_enabled(phase == 'train'): + def closure(): + if torch.is_grad_enabled(): + optimizer.zero_grad() + outputs = neural_network(inputs) + loss = loss_function(outputs, targets) + if loss.requires_grad: + loss.backward() + return loss + + # perform optimization step + if phase == 'train': + optimizer.step(closure) + + # compute loss of current batch + loss = closure() + + # update overall absolute loss + running_loss += loss.item() * len(batch[0]) + + # compute average loss + epoch_loss = running_loss / len(dataloaders[phase].dataset) + + losses[phase] = epoch_loss + + losses['full'] += running_loss + + # check for early stopping + if phase == 'val' and early_stopping_scheduler(losses, neural_network): + logger.info(f'Stopping training process early after {epoch + 1} epochs with validation loss ' + f'of {early_stopping_scheduler.best_losses["val"]:.3e} ...') + return early_stopping_scheduler.best_neural_network, early_stopping_scheduler.best_losses + + return early_stopping_scheduler.best_neural_network, early_stopping_scheduler.best_losses + + def multiple_restarts_training(training_data, validation_data, neural_network, + target_loss=None, max_restarts=10, + training_parameters={}, seed=None): + """Algorithm that performs multiple restarts of neural network training. + + This method either performs a predefined number of restarts and returns + the best trained network or tries to reach a given target loss and + stops training when the target loss is reached. + + See :func:`train_neural_network` for more information on the parameters. + + Parameters + ---------- + training_data + Data to use during the training phase. + validation_data + Data to use during the validation phase. + target_loss + Loss to reach during training (if `None`, the network with the + smallest loss is returned). + max_restarts + Maximum number of restarts to perform. + neural_network + The neural network to train (parameters will be reset after each + restart). 
+ + Returns + ------- + best_neural_network + The best trained neural network. + losses + The corresponding losses. + + Raises + ------ + NeuralNetworkTrainingFailed + Raised if prescribed loss can not be reached within the given number + of restarts. + """ + assert isinstance(training_parameters, dict) + assert isinstance(max_restarts, int) and max_restarts > 0 + + logger = getLogger('pymor.algorithms.neural_network.multiple_restarts_training') + + # if applicable, set a common seed for the PyTorch initialization + # of weights and biases and further PyTorch methods for all training runs + if seed: + torch.manual_seed(seed) + + if target_loss: + logger.info(f'Performing up to {max_restarts} restart{"s" if max_restarts > 1 else ""} ' + f'to train a neural network with a loss below {target_loss:.3e} ...') + else: + logger.info(f'Performing up to {max_restarts} restart{"s" if max_restarts > 1 else ""} ' + 'to find the neural network with the lowest loss ...') + + with logger.block('Training neural network #0 ...'): + best_neural_network, losses = train_neural_network(training_data, validation_data, + neural_network, training_parameters) + + # perform multiple restarts + for run in range(1, max_restarts + 1): + + if target_loss and losses['full'] <= target_loss: + logger.info(f'Finished training after {run - 1} restart{"s" if run - 1 != 1 else ""}, ' + f'found neural network with loss of {losses["full"]:.3e} ...') + return neural_network, losses + + with logger.block(f'Training neural network #{run} ...'): + # reset parameters of layers to start training with a new and untrained network + for layers in neural_network.children(): + for layer in layers: + if hasattr(layer, 'reset_parameters'): + layer.reset_parameters() + # perform training + current_nn, current_losses = train_neural_network(training_data, validation_data, + neural_network, training_parameters) + + if current_losses['full'] < losses['full']: + logger.info(f'Found better neural network (loss of {current_losses["full"]:.3e} ' + f'instead of {losses["full"]:.3e}) ...') + best_neural_network = current_nn + losses = current_losses + else: + logger.info(f'Rejecting neural network with loss of {current_losses["full"]:.3e} ' + f'(instead of {losses["full"]:.3e}) ...') + + if target_loss: + raise NeuralNetworkTrainingFailed(f'Could not find neural network with prescribed loss of ' + f'{target_loss:.3e} (best one found was {losses["full"]:.3e})!') + logger.info(f'Found neural network with error of {losses["full"]:.3e} ...') + return best_neural_network, losses
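
Usage note (not part of the patch): the refactored training routines added above can also be called directly, outside of the reductors. The following sketch is purely illustrative; the synthetic input/target arrays, the chosen layer sizes and the `seed` value are assumptions made for the example, and the import paths reflect the module layout of this diff (`FullyConnectedNN` from `pymor.models.neural_network`, `multiple_restarts_training` from `pymor.reductors.neural_network`).

import numpy as np
import torch
import torch.optim as optim

from pymor.models.neural_network import FullyConnectedNN
from pymor.reductors.neural_network import multiple_restarts_training

# synthetic training/validation data: a list of (input, target) tuples of NumPy arrays,
# standing in for the (parameter values, reduced-basis coefficients) pairs that
# `_compute_sample` produces in the reductors
rng = np.random.default_rng(0)
inputs = rng.random((100, 2))
targets = np.stack([inputs[:, 0] + inputs[:, 1],
                    inputs[:, 0] * inputs[:, 1],
                    inputs[:, 0] - inputs[:, 1]], axis=1)
data = [(inputs[i], targets[i]) for i in range(len(inputs))]
training_data, validation_data = data[:80], data[80:]

# the first and last entries of `layer_sizes` are prescribed by the input and output dimensions
neural_network = FullyConnectedNN([2, 30, 30, 3], activation_function=torch.tanh).double()

# all keys are optional; LBFGS is the default optimizer and ignores `batch_size`
training_parameters = {'optimizer': optim.LBFGS, 'epochs': 1000, 'learning_rate': 1.}

# with `target_loss=None` the best network over all restarts is returned; passing a number
# instead stops training as soon as `losses['full']` falls below it and raises
# `NeuralNetworkTrainingFailed` if that does not happen within `max_restarts` restarts
best_neural_network, losses = multiple_restarts_training(training_data, validation_data,
                                                         neural_network, target_loss=None,
                                                         max_restarts=5,
                                                         training_parameters=training_parameters,
                                                         seed=42)
print(losses['train'], losses['val'], losses['full'])

The returned `losses` dictionary uses the keys documented in `train_neural_network` (`'train'`, `'val'` and `'full'`); the value stored under `'full'` is the one compared against `target_loss` during the restarts.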