In [1]:
%load_ext autoreload
%autoreload 2

# Global modules
import torch
import numpy as np

# Local modules
import ValueFunctionIteration
import DeepExplore

import simprep
import print_funcs # Functions for checking namespaces and printing module information
import load_and_dump # Functions for loading and dumping PyTorch neural networks or tensors and np.arrays

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

## 1. Importing modules, checking parameters and creating dict for seeds

In [3]:
# 1. Instantiate the DeepExplore and ValueFunctionIteration solvers
de = DeepExplore.DeepExplore()
vfi = ValueFunctionIteration.VFI()

# 2. Compare the parameter namespaces of the two solvers with print_funcs.check_namespaces;
#    attributes that differ between the namespaces are reported in the cell output
vfi_par, de_par = vfi.par, de.par
print_funcs.check_namespaces(vfi_par, de_par)

# 3. Build the seed dictionary from the DeepExplore instance.
#    It contains training seeds for all iterations in the DeepExplore parameter namespace,
#    plus a unique test seed that is used in the evaluation iterations in DeepExplore.
#    The same dict can be passed to the simulation method of the ValueFunctionIteration object
#    to ensure comparable results between the two methods.
train_test_dict = de.sim_dict()

dtype is NOT the same in both namespaces: <class 'numpy.float64'> not equal to torch.float32

The following differences in attributes have been detected:
-------- Namespace one --------------------------------- Namespace two ---------------------------------
dtype	 <class 'numpy.float64'> 	 	 	 torch.float32


## 2. Value Function Iteration

In [4]:
# Solve the model
vfi.solve()

# Simulate the model using the unique test seed in the dict
vfi.simulate(train_test_dict)

# Compute the Euler errors
vfi.compute_euler_errors()

Period 49 solved in 4.89 seconds


In [None]:
# Dump the VFI policy, computation time, simulation results and Euler errors
# so they can be visualized in another notebook.
# Each entry is (namespace on vfi, attribute in that namespace, name to dump under).
vfi_dump_targets = [
    ('sol', 'policy', 'vfi_policy'),
    ('sol', 'comp_time', 'vfi_comp_time'),
    ('sim', 'action', 'vfi_sim_action'),
    ('sim', 'state', 'vfi_sim_state'),
    ('sim', 'obj', 'vfi_sim_obj'),
    ('sim', 'euler_errors', 'vfi_euler_errors'),
]

for namespace_name, attribute_name, dump_name in vfi_dump_targets:
    # Look the array up only when it is about to be dumped, one target at a time
    result_array = getattr(getattr(vfi, namespace_name), attribute_name)
    load_and_dump.dump_arrays(result_array, dump_name)

## 2. DeepExplore

The DeepExplore algorithm is run by calling the `train_nn_evals` method of the DeepExplore object (`de`). For specific information about this method, the reader is referred to DeepExplore.py, where it is explained in detail.

train_nn_evals takes two positional arguments, namely train_test_dict and device.

Many training specifications can be passed to train_nn_evals as keyword arguments. To avoid errors during training, the function asserts that all keyword arguments have a valid format.

The train_test_dict has to consist of training seeds for all iterations. This is asserted.



In [None]:
# Train the DNN in DeepExplore with the specifications of the parameters in the namespace
# (no keyword arguments are passed here)
de.train_nn_evals(
    train_test_dict,
    device,
)

### Dumping computation time, out-of-sample evaluations, nn, euler errors and more

Calling the `generate_bool_list` method of the DeepExplore object creates a list of boolean values whose length equals the total number of iterations.

If a list of indices is specified in the function call, the function returns a list of boolean values with 1's (True) at the specified indices; all other entries are 0's (False).

This can e.g. be used to specify evaluation iterations, where we want to save/dump the tensors with computation time, out-of-sample evaluations or the nns

In [5]:
# Put a True boolean in the last evaluation iteration (index -1)
dump_bool_list = de.generate_bool_list([-1])

# Name of the training "run"/session; the results will be saved with this name.
# If None is specified, the results are saved under the date.
run_name = 'baseline_run'

# Train and save the results of the last evaluation iteration
de.train_nn_evals(
    train_test_dict,
    device,
    dump_eval_iteration=dump_bool_list,
    run_name=run_name,
)

# If any boolean in the dump_eval_iteration keyword argument is True, train_nn_evals will also
# save the elements of interest if early stopping is invoked.

In [None]:
# dump_bool_list can also be used as the booleans that tell train_nn_evals
# in which evaluation iterations it should compute Euler errors
run_name = 'baseline_run_w_euler_errors'

de.train_nn_evals(
    train_test_dict,
    device,
    run_name=run_name,
    euler_errors_eval_iteration=dump_bool_list,
)

# Because of the long computation time of Euler errors, they are always dumped after computation

### Load nn, simulate model and compute euler errors

Using the load_and_dump module, a nn can be loaded.

Specifying the nn in the DeepExplore module allows us to simulate the model and compute euler errors using the nn

In [None]:
# Load the network stored under 'neuron_450_450' and move it to the selected device.
# NOTE(review): the second argument presumably gives the neurons per layer of the stored network —
# confirm against load_and_dump.load_nn
loaded_nn = load_and_dump.load_nn('neuron_450_450', np.array([450, 450]))
nn = loaded_nn.to(device)

# Simulate the model with the loaded network; this generates a sim namespace
# like the one used in the ValueFunctionIteration module
de.simulate_test(train_test_dict, device, nn=nn)

# Compute Euler errors with the loaded network. Note that this takes a long time.
de.compute_euler_errors(device, nn=nn)

## 3. DeepExplore with differentiated learning rates

The train_nn_evals function can also be used for training with differentiated learning rates.

To do this, a list of tuples is passed to the .generate_lr_list attribute in the DeepExplore module.

Each tuple has to have the format (index_for_lr_change, new_lr).

In [6]:
# Differentiated learning-rate schedule like the one in the thesis,
# written as (index_for_lr_change, new_lr) tuples
lr_schedule = [
    (0, 1e-3),
    (99, 1e-4),
    (499, 5e-5),
    (799, 1e-5),
]
lr_list = de.generate_lr_list(lr_schedule)

# Dump nn, losses, time and Euler errors in the last evaluation iteration
dump_bool_list = de.generate_bool_list([-1])

run_name = 'dif_lr_run'

de.train_nn_evals(
    train_test_dict,
    device,
    run_name=run_name,
    dump_eval_iteration=dump_bool_list,
    euler_errors_eval_iteration=dump_bool_list,
    lr_list=lr_list,
)

## 4. DeepExplore with different nn structures

When evaluating the DeepExplore algorithm with different neural network structures, one has to specify the keyword argument "neurons" in train_nn_evals

For training the 12 neural networks presented in the thesis, a for-loop is used.

In [7]:
# Dump nn, losses and time in the last evaluation iteration
dump_bool_list = de.generate_bool_list([-1])

# The 12 network structures: widths of 150, 300, 450 or 600 neurons, each repeated over 1, 2 or 3 layers.
# Generated instead of typed out by hand; the order is 150, 150_150, 150_150_150, 300, ...
neuron_list = [
    np.array([width] * depth)
    for width in (150, 300, 450, 600)
    for depth in (1, 2, 3)
]

# Run-name suffixes are derived from the layer sizes themselves (e.g. '450_450'),
# so the names cannot drift out of sync with the structures they describe
name_list = ['_'.join(str(size) for size in neurons.tolist()) for neurons in neuron_list]

# Train one network per structure; each run is saved as 'neuron_layers_<suffix>'
for neuron_array, name in zip(neuron_list, name_list):
    run_name = 'neuron_layers_' + name
    de.train_nn_evals(
        train_test_dict,
        device,
        run_name=run_name,
        neurons=neuron_array,
        dump_eval_iteration=dump_bool_list,
        euler_errors_eval_iteration=dump_bool_list,
    )