# An explanation of how to set up the MINLP TNN package for use

Disclaimer: This file is not meant to be run; rather, it gives an idea of the necessary functions, where they can be found and how they can be used. The case studies in the example folder can be run to see this code in action.

In [None]:
import pyomo.environ as pyo
import numpy as np
import os
import MINLP_tnn.helpers.extract_from_pretrained as extract_from_pretrained
from gurobipy import GRB
from gurobi_ml import add_predictor_constr
from MINLP_tnn.helpers.print_stats import save_gurobi_results
import MINLP_tnn.helpers.convert_pyomo as convert_pyomo
from MINLP_tnn.helpers.GUROBI_ML_helper import get_inputs_gurobipy_FFN
from combine_csv import combine


## 1. Load Trained TNN & Get Learnt Parameters

The parsing functions have a parameter to pass input values to the TNN model. This is necessary:
1) To determine the execution order of TNN layers by passing dummy values through the NN and monitoring how layer functions are called.
    * Dummy values can be used as placeholders. The sample inputs are passed through the NN in a forward pass to parse the order of layer execution. In some cases the model summary does not print the layers in the order in which they are implemented by the NN.
2) To verify that the trained TNN and formulated TNN give the same result.
    * If you supply actual data points to the parsing functions, the "layer_outputs_dict" will return the output of each TNN layer for those input values. This feature can be used to compare the trained TNN's output from each layer to the output of each layer of the formulated TNN.


Pytorch:
- supports ReLU activation functions only

In [None]:
import torch

# Load the trained PyTorch TNN onto the CPU.
# NOTE: weights_only=False unpickles the complete model object, so only load
# files that come from a trusted source.
device = "cpu"
tnn_path = " "  # TODO: set path
tnn_model = torch.load(tnn_path, map_location=device, weights_only=False)

# Use ONE of the two parsers below, depending on the type of model.
# sample_input, enc_input, dec_input and num_heads are placeholders that must
# be defined by the user (sample or dummy inputs and the number of heads).

# PARSE ViT_tnn:
layer_names, parameters, _, layer_outputs_dict = extract_from_pretrained.get_torchViT_learned_parameters(tnn_model, sample_input, num_heads)

# PARSE OTHER PYTORCH
layer_names, dict_transformer_params, tnn_model, [count_encoder_layers, count_decoder_layers], layer_outputs_dict = extract_from_pretrained.get_pytorch_learned_parameters(tnn_model, enc_input, dec_input, num_heads, sequence_size=None)

Hugging Face's Pretrained Time Series:
- supports ReLU activation functions
- supports SiLU activation functions

In [None]:
import torch
from transformers.models.time_series_transformer.configuration_time_series_transformer import TimeSeriesTransformerConfig
from transformers.models.time_series_transformer.modeling_time_series_transformer import TimeSeriesTransformerForPrediction
# cloned transformers from: https://github.com/s-hallsworth/transformers.git

# LOAD MODEL
device = "cpu"
train_tnn_path = " "  # TODO: set path
NUMBER_OF_POINTS = 8

# The saved file holds the complete model object (weights_only=False), so the
# model is loaded directly; building a fresh model first would only be
# overwritten. Only load files that come from a trusted source.
tnn_model = torch.load(train_tnn_path, weights_only=False, map_location=device)

# Adjust the configuration of the loaded model
tnn_model.config.prediction_length = NUMBER_OF_POINTS
tnn_model.config.context_length = 3
tnn_model.config.embedding_dimension = 60
tnn_model.config.scaling = False
tnn_model.config.lags_sequence = [0]
tnn_model.config.num_time_features = 1
tnn_model.config.input_size = 9
tnn_model.config.num_parallel_samples = 1

# CONFIG SAMPLE INPUTS TO MATCH EXPECTED INPUT SHAPES
# past_values, past_time_features, past_observed_mask and future_time_features
# are placeholders that must be defined by the user.
hugging_face_dict = {
    "past_values": past_values,
    "past_time_features": past_time_features,
    "past_observed_mask": past_observed_mask,
    "future_time_features": future_time_features,
}

src = torch.ones(1, tnn_model.config.input_size)  # dummy input encoder
tgt = torch.ones(1, NUMBER_OF_POINTS, tnn_model.config.input_size)  # dummy input decoder

# GET LEARNT PARAMS:
# NOTE(review): the literal 2 is presumably the number of attention heads - confirm
# against the signature of get_hugging_learned_parameters.
layer_names, parameters, _, enc_dec_count, layer_outputs_dict = extract_from_pretrained.get_hugging_learned_parameters(tnn_model, src, tgt, 2, hugging_face_dict)
    

Keras:
- supports ReLU activation functions only

In [None]:
# Parse the saved Keras TNN: the parser is given the path of the model file
# and its result is unpacked into the layer names, the learnt parameters and
# the model.
tnn_model_PATH = " "  # TODO: fill in path
layer_names, parameters, tnn_model = extract_from_pretrained.get_learned_parameters(tnn_model_PATH)


## 2. Create MINLP_tnn instance 

In [None]:
from MINLP_tnn.transformer import Transformer as TNN

# DEFINE TNN HYPER-PARAMETERS
# Provide the hyper-parameters in ONE of the two forms below. If both
# assignments are run, the second one overrides the first.

# Option 1: define hyper parameter list (the names are placeholders)
config_list = [encoder_sequence_length, embedding_dim, head_size, num_heads, input_feature_dim, epsilon_layer_normalisation]

# Option 2: or a path to a json with hyper parameters
# (raw string, so that "\t" in the path is not interpreted as a tab character)
config_list = r"\path\to\json.json"


# CREATE TRANSFORMER
# TNN is an alias of the Transformer class itself (see the import at the top of
# this cell), so it is called directly with the hyper-parameters and the Pyomo model.
transformer = TNN(config_list, pyomo_model)

## 3. Add layers to MINLP_tnn

Build Pytorch TNN. See transformer.py for the caveats of this function

In [None]:
# Option A: build the default Pytorch TNN (default=True)
transformer.build_from_hug_torch(
    tnn_model,
    sample_enc_input,
    sample_dec_input,
    enc_bounds=None,
    dec_bounds=None,
    Transformer='pytorch',
    default=True,
    hugging_face_dict=None,
)

# Option B: parse the Pytorch TNN and add residual layers with normalisation
# layers (default=False)
transformer.build_from_hug_torch(
    tnn_model,
    sample_enc_input,
    sample_dec_input,
    enc_bounds=None,
    dec_bounds=None,
    Transformer='pytorch',
    default=False,
    hugging_face_dict=None,
)


Or add layers individually

In [None]:
# add input variable
# input_var_name, dims and bounds are placeholders to be supplied by the user;
# the value returned by add_input_var is stored in input_var.
input_var = transformer.add_input_var(input_var_name, dims, bounds)

In [None]:
# add positional encoding
# input_var_name: a pyo.Var or the name (str) of a variable
PE_var = transformer.add_pos_encoding(input_var_name, embed_var_name, b_emb)

In [None]:
# add embedding layer
# input_var_name: a pyo.Var or the name (str) of a variable
# W_emb and b_emb are shown here with the value None
EMD_var = transformer.embed_input(input_var_name, embed_var_name, embed_dim_2, W_emb=None, b_emb=None)

In [None]:
# add layer normalisation layer
# input_var_name: a pyo.Var or the name (str) of a variable
# gamma, beta and eps are shown here with the value None
LN_var = transformer.add_layer_norm(input_var_name, LN_var_name, gamma=None, beta=None, eps=None)

In [None]:
# add attention layer: config cross attention, masked attention, max_normalised softmax
# input_var_name and encoder_output: a pyo.Var or the name (str) of a variable
# The keyword arguments mask, cross_attn, exp_approx and norm_softmax select the
# attention variant; see add_attention in transformer.py for their exact meaning.
MHA_var = transformer.add_attention(
    input_var_name,
    output_var_name,
    W_q,
    W_k,
    W_v,
    W_o,
    b_q=None,
    b_k=None,
    b_v=None,
    b_o=None,
    mask=False,
    cross_attn=False,
    encoder_output=None,
    exp_approx=False,
    norm_softmax=False,
)

In [None]:
# add residual layer
# input_1_name and input_2_name: each a pyo.Var or the name (str) of a variable
RES_var = transformer.add_residual_connection(input_1_name, input_2_name, output_var_name)

In [None]:
# PYOMO: add FFN to model using OMLT
from omlt.neuralnet import ReluBigMFormulation

# input_var_name: a pyo.Var or the name (str) of a variable
# The OMLT formulation class itself (not an instance) is passed as `formulation`.
FNN_var = transformer.add_FFN_2D(input_var_name, output_var_name, nn_name, input_shape, model_parameters, bounds=(-2, 2), formulation=ReluBigMFormulation)

In [None]:
# GUROBI conversion of Pyomo model: add FFN to model using GurobiML (a forked version is created to support SiLU activation)

# get FNN
# input_var_name: a pyo.Var or the name (str) of a variable
# Each dictionary value is later unpacked as (nn, input_nn, output_nn).
ffn_parameter_dict = {}
ffn_parameter_dict["unique_fnn_name"] = transformer.get_ffn(input_var_name, output_var_name, nn_name, input_shape, model_parameters)

# convert Pyomo model to Gurobi
gurobi_model, map_var, _ = convert_pyomo.to_gurobi(pyomo_model)  ##--- CONVERT PYOMO MODEL TO GUROBIPY ---##

# add FNN to Gurobi using GurobiML
# (local names avoid shadowing the builtin `input`)
for nn, input_nn, output_nn in ffn_parameter_dict.values():
    gurobi_inputs, gurobi_outputs = get_inputs_gurobipy_FFN(input_nn, output_nn, map_var)
    pred_constr = add_predictor_constr(gurobi_model, nn, gurobi_inputs, gurobi_outputs)
gurobi_model.update()  # update gurobi model with FFN constraints

In [None]:
# add average pooling layer
# input_var_name: a pyo.Var or the name (str) of a variable
AVG_var = transformer.add_avg_pool(input_var_name, output_var_name)

## 4. Connect TNN Output Vars to Problem Definition Vars


In [None]:
# out: output of the last transformer layer
# pyomo_model.out: variable from the problem definition that is determined by the TNN

# Tie the two together with one equality constraint per index of pyomo_model.out
pyomo_model.out_constraints = pyo.ConstraintList()
for idx in pyomo_model.out.index_set():
    link_expr = pyomo_model.out[idx] == out[idx]
    pyomo_model.out_constraints.add(expr=link_expr)

## 5. Solve Optimisation Model

In [None]:
# Run the solver on gurobi_model (the Gurobi model must already have been created).
gurobi_model.optimize()