In [1]:
import math
import json
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

!nvidia-smi

Sun Jun 11 10:11:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:15:00.0 Off |                    0 |
| N/A   27C    P0    39W / 300W |      3MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Device on which tensors are created, stored and processed:
# the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Fixed random seed so that results are reproducible.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

cuda:0


## Data Loader

In [3]:
class WellLoader(Dataset):
    """Sliding-window dataset built from per-well JSON records.

    ``path`` must contain a ``metadata.json`` file (a list of entries carrying
    at least the ``'INDEX'`` and ``'WELL'`` keys) and one ``<INDEX>.json`` file
    per entry, readable by ``pandas.read_json``.

    Parameters
    ----------
    path : str
        Directory holding ``metadata.json`` and the per-record JSON files.
    wells : iterable of (well_name, filt)
        ``filt`` is applied as a row selector (e.g. a ``slice``) after rows
        with missing values have been dropped.
    var_in, var_out : list of str
        Column names used as model inputs / targets.
    normalizing_percentile : float
        Stored on the instance; not used by the current normalisation.
    normalizing_split : float
        Fraction of rows left out (at random) when fitting the scalers.
    normalizer : callable
        Zero-argument factory returning an object with ``fit``/``transform``.
    max_sequence : int
        Window length (number of consecutive rows per sample).
    step : int
        Stride between consecutive windows.

    Attributes
    ----------
    batches_X : Tensor, shape (n_samples, len(var_in), max_sequence)
    batches_Y : Tensor, shape (n_samples, len(var_out), max_sequence)
    outputs : Tensor, shape (n_samples, len(var_out))
        Row that immediately follows each window.
    normalizers : list of (scaler_X, scaler_Y)
        One fitted pair per loaded record, in loading order.
    """

    def __init__(self,
                 path,
                 wells,
                 var_in,
                 var_out,
                 normalizing_percentile=90.0,
                 normalizing_split=0.2,
                 normalizer=RobustScaler,
                 max_sequence=16,
                 step=1):

        self.path = path
        with open(self.path + '/metadata.json', 'r') as metafile:
            self.metadata = json.load(metafile)
        self.wells = wells
        self.var_in = var_in
        self.var_out = var_out
        self.normalizing_percentile = normalizing_percentile
        self.normalizing_split = normalizing_split
        self.normalizer = normalizer
        self.max_sequence = max_sequence
        self.step = step
        self.batches_X = None
        self.batches_Y = None
        self.outputs = None
        self.normalizers = []

        indexes = self.get_wells_index(self.wells)
        self.load_data_by_index(indexes)

    def get_wells_index(self, wells):
        """Return ``(INDEX, filt)`` for every metadata entry matching a requested well."""
        indexes = []
        for well, filt in wells:
            indexes.extend([(meta['INDEX'], filt) for meta in self.metadata if meta['WELL'] == well])
        return indexes

    def load_data_by_index(self, indexes):
        """Load, normalise and window every record listed in ``indexes``.

        Populates ``batches_X``, ``batches_Y``, ``outputs`` and appends one
        scaler pair per record to ``normalizers``.
        """
        batches_X = []
        batches_Y = []
        outputs = []
        for index, filt in indexes:
            data = pd.read_json(f'{self.path}/{index}.json')
            # Workaround: derive the liquid volume column (oil + water) so it
            # is available as a variable in the dataset.
            data['BORE_LIQ_VOL'] = data['BORE_OIL_VOL'] + data['BORE_WAT_VOL']
            data = data[self.var_in + self.var_out].dropna().reset_index(drop=True)
            X = data[self.var_in].values[filt, :]
            Y = data[self.var_out].values[filt, :]

            # The scalers are fitted on a random subset of the rows only.
            X_base, _, Y_base, _ = train_test_split(X, Y, test_size=self.normalizing_split)
            scaler_X = self.normalizer().fit(X_base)
            scaler_Y = self.normalizer().fit(Y_base)
            self.normalizers.append((scaler_X, scaler_Y))

            X = torch.from_numpy(scaler_X.transform(X).astype('float32'))
            Y = torch.from_numpy(scaler_Y.transform(Y).astype('float32'))

            # Target of window k is the row right after it: index max_sequence + k*step.
            output = Y[self.max_sequence::self.step]
            n_samples = output.shape[0]

            # unfold() appends the window axis last: (n_windows, n_features, max_sequence).
            # It can yield one window more than there are targets (the last
            # window has no following row). Drop it so that windows and targets
            # stay aligned index-by-index, also after concatenating several wells.
            X = X.unfold(0, self.max_sequence, self.step)[:n_samples]
            Y = Y.unfold(0, self.max_sequence, self.step)[:n_samples]

            batches_X.append(X)
            batches_Y.append(Y)
            outputs.append(output)

        self.batches_X = torch.cat(batches_X, dim=0)
        self.batches_Y = torch.cat(batches_Y, dim=0)
        self.outputs = torch.cat(outputs, dim=0)

    def __len__(self):
        """Number of (window, target) samples."""
        return self.outputs.shape[0]

    def __getitem__(self, idx):
        """Return ``(input window, target-history window, next target row)`` for sample ``idx``."""
        srcs = self.batches_X[idx, :, :]
        trgts = self.batches_Y[idx, :, :]
        output = self.outputs[idx, :]

        return srcs, trgts, output
        
        
                 

In [4]:
# Directory containing the dataset files read by WellLoader.
path = './dataset/volve'

# (well name, row filter) pairs; the filter is applied by WellLoader to the
# rows of each matching record.
wells = [('15/9-F-1 C', slice(28, None))]

# Input variables.
var_in = [
    'AVG_DOWNHOLE_PRESSURE', 'AVG_WHP_P', 'AVG_CHOKE_SIZE_P',
    'AVG_WHT_P', 'AVG_DOWNHOLE_TEMPERATURE',
]

# Output variable(s). Currently disabled alternatives:
# 'BORE_OIL_VOL', 'BORE_GAS_VOL', 'BORE_WAT_VOL'.
var_out = ['BORE_LIQ_VOL']

dataset = WellLoader(path=path, wells=wells, var_in=var_in, var_out=var_out, max_sequence=8)

In [5]:
class SubsetSplitter:
    """Callable that partitions a dataset into train / validation / test loaders.

    The test subset is always the trailing ``test_split`` fraction of the
    indices. The remaining indices are optionally shuffled (``shuffle=True``,
    using NumPy's global RNG) and then cut into train and validation parts.
    Every loader draws its batches through a ``SubsetRandomSampler``.
    """

    def __init__(self, batch_size, validation_split, test_split, shuffle=False):
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.test_split = test_split
        self.shuffle = shuffle

    def __call__(self, dataset: Dataset):
        """Return ``(train_loader, validation_loader, test_loader)`` for ``dataset``."""
        n_total = len(dataset)
        n_valid = int(np.floor(self.validation_split * n_total))
        n_test = int(np.floor(self.test_split * n_total))
        n_train = n_total - n_valid - n_test
        n_dev = n_train + n_valid  # train + validation together

        every_index = list(range(n_total))
        # The test indices are taken out before any shuffling happens.
        test_ids = every_index[n_dev:]
        dev_ids = every_index[:n_dev]
        if self.shuffle:
            np.random.shuffle(dev_ids)

        partitions = (dev_ids[:n_train], dev_ids[n_train:], test_ids)
        return tuple(
            torch.utils.data.DataLoader(
                dataset,
                batch_size=self.batch_size,
                sampler=SubsetRandomSampler(ids),
            )
            for ids in partitions
        )

In [6]:
# Batches of 4 samples; 10% of the samples for validation, 30% for test.
splitter = SubsetSplitter(batch_size=4, validation_split=0.1, test_split=0.3, shuffle=False)
train_loader, validation_loader, test_loader = splitter(dataset)

## Model definition

In [7]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout.

    Modified version from:
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html

    Parameters
    ----------
    dim_model : int
        Size of the last (feature) dimension of the inputs. Odd values are
        supported: the last sine column then has no matching cosine column.
    dropout_p : float
        Dropout probability applied to ``input + encoding``.
    max_len : int
        Number of positions for which the encoding is precomputed.
    batch_first : bool, default False
        ``False`` expects inputs shaped ``(seq_len, batch, dim_model)`` (the
        original behaviour); ``True`` expects ``(batch, seq_len, dim_model)``.
    """

    def __init__(self, dim_model, dropout_p, max_len, batch_first=False):
        super().__init__()
        self.dropout = nn.Dropout(dropout_p)
        self.batch_first = batch_first

        # Column vector of positions 0 .. max_len - 1.
        positions_list = torch.arange(0, max_len, dtype=torch.float).view(-1, 1)
        # 1 / 10000^(2i / dim_model) for i = 0 .. ceil(dim_model / 2) - 1.
        division_term = torch.exp(
            torch.arange(0, dim_model, 2).float() * (-math.log(10000.0)) / dim_model
        )
        angles = positions_list * division_term  # (max_len, ceil(dim_model / 2))

        pos_encoding = torch.zeros(max_len, dim_model)
        # PE(pos, 2i) = sin(pos / 10000^(2i / dim_model))
        pos_encoding[:, 0::2] = torch.sin(angles)
        # PE(pos, 2i + 1) = cos(pos / 10000^(2i / dim_model)).
        # There are only dim_model // 2 odd columns, which is one fewer than
        # the number of angles when dim_model is odd.
        pos_encoding[:, 1::2] = torch.cos(angles[:, :dim_model // 2])

        # Stored as a buffer (part of the module state, no gradients),
        # shaped (max_len, 1, dim_model).
        pos_encoding = pos_encoding.unsqueeze(1)
        self.register_buffer("pos_encoding", pos_encoding)

    def forward(self, token_embedding: torch.Tensor) -> torch.Tensor:
        """Add the positional encoding to ``token_embedding`` and apply dropout."""
        if self.batch_first:
            seq_len = token_embedding.size(1)
            # (seq_len, 1, dim_model) -> (1, seq_len, dim_model), broadcast over the batch.
            encoding = self.pos_encoding[:seq_len].transpose(0, 1)
        else:
            encoding = self.pos_encoding[:token_embedding.size(0), :]
        return self.dropout(token_embedding + encoding)
    
    
# Quick check: encode an all-zero (1, 2, 10) tensor, so the displayed result
# is only the positional term after dropout.
positional_encoding = PositionalEncoding(10, 0.3, 1200)
tensor = torch.zeros(1, 2, 10)
encoded = positional_encoding(tensor)
encoded

tensor([[[0.0000, 0.0000, 0.0000, 1.4286, 0.0000, 1.4286, 0.0000, 1.4286,
          0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4286, 0.0000, 0.0000,
          0.0000, 1.4286]]])

In [8]:
class Transformer(nn.Module):
    """
    Wrapper around ``nn.Transformer`` (batch-first) with optional Linear/ReLU6
    layers, a residual connection to the target and a final linear read-out.

    Model from "A detailed guide to Pytorch's nn.Transformer() module.", by
    Daniel Melchor: https://medium.com/@danielmelchor/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1
    """

    def __init__(
        self,
        num_outputs,
        dim_model,
        num_heads,
        num_encoder_layers,
        num_decoder_layers,
        dropout_p,
        num_linear_layers=0,
        norm_first=False
    ):
        """Build the sub-modules.

        num_outputs        -- size of the last dimension of the model output
        dim_model          -- feature size expected in the last dimension of src/tgt
        num_heads          -- number of attention heads
        num_encoder_layers -- number of encoder layers
        num_decoder_layers -- number of decoder layers
        dropout_p          -- dropout probability
        num_linear_layers  -- number of (Linear, ReLU6) pairs applied after the transformer
        norm_first         -- forwarded to ``nn.Transformer``
        """
        super().__init__()

        # Info
        self.model_type = "Transformer"
        self.dim_model = dim_model

        # Layers. The positional encoder is instantiated but forward() does
        # not apply it.
        self.positional_encoder = PositionalEncoding(
            dim_model=dim_model, dropout_p=dropout_p, max_len=5000
        )
        self.transformer = nn.Transformer(
            d_model=dim_model,
            nhead=num_heads,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dropout=dropout_p,
            batch_first=True,
            norm_first=norm_first,
        )
        post_layers = []
        for _ in range(num_linear_layers):
            post_layers.extend([nn.Linear(dim_model, dim_model), nn.ReLU6()])
        self.linear_layers = nn.ModuleList(post_layers)
        self.out = nn.Linear(dim_model, num_outputs)

    def forward(self, src, tgt, tgt_mask=None, src_pad_mask=None, tgt_pad_mask=None):
        """Run the model.

        ``src`` and ``tgt`` are passed to the transformer as they are (no
        embedding, scaling or positional encoding), so their last dimension
        must be ``dim_model``. The result has ``tgt``'s shape with the last
        dimension replaced by ``num_outputs``.
        """
        hidden = self.transformer(
            src,
            tgt,
            tgt_mask=tgt_mask,
            src_key_padding_mask=src_pad_mask,
            tgt_key_padding_mask=tgt_pad_mask,
        )
        for layer in self.linear_layers:
            hidden = layer(hidden)

        # Residual connection with the target, then projection to num_outputs.
        return self.out(hidden + tgt)

    def get_tgt_mask(self, size) -> torch.Tensor:
        """Return a ``size x size`` additive causal mask.

        Entries on and below the diagonal are 0 and entries above it are -inf,
        so each row can see one more position than the previous one.
        Example for size=3:
            [[0., -inf, -inf],
             [0.,   0., -inf],
             [0.,   0.,   0.]]
        """
        return torch.triu(torch.full((size, size), float('-inf')), diagonal=1)

    def create_pad_mask(self, matrix: torch.Tensor, pad_token: int) -> torch.Tensor:
        """Return a boolean mask that is True where ``matrix`` equals ``pad_token``.

        E.g. matrix = [1,2,3,0,0,0] with pad_token=0 gives
        [False, False, False, True, True, True].
        """
        is_padding = (matrix == pad_token)
        return is_padding

In [9]:
class Training:
    """Training driver with per-epoch validation/test evaluation and checkpointing.

    Parameters
    ----------
    epochs : int
        Maximum number of epochs run by ``fit``.
    loss : callable
        Loss applied to the flattened predictions and targets.
    optimizer : torch optimizer
    scheduler : learning-rate scheduler or None
        When given, it is stepped after every training batch.
    path : str
        Directory where the best model is saved.
    model_name : str
        File stem of the checkpoint (``<path>/<model_name>.pt``).
    early_stop : bool
        When True, ``fit`` stops once the patience budget is exhausted.
    patience : int
        Number of non-improving epochs tolerated before stopping.

    Relies on the notebook-level ``device`` for tensor placement.
    """

    def __init__(self, epochs, loss, optimizer, scheduler, path, model_name='Transformer', early_stop=True, patience=5):

        self.loss = loss
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.epochs = epochs
        self.path = path
        self.model_name = model_name
        self.early_stop_flag = early_stop
        self.patience = patience
        # Model currently being fitted; used by early_stopping() for checkpoints.
        self._model = None
        self.clear_results()

    def clear_results(self):
        """Reset the per-epoch loss history of every split."""
        self.results = {
            'Train':[],
            'Validation':[],
            'Test':[],
        }

    def fit(self, model, train_loader, validation_loader, test_loader):
        """Train ``model`` for up to ``self.epochs`` epochs.

        After each epoch the model is evaluated on the validation and test
        loaders, and it is checkpointed whenever the validation loss is at
        least as good as the best one seen so far.
        """
        self.clear_results()
        torch.cuda.empty_cache()
        decrease = self.patience
        self._model = model

        model.to(device)
        fit_time = time.time()

        for e in range(self.epochs):
            since = time.time()
            model.train()
            self.train_loop(model, train_loader)
            model.eval()
            self.validation_loop(model, validation_loader)
            self.test_loop(model, test_loader)
            decrease, not_improved = self.early_stopping(validation_loader, decrease, model=model)
            if not_improved == 1 and self.early_stop_flag:
                print('[***] end training ...') 
                break
            loss_per_training_batch = self.results['Train'][-1]
            loss_per_validation_batch = self.results['Validation'][-1]
            loss_per_test_batch = self.results['Test'][-1]
            print("Epoch:{}/{}..".format(e+1, self.epochs),
                  "Train Loss: {:.3f}..".format(loss_per_training_batch),
                  "Val Loss: {:.3f}..".format(loss_per_validation_batch),
                  "Test Loss: {:.3f}..".format(loss_per_test_batch),
                  "Time: {:.2f}m".format((time.time()-since)/60))
        print('Total time: {:.2f} m' .format((time.time()- fit_time)/60))

    def train_loop(self, model, train_loader):
        """Run one optimisation pass over ``train_loader`` and record the mean batch loss."""
        running_loss = 0.0
        for X, y_tgt, y_out in tqdm(train_loader):
            X, y_tgt, y_out = X.to(device), y_tgt.to(device), y_out.to(device)
            output = model(X, y_tgt)
            loss = self.loss(output.ravel(), y_out.ravel())
            loss.backward()
            self.optimizer.step()       # update weights
            self.optimizer.zero_grad()  # reset gradients

            # The scheduler is stepped once per batch, not once per epoch.
            if self.scheduler is not None:
                self.scheduler.step()
            running_loss += loss.item()

        self.results['Train'].append(running_loss/len(train_loader))

    def _evaluate(self, model, loader, split):
        """Append the mean batch loss of ``model`` over ``loader`` to ``results[split]``."""
        running_loss = 0.0
        with torch.no_grad():
            for X, y_tgt, y_out in tqdm(loader):
                X, y_tgt, y_out = X.to(device), y_tgt.to(device), y_out.to(device)
                output = model(X, y_tgt)
                loss = self.loss(output.ravel(), y_out.ravel())
                running_loss += loss.item()

        self.results[split].append(running_loss/len(loader))

    def validation_loop(self, model, validation_loader):
        """Record the mean batch loss on the validation loader (no gradients)."""
        self._evaluate(model, validation_loader, 'Validation')

    def test_loop(self, model, test_loader):
        """Record the mean batch loss on the test loader (no gradients)."""
        self._evaluate(model, test_loader, 'Test')

    def early_stopping(self, validation_loader, decrease, model=None):
        """Update the patience counter and checkpoint the model on improvement.

        Parameters
        ----------
        validation_loader : unused, kept for interface compatibility.
        decrease : int
            Remaining patience before this epoch.
        model : nn.Module, optional
            Model to checkpoint; defaults to the model passed to the last
            ``fit`` call.

        Returns
        -------
        (decrease, not_improved) : (int, int)
            Updated patience and 1 when the patience is exhausted, else 0.

        Raises
        ------
        RuntimeError
            If a checkpoint is due but no model is available.
        """
        if model is None:
            model = getattr(self, '_model', None)

        loss_per_validation_batch = self.results['Validation'][-1]
        # Best loss of the previous epochs (inf on the first epoch).
        min_loss = np.min(self.results['Validation'][:-1] + [np.inf])
        if min_loss >= loss_per_validation_batch:
            print('Loss Decreasing.. {:.3f} >> {:.3f} '.format(min_loss, loss_per_validation_batch))
            decrease = self.patience
            print('saving model...')
            if model is None:
                raise RuntimeError(
                    'No model to checkpoint: call fit() first or pass model=... to early_stopping().'
                )
            torch.save(model, self.path + f'/{self.model_name}.pt')
        else:
            decrease -= 1
        not_improved = 1 if decrease < 0 else 0
        return decrease, not_improved

    def get_best_model(self):
        """Load and return the checkpoint written by ``early_stopping``."""
        # NOTE(review): the checkpoint is a fully pickled module, so loading it
        # executes pickle code. Only load files you trust. Recent PyTorch
        # releases may also require an explicit weights_only=False here.
        model = torch.load(self.path + f'/{self.model_name}.pt')
        return model
            
            

In [10]:
# Model: dim_model equals the window length used by the dataset.
model = Transformer(
    num_outputs=1,
    dim_model=dataset.max_sequence,
    num_heads=2,
    num_encoder_layers=3,
    num_decoder_layers=3,
    dropout_p=0.1,
    num_linear_layers=0,
    norm_first=False,
).to(device)

# Hyper-parameters.
lr_ = 5e-4
epoch = 1000
weight_decay = 1e-4

# Checkpoint location. Note that this rebinds `path`, which previously held
# the dataset directory.
path = '.'
model_name = 'ADIM'

loss = nn.MSELoss()
# Alternatives tried before: optim.Adam(model.parameters(), lr=lr_) and
# optim.SGD(model.parameters(), lr=lr_).
optimizer = optim.AdamW(model.parameters(), lr=lr_, weight_decay=weight_decay)
# One-cycle schedule spanning every batch of every epoch (stepped per batch).
sched = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=lr_,
    epochs=epoch,
    steps_per_epoch=len(train_loader),
)
training = Training(
    epochs=epoch,
    loss=loss,
    optimizer=optimizer,
    scheduler=sched,
    path=path,
    model_name=model_name,
    early_stop=False,
    patience=20,
)

In [None]:
# Train, then reload the checkpoint with the best validation loss.
training.fit(
    model=model,
    train_loader=train_loader,
    validation_loader=validation_loader,
    test_loader=test_loader,
)
best_model = training.get_best_model()

100%|██████████| 107/107 [00:02<00:00, 52.61it/s]
100%|██████████| 18/18 [00:00<00:00, 375.11it/s]
100%|██████████| 53/53 [00:00<00:00, 388.02it/s]


Loss Decreasing.. inf >> 0.492 
saving model...
Epoch:1/1000.. Train Loss: 0.663.. Val Loss: 0.492.. Test Loss: 0.477.. Time: 0.04m


100%|██████████| 107/107 [00:01<00:00, 73.55it/s]
100%|██████████| 18/18 [00:00<00:00, 384.51it/s]
100%|██████████| 53/53 [00:00<00:00, 386.18it/s]


Loss Decreasing.. 0.492 >> 0.343 
saving model...
Epoch:2/1000.. Train Loss: 0.389.. Val Loss: 0.343.. Test Loss: 0.357.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 72.53it/s]
100%|██████████| 18/18 [00:00<00:00, 382.43it/s]
100%|██████████| 53/53 [00:00<00:00, 386.00it/s]


Loss Decreasing.. 0.343 >> 0.314 
saving model...
Epoch:3/1000.. Train Loss: 0.321.. Val Loss: 0.314.. Test Loss: 0.322.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 71.37it/s]
100%|██████████| 18/18 [00:00<00:00, 366.90it/s]
100%|██████████| 53/53 [00:00<00:00, 378.89it/s]


Epoch:4/1000.. Train Loss: 0.304.. Val Loss: 0.326.. Test Loss: 0.310.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 71.79it/s]
100%|██████████| 18/18 [00:00<00:00, 373.70it/s]
100%|██████████| 53/53 [00:00<00:00, 377.83it/s]


Loss Decreasing.. 0.314 >> 0.290 
saving model...
Epoch:5/1000.. Train Loss: 0.299.. Val Loss: 0.290.. Test Loss: 0.298.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 72.91it/s]
100%|██████████| 18/18 [00:00<00:00, 378.73it/s]
100%|██████████| 53/53 [00:00<00:00, 386.68it/s]


Epoch:6/1000.. Train Loss: 0.270.. Val Loss: 0.299.. Test Loss: 0.285.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.75it/s]
100%|██████████| 18/18 [00:00<00:00, 380.18it/s]
100%|██████████| 53/53 [00:00<00:00, 385.59it/s]


Loss Decreasing.. 0.290 >> 0.268 
saving model...
Epoch:7/1000.. Train Loss: 0.259.. Val Loss: 0.268.. Test Loss: 0.275.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.91it/s]
100%|██████████| 18/18 [00:00<00:00, 386.81it/s]
100%|██████████| 53/53 [00:00<00:00, 388.47it/s]


Loss Decreasing.. 0.268 >> 0.256 
saving model...
Epoch:8/1000.. Train Loss: 0.249.. Val Loss: 0.256.. Test Loss: 0.262.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 74.55it/s]
100%|██████████| 18/18 [00:00<00:00, 379.27it/s]
100%|██████████| 53/53 [00:00<00:00, 387.90it/s]


Loss Decreasing.. 0.256 >> 0.246 
saving model...
Epoch:9/1000.. Train Loss: 0.237.. Val Loss: 0.246.. Test Loss: 0.251.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.25it/s]
100%|██████████| 18/18 [00:00<00:00, 308.18it/s]
100%|██████████| 53/53 [00:00<00:00, 384.23it/s]


Loss Decreasing.. 0.246 >> 0.235 
saving model...
Epoch:10/1000.. Train Loss: 0.238.. Val Loss: 0.235.. Test Loss: 0.240.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.92it/s]
100%|██████████| 18/18 [00:00<00:00, 376.82it/s]
100%|██████████| 53/53 [00:00<00:00, 386.12it/s]


Epoch:11/1000.. Train Loss: 0.222.. Val Loss: 0.254.. Test Loss: 0.239.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.01it/s]
100%|██████████| 18/18 [00:00<00:00, 384.74it/s]
100%|██████████| 53/53 [00:00<00:00, 385.37it/s]


Loss Decreasing.. 0.235 >> 0.211 
saving model...
Epoch:12/1000.. Train Loss: 0.223.. Val Loss: 0.211.. Test Loss: 0.219.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.27it/s]
100%|██████████| 18/18 [00:00<00:00, 366.16it/s]
100%|██████████| 53/53 [00:00<00:00, 370.45it/s]


Loss Decreasing.. 0.211 >> 0.205 
saving model...
Epoch:13/1000.. Train Loss: 0.198.. Val Loss: 0.205.. Test Loss: 0.206.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.14it/s]
100%|██████████| 18/18 [00:00<00:00, 372.79it/s]
100%|██████████| 53/53 [00:00<00:00, 388.11it/s]


Loss Decreasing.. 0.205 >> 0.194 
saving model...
Epoch:14/1000.. Train Loss: 0.189.. Val Loss: 0.194.. Test Loss: 0.209.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 74.55it/s]
100%|██████████| 18/18 [00:00<00:00, 382.42it/s]
100%|██████████| 53/53 [00:00<00:00, 384.86it/s]


Loss Decreasing.. 0.194 >> 0.183 
saving model...
Epoch:15/1000.. Train Loss: 0.173.. Val Loss: 0.183.. Test Loss: 0.191.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.27it/s]
100%|██████████| 18/18 [00:00<00:00, 377.50it/s]
100%|██████████| 53/53 [00:00<00:00, 384.33it/s]


Loss Decreasing.. 0.183 >> 0.179 
saving model...
Epoch:16/1000.. Train Loss: 0.173.. Val Loss: 0.179.. Test Loss: 0.189.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 73.72it/s]
100%|██████████| 18/18 [00:00<00:00, 378.92it/s]
100%|██████████| 53/53 [00:00<00:00, 387.01it/s]


Loss Decreasing.. 0.179 >> 0.159 
saving model...
Epoch:17/1000.. Train Loss: 0.162.. Val Loss: 0.159.. Test Loss: 0.179.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 68.32it/s]
100%|██████████| 18/18 [00:00<00:00, 309.84it/s]
100%|██████████| 53/53 [00:00<00:00, 363.05it/s]


Loss Decreasing.. 0.159 >> 0.157 
saving model...
Epoch:18/1000.. Train Loss: 0.159.. Val Loss: 0.157.. Test Loss: 0.174.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 64.72it/s]
100%|██████████| 18/18 [00:00<00:00, 319.56it/s]
100%|██████████| 53/53 [00:00<00:00, 322.02it/s]


Loss Decreasing.. 0.157 >> 0.151 
saving model...
Epoch:19/1000.. Train Loss: 0.169.. Val Loss: 0.151.. Test Loss: 0.175.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 68.33it/s]
100%|██████████| 18/18 [00:00<00:00, 317.33it/s]
100%|██████████| 53/53 [00:00<00:00, 320.40it/s]


Loss Decreasing.. 0.151 >> 0.151 
saving model...
Epoch:20/1000.. Train Loss: 0.157.. Val Loss: 0.151.. Test Loss: 0.169.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.45it/s]
100%|██████████| 18/18 [00:00<00:00, 316.14it/s]
100%|██████████| 53/53 [00:00<00:00, 322.05it/s]


Loss Decreasing.. 0.151 >> 0.142 
saving model...
Epoch:21/1000.. Train Loss: 0.140.. Val Loss: 0.142.. Test Loss: 0.168.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 66.39it/s]
100%|██████████| 18/18 [00:00<00:00, 316.82it/s]
100%|██████████| 53/53 [00:00<00:00, 327.12it/s]


Loss Decreasing.. 0.142 >> 0.138 
saving model...
Epoch:22/1000.. Train Loss: 0.142.. Val Loss: 0.138.. Test Loss: 0.164.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 69.03it/s]
100%|██████████| 18/18 [00:00<00:00, 322.27it/s]
100%|██████████| 53/53 [00:00<00:00, 304.70it/s]


Epoch:23/1000.. Train Loss: 0.137.. Val Loss: 0.157.. Test Loss: 0.169.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 69.18it/s]
100%|██████████| 18/18 [00:00<00:00, 315.84it/s]
100%|██████████| 53/53 [00:00<00:00, 317.53it/s]


Epoch:24/1000.. Train Loss: 0.139.. Val Loss: 0.166.. Test Loss: 0.161.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.75it/s]
100%|██████████| 18/18 [00:00<00:00, 319.43it/s]
100%|██████████| 53/53 [00:00<00:00, 325.22it/s]


Loss Decreasing.. 0.138 >> 0.134 
saving model...
Epoch:25/1000.. Train Loss: 0.131.. Val Loss: 0.134.. Test Loss: 0.161.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 59.59it/s]
100%|██████████| 18/18 [00:00<00:00, 321.00it/s]
100%|██████████| 53/53 [00:00<00:00, 320.76it/s]


Epoch:26/1000.. Train Loss: 0.130.. Val Loss: 0.137.. Test Loss: 0.156.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 65.63it/s]
100%|██████████| 18/18 [00:00<00:00, 323.16it/s]
100%|██████████| 53/53 [00:00<00:00, 300.65it/s]


Loss Decreasing.. 0.134 >> 0.131 
saving model...
Epoch:27/1000.. Train Loss: 0.130.. Val Loss: 0.131.. Test Loss: 0.155.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.85it/s]
100%|██████████| 18/18 [00:00<00:00, 309.77it/s]
100%|██████████| 53/53 [00:00<00:00, 313.92it/s]


Epoch:28/1000.. Train Loss: 0.123.. Val Loss: 0.134.. Test Loss: 0.152.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.93it/s]
100%|██████████| 18/18 [00:00<00:00, 282.33it/s]
100%|██████████| 53/53 [00:00<00:00, 302.46it/s]


Epoch:29/1000.. Train Loss: 0.121.. Val Loss: 0.142.. Test Loss: 0.147.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 62.87it/s]
100%|██████████| 18/18 [00:00<00:00, 316.75it/s]
100%|██████████| 53/53 [00:00<00:00, 323.84it/s]


Loss Decreasing.. 0.131 >> 0.124 
saving model...
Epoch:30/1000.. Train Loss: 0.127.. Val Loss: 0.124.. Test Loss: 0.149.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.96it/s]
100%|██████████| 18/18 [00:00<00:00, 321.10it/s]
100%|██████████| 53/53 [00:00<00:00, 324.05it/s]


Epoch:31/1000.. Train Loss: 0.129.. Val Loss: 0.129.. Test Loss: 0.140.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 66.16it/s]
100%|██████████| 18/18 [00:00<00:00, 280.10it/s]
100%|██████████| 53/53 [00:00<00:00, 311.32it/s]


Epoch:32/1000.. Train Loss: 0.121.. Val Loss: 0.129.. Test Loss: 0.140.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.81it/s]
100%|██████████| 18/18 [00:00<00:00, 321.04it/s]
100%|██████████| 53/53 [00:00<00:00, 317.27it/s]


Epoch:33/1000.. Train Loss: 0.109.. Val Loss: 0.127.. Test Loss: 0.136.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 66.27it/s]
100%|██████████| 18/18 [00:00<00:00, 318.45it/s]
100%|██████████| 53/53 [00:00<00:00, 324.76it/s]


Loss Decreasing.. 0.124 >> 0.124 
saving model...
Epoch:34/1000.. Train Loss: 0.117.. Val Loss: 0.124.. Test Loss: 0.136.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 64.92it/s]
100%|██████████| 18/18 [00:00<00:00, 321.21it/s]
100%|██████████| 53/53 [00:00<00:00, 308.84it/s]


Epoch:35/1000.. Train Loss: 0.114.. Val Loss: 0.126.. Test Loss: 0.130.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.73it/s]
100%|██████████| 18/18 [00:00<00:00, 325.05it/s]
100%|██████████| 53/53 [00:00<00:00, 323.99it/s]


Epoch:36/1000.. Train Loss: 0.118.. Val Loss: 0.126.. Test Loss: 0.133.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 67.00it/s]
100%|██████████| 18/18 [00:00<00:00, 327.47it/s]
100%|██████████| 53/53 [00:00<00:00, 321.73it/s]


Epoch:37/1000.. Train Loss: 0.112.. Val Loss: 0.130.. Test Loss: 0.128.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 64.74it/s]
100%|██████████| 18/18 [00:00<00:00, 322.61it/s]
100%|██████████| 53/53 [00:00<00:00, 323.98it/s]


Epoch:38/1000.. Train Loss: 0.104.. Val Loss: 0.129.. Test Loss: 0.133.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 68.24it/s]
100%|██████████| 18/18 [00:00<00:00, 320.27it/s]
100%|██████████| 53/53 [00:00<00:00, 324.46it/s]


Epoch:39/1000.. Train Loss: 0.112.. Val Loss: 0.124.. Test Loss: 0.133.. Time: 0.03m


100%|██████████| 107/107 [00:01<00:00, 68.89it/s]
100%|██████████| 18/18 [00:00<00:00, 318.31it/s]
100%|██████████| 53/53 [00:00<00:00, 324.78it/s]


Epoch:40/1000.. Train Loss: 0.105.. Val Loss: 0.135.. Test Loss: 0.134.. Time: 0.03m


 74%|███████▍  | 79/107 [00:01<00:00, 67.13it/s]

In [None]:
fig, ax = plt.subplots()

# One curve per split: the per-epoch mean batch loss recorded during training.
for split_name in ('Train', 'Validation', 'Test'):
    ax.plot(training.results[split_name], label=split_name)
ax.legend()

## Evaluation and plotting classes

In [None]:
class OSAEvaluator:
    """OSA evaluation: every prediction is made from the true target history
    stored in the dataset sample."""

    def evaluate_OSA(self, dataset, model):
        """Predict every sample of ``dataset`` and print MSE, RMSE and R2.

        The model is put in eval mode and run without gradient tracking; its
        previous train/eval mode is restored afterwards.

        Parameters
        ----------
        dataset : iterable of ``(src, tgt, y_real)`` tensors
        model : nn.Module called as ``model(src, tgt)``

        Returns
        -------
        (Y_real, Y_pred) : CPU tensors with one row per sample.
        """
        torch.cuda.empty_cache()
        Y_real = []
        Y_pred = []
        was_training = model.training
        model.eval()  # disable dropout while predicting
        try:
            # no_grad: avoids keeping one autograd graph per stored prediction.
            with torch.no_grad():
                for src, tgt, y_real in tqdm(dataset):
                    src, tgt, y_real = src.to(device), tgt.to(device), y_real.to(device)
                    y_pred = model(src, tgt)
                    Y_real.append(y_real.reshape(1,-1))
                    Y_pred.append(y_pred)
        finally:
            model.train(was_training)

        Y_real = torch.vstack(Y_real).cpu()
        Y_pred = torch.vstack(Y_pred).cpu().detach()
        mse = mean_squared_error(Y_real, Y_pred)
        print('********** OSA Evaluation summary **********')
        print(f'OSA MSE: {mse}')
        print(f'OSA RMSE: {np.sqrt(mse)}')
        print(f'OSA R2 score: {r2_score(Y_real, Y_pred)}')
        print('********************************************')
        return Y_real, Y_pred
            
class FSEvaluator:
    """FS evaluation: after the first sample, the target history fed to the
    model is built from the model's own previous predictions."""

    def evaluate_FS(self, dataset, model):
        """Simulate ``dataset`` feeding predictions back and print MSE, RMSE and R2.

        The first sample uses the true target history. For every following
        sample the history is shifted one position along its last axis and
        the previous prediction is appended as the newest value.

        Assumes each ``tgt`` is laid out as ``(n_outputs, window)``, i.e. time
        runs along the last axis, and that the model returns one value per
        target row. NOTE(review): confirm if used with another dataset layout.

        The model is put in eval mode and run without gradient tracking; its
        previous train/eval mode is restored afterwards.

        Returns
        -------
        (Y_real, Y_pred) : CPU tensors with one row per sample.
        """
        torch.cuda.empty_cache()
        Y_real = []
        Y_pred = []
        tgt_sim = None
        y_pred = None
        was_training = model.training
        model.eval()  # disable dropout while simulating
        try:
            with torch.no_grad():
                for src, tgt, y_real in tqdm(dataset):
                    src, tgt, y_real = src.to(device), tgt.to(device), y_real.to(device)
                    if tgt_sim is None:
                        # clone(): on CPU `tgt` is a view of the dataset storage
                        # and must not be modified.
                        tgt_sim = tgt.clone()
                    else:
                        # Drop the oldest time step and append the last prediction
                        # as the newest one (a new tensor, no in-place overlap).
                        newest = y_pred.reshape(*tgt_sim.shape[:-1], 1)
                        tgt_sim = torch.cat((tgt_sim[..., 1:], newest), dim=-1)
                    y_pred = model(src, tgt_sim)
                    Y_real.append(y_real.reshape(1,-1))
                    Y_pred.append(y_pred)
        finally:
            model.train(was_training)

        Y_real = torch.vstack(Y_real).cpu()
        Y_pred = torch.vstack(Y_pred).cpu().detach()
        mse = mean_squared_error(Y_real, Y_pred)
        print('*********** FS Evaluation summary **********')
        print(f'FS MSE: {mse}')
        print(f'FS RMSE: {np.sqrt(mse)}')
        print(f'FS R2 score: {r2_score(Y_real, Y_pred)}')
        print('********************************************')
        return Y_real, Y_pred
    
class Evaluator(OSAEvaluator, FSEvaluator):
    """Combines both evaluators: exposes ``evaluate_OSA`` and ``evaluate_FS``."""
        

In [None]:
# Evaluate the model as left by the training run, on the whole dataset.
evaluator = Evaluator()
Y_real, Y_pred_OSA = evaluator.evaluate_OSA(dataset=dataset, model=model)
Y_real, Y_pred_FS = evaluator.evaluate_FS(dataset=dataset, model=model)

In [None]:
fig, ax = plt.subplots()

# Real values against both kinds of prediction.
for series, label in ((Y_real, 'Real'), (Y_pred_OSA, 'OSA'), (Y_pred_FS, 'FS')):
    ax.plot(series, label=label)
ax.legend()

In [None]:
# Same evaluation, this time with the reloaded best checkpoint.
evaluator = Evaluator()
Y_real, Y_pred_OSA = evaluator.evaluate_OSA(dataset=dataset, model=best_model)
Y_real, Y_pred_FS = evaluator.evaluate_FS(dataset=dataset, model=best_model)

In [None]:
fig, ax = plt.subplots()

# Real values against both kinds of prediction.
for series, label in ((Y_real, 'Real'), (Y_pred_OSA, 'OSA'), (Y_pred_FS, 'FS')):
    ax.plot(series, label=label)
ax.legend()