In [1]:
import math
import json
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

!nvidia-smi

Mon Jun 19 14:55:23 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:15:00.0 Off |                    0 |
| N/A   24C    P0    38W / 300W |      0MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Fixed random seed so that results are reproducible
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Device on which tensors will be created, stored and processed
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


## Data Loader

In [3]:
class WellLoader(Dataset):
    """Sliding-window dataset built from per-well JSON tables.

    ``path`` must contain a ``metadata.json`` file (an iterable of records
    exposing at least the keys ``'WELL'`` and ``'INDEX'``) and one
    ``<INDEX>.json`` table per record. Every table must provide the columns
    ``BORE_OIL_VOL`` and ``BORE_WAT_VOL`` (used to derive ``BORE_LIQ_VOL``)
    plus every column named in ``var_in`` / ``var_out``.

    Parameters
    ----------
    path : str
        Directory holding ``metadata.json`` and the per-index JSON tables.
    wells : iterable of (well_name, row_filter)
        Wells to load. ``row_filter`` is applied to the first axis of the
        cleaned value arrays (the notebook passes ``slice`` objects).
    var_in, var_out : list of str
        Input / output column names.
    normalizing_percentile : float
        Percentile (per column) used as the divisor that scales the data.
    normalizing_split : float
        Fraction of rows held out (via ``train_test_split``) when estimating
        the percentile.
    normalizer : type
        Stored on the instance but not used by the current percentile-based
        scaling.
    max_sequence : int
        Window length.
    step : int
        Stride between consecutive windows.

    After construction ``batches_X``, ``batches_Y`` and ``outputs`` are tensors
    of shape ``(n_windows, n_columns, max_sequence)`` and ``normalizers`` holds
    one ``(scaler_X, scaler_Y)`` pair per loaded table.
    """

    def __init__(self,
                 path,
                 wells,
                 var_in,
                 var_out,
                 normalizing_percentile=90.0,
                 normalizing_split=0.2,
                 normalizer=RobustScaler,
                 max_sequence=16,
                 step=1):

        self.path = path
        with open(self.path + '/metadata.json', 'r') as metafile:
            self.metadata = json.load(metafile)
        self.wells = wells
        self.var_in = var_in
        self.var_out = var_out
        self.normalizing_percentile = normalizing_percentile
        self.normalizing_split = normalizing_split
        self.normalizer = normalizer
        self.max_sequence = max_sequence
        self.step = step
        self.batches_X = None
        self.batches_Y = None
        self.outputs = None
        self.normalizers = []

        indexes = self.get_wells_index(self.wells)
        self.load_data_by_index(indexes)

    def get_wells_index(self, wells):
        """Return ``(INDEX, row_filter)`` for every metadata record matching a requested well."""
        indexes = []
        for well, filt in wells:
            indexes.extend([(meta['INDEX'], filt) for meta in self.metadata if meta['WELL'] == well])
        return indexes

    @staticmethod
    def _safe_scale(scale):
        """Replace zero divisors by 1 so that scaling never yields inf/NaN."""
        return np.where(scale == 0, 1.0, scale)

    def load_data_by_index(self, indexes):
        """Load, scale and window every table listed in ``indexes``.

        Overwrites ``batches_X``, ``batches_Y``, ``outputs`` and ``normalizers``.
        """
        batches_X = []
        batches_Y = []
        outputs = []
        # Keep the normalizers aligned with the tables loaded by THIS call.
        self.normalizers = []
        for index, filt in indexes:
            data = pd.read_json(f'{self.path}/{index}.json')
            # Workaround: derive the liquid volume so the target column exists.
            data['BORE_LIQ_VOL'] = data['BORE_OIL_VOL'] + data['BORE_WAT_VOL']
            data = data[self.var_in + self.var_out].dropna().reset_index(drop=True)
            X = data[self.var_in].values[filt, :]
            Y = data[self.var_out].values[filt, :]

            # Estimate the scaling percentile on a random subset of the rows.
            X_base, _, Y_base, _ = train_test_split(X, Y, test_size=self.normalizing_split)
            scaler_X = self._safe_scale(
                np.percentile(X_base, self.normalizing_percentile, axis=0, keepdims=True))
            scaler_Y = self._safe_scale(
                np.percentile(Y_base, self.normalizing_percentile, axis=0, keepdims=True))
            self.normalizers.append((scaler_X, scaler_Y))

            X, Y = X / scaler_X, Y / scaler_Y
            X, Y = torch.from_numpy(X.astype('float32')), torch.from_numpy(Y.astype('float32'))

            # unfold -> (n_windows, n_columns, max_sequence)
            X = X.unfold(0, self.max_sequence, self.step)
            Y = Y.unfold(0, self.max_sequence, self.step)
            # Window i is paired with window i + 1 of Y as its expected output,
            # so the last window has no output and the first has no input.
            batches_X.append(X[:-1, :, :])
            batches_Y.append(Y[:-1, :, :])
            outputs.append(Y[1:, :, :])
        self.batches_X = torch.cat(batches_X, dim=0)
        self.batches_Y = torch.cat(batches_Y, dim=0)
        self.outputs = torch.cat(outputs, dim=0)

    def __len__(self):
        """Number of (source, target, output) window triples."""
        return self.outputs.shape[0]

    def __getitem__(self, idx):
        """Return the ``idx``-th source, target and output windows as ``(max_sequence, n_columns)`` tensors."""
        srcs = self.batches_X[idx, :, :]
        trgts = self.batches_Y[idx, :, :]
        output = self.outputs[idx, :, :]

        return srcs.permute(1, 0), trgts.permute(1, 0), output.permute(1, 0)
        
        
                 

In [4]:
path = './dataset/volve'

# (well name, row filter) pairs used for training, listed in the order in
# which they are loaded. Well '15/9-F-5' is currently left out. An earlier
# variant used slice(15, 600) for '15/9-F-11' and slice(1900, None) for
# '15/9-F-14'.
wells = [
    ('15/9-F-15 D', slice(10, 900)),
    ('15/9-F-14', slice(200, None)),
    ('15/9-F-12', slice(None, 800)),
    ('15/9-F-11', slice(15, None)),
]

# Input columns fed to the model.
var_in = [
    'AVG_DOWNHOLE_PRESSURE',
    'AVG_WHP_P',
    'AVG_CHOKE_SIZE_P',
    'AVG_WHT_P',
    'AVG_DOWNHOLE_TEMPERATURE',
]

# Target column. Other candidates that are currently disabled:
# 'BORE_OIL_VOL', 'BORE_GAS_VOL', 'BORE_WAT_VOL'.
var_out = ['BORE_LIQ_VOL']

dataset = WellLoader(path=path, wells=wells, var_in=var_in, var_out=var_out, max_sequence=16)
print(f'Total data for training :{len(dataset)}')

Total data for training :5424


In [5]:
path = './dataset/volve'

# Single held-out well used only for testing.
wells = [('15/9-F-1 C', slice(28, None))]

# Input columns fed to the model (same as for the training dataset).
var_in = [
    'AVG_DOWNHOLE_PRESSURE',
    'AVG_WHP_P',
    'AVG_CHOKE_SIZE_P',
    'AVG_WHT_P',
    'AVG_DOWNHOLE_TEMPERATURE',
]

# Target column. Other candidates that are currently disabled:
# 'BORE_OIL_VOL', 'BORE_GAS_VOL', 'BORE_WAT_VOL'.
var_out = ['BORE_LIQ_VOL']

test_dataset = WellLoader(path=path, wells=wells, var_in=var_in, var_out=var_out, max_sequence=16)
print(f'Total data for testing :{len(test_dataset)}')

Total data for testing :699


In [6]:
class SubsetSplitter:
    """Split a dataset into train / validation / test ``DataLoader`` objects.

    The test partition is always the trailing ``test_split`` fraction of the
    dataset. The remaining indices are optionally shuffled (with NumPy's global
    RNG) and then cut into the train and validation partitions. Every loader
    draws its indices through a ``SubsetRandomSampler``.
    """

    def __init__(self, batch_size, validation_split, test_split, shuffle=False):
        self.shuffle = shuffle
        self.test_split = test_split
        self.validation_split = validation_split
        self.batch_size = batch_size

    def __call__(self, dataset: Dataset):
        """Return ``(train_loader, validation_loader, test_loader)`` for ``dataset``."""
        total = len(dataset)
        n_valid = int(np.floor(self.validation_split * total))
        n_test = int(np.floor(self.test_split * total))
        n_train = total - n_valid - n_test

        # The tail of the dataset is reserved for testing before any shuffling.
        every_index = list(range(total))
        boundary = n_train + n_valid
        held_out = every_index[boundary:]
        remaining = every_index[:boundary]
        if self.shuffle:
            np.random.shuffle(remaining)

        partitions = (remaining[:n_train], remaining[n_train:], held_out)
        samplers = [SubsetRandomSampler(part) for part in partitions]
        return tuple(
            torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, sampler=sampler)
            for sampler in samplers
        )

In [7]:
# Batches of 16 windows; 25% of the windows for validation, the last 5% for testing.
splitter = SubsetSplitter(batch_size=16, validation_split=0.25, test_split=0.05)
train_loader, validation_loader, test_loader = splitter(dataset)

## Model definition

In [8]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout.

    Modified version from:
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html

    The encoding table is stored as a buffer of shape ``(max_len, 1, dim_model)``
    and is sliced by ``token_embedding.size(0)``, so ``forward`` treats the
    FIRST axis of its input as the sequence axis (sequence-first layout).

    Parameters
    ----------
    dim_model : int
        Size of the embedding axis. Both even and odd values are supported.
    dropout_p : float
        Dropout probability applied after adding the encoding.
    max_len : int
        Number of positions pre-computed in the table.
    """

    def __init__(self, dim_model, dropout_p, max_len):
        super().__init__()

        self.dropout = nn.Dropout(dropout_p)

        # Encoding - from the formula
        pos_encoding = torch.zeros(max_len, dim_model)
        positions_list = torch.arange(0, max_len, dtype=torch.float).view(-1, 1)  # 0, 1, 2, 3, 4, 5
        # 1 / 10000^(2i/dim_model)
        division_term = torch.exp(torch.arange(0, dim_model, 2).float() * (-math.log(10000.0)) / dim_model)
        angles = positions_list * division_term

        # PE(pos, 2i) = sin(pos / 10000^(2i/dim_model))
        pos_encoding[:, 0::2] = torch.sin(angles)

        # PE(pos, 2i + 1) = cos(pos / 10000^(2i/dim_model))
        # For an odd dim_model there is one fewer odd column than even columns,
        # so the surplus frequency is dropped instead of raising a shape error.
        pos_encoding[:, 1::2] = torch.cos(angles[:, :dim_model // 2])

        # Saving buffer (same as parameter without gradients needed)
        pos_encoding = pos_encoding.unsqueeze(0).transpose(0, 1)
        self.register_buffer("pos_encoding", pos_encoding)

    def forward(self, token_embedding: torch.Tensor) -> torch.Tensor:
        """Add the positional encoding to ``token_embedding`` and apply dropout."""
        # Residual connection + pos encoding
        return self.dropout(token_embedding + self.pos_encoding[:token_embedding.size(0), :])
    
    
# Quick check of the encoding on an all-zero input. Dropout is active (the
# module is in training mode), so surviving values are rescaled by 1 / (1 - 0.3).
positional_encoding = PositionalEncoding(dim_model=10, dropout_p=0.3, max_len=1200)
tensor = torch.zeros(1, 2, 10)
encoded = positional_encoding(tensor)
encoded

tensor([[[0.0000, 0.0000, 0.0000, 1.4286, 0.0000, 1.4286, 0.0000, 1.4286,
          0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4286, 0.0000, 0.0000,
          0.0000, 1.4286]]])

In [9]:
class TSTransformer(nn.Module):
    """
    Encoder-decoder Transformer for sequence-to-sequence regression.

    Model from "A detailed guide to Pytorch's nn.Transformer() module.", by
    Daniel Melchor: https://medium.com/@danielmelchor/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1

    The encoder works directly on the ``src_dim`` input features and the
    decoder directly on the ``tgt_dim`` target features (there are no embedding
    layers). A linear layer maps the encoder memory from ``src_dim`` to
    ``tgt_dim`` so the decoder can attend to it. All layers are built with
    ``batch_first=True``.

    NOTE(review): every Transformer layer normalises over its feature axis with
    ``LayerNorm``. With ``tgt_dim=1`` a LayerNorm over a single feature returns
    its bias regardless of the input, which makes the decoder output constant.
    Confirm whether a projection to a wider decoder dimension is needed.

    Parameters
    ----------
    num_outputs : int
        Number of values predicted per time step.
    src_dim, tgt_dim : int
        Feature sizes of the source and target sequences.
    src_heads, tgt_heads : int
        Attention heads in the encoder / decoder.
    num_encoder_layers, num_decoder_layers : int
        Depth of the encoder / decoder stacks.
    dropout_p : float
        Dropout probability inside the Transformer layers.
    dim_feedforward : int
        Hidden size of the feed-forward sub-layers.
    num_linear_layers : int
        Number of extra ``Linear(tgt_dim, tgt_dim) + ReLU6`` pairs applied to
        the decoder output before the final projection.
    norm_first : bool
        Forwarded to the encoder and decoder layers.
    """
    # Constructor
    def __init__(
        self,
        num_outputs,
        src_dim,
        tgt_dim,
        src_heads,
        tgt_heads,
        num_encoder_layers,
        num_decoder_layers,
        dropout_p,
        dim_feedforward =2048,
        num_linear_layers=0,
        norm_first=False
    ):
        super().__init__()

        # INFO
        self.model_type = "Transformer"
        self.src_dim = src_dim
        self.tgt_dim = tgt_dim

        # LAYERS

        # Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=src_dim,
            nhead=src_heads,
            dim_feedforward=dim_feedforward,
            dropout=dropout_p,
            batch_first=True,
            norm_first=norm_first
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer = encoder_layer,
            num_layers = num_encoder_layers,
            norm=None,
        )

        # Decoder
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=tgt_dim,
            nhead=tgt_heads,
            dim_feedforward=dim_feedforward,
            dropout=dropout_p,
            batch_first=True,
            norm_first=norm_first
        )
        self.decoder = nn.TransformerDecoder(
            decoder_layer = decoder_layer,
            num_layers = num_decoder_layers,
            norm=None,
        )

        # Encoder and decoder may have different feature sizes, so the memory
        # is projected from src_dim to tgt_dim.
        self.memory_match = nn.Linear(src_dim, tgt_dim)

        # Optional head applied to the decoder output, which has tgt_dim
        # features (the original referenced an undefined `dim_model` here).
        self.linear_layers = nn.ModuleList()
        for _ in range(num_linear_layers):
            self.linear_layers.append(nn.Linear(tgt_dim, tgt_dim))
            self.linear_layers.append(nn.ReLU6())
        self.out = nn.Linear(tgt_dim, num_outputs)


    def forward(self, src, tgt, src_mask=None, tgt_mask=None, src_pad_mask=None, tgt_pad_mask=None, is_causal=False):
        """Run the encoder-decoder and return the projected decoder output.

        ``src`` is (batch_size, src sequence length, src_dim) and ``tgt`` is
        (batch_size, tgt sequence length, tgt_dim). ``src_mask`` is used both
        as the encoder self-attention mask and as the decoder memory mask.
        The result has ``num_outputs`` features on its last axis.
        """

        # Step 1 - Passing source through encoder
        memory = self.encoder(src,
                              mask=src_mask,
                              src_key_padding_mask=src_pad_mask,
                              is_causal=is_causal)

        # Step 2 - Matching memory shape to tgt shape
        memory = self.memory_match(memory)

        # Step 3 - Passing tgt and memory through decoder
        transformer_out = self.decoder(tgt,
                                       memory,
                                       tgt_mask=tgt_mask,
                                       memory_mask=src_mask,
                                       tgt_key_padding_mask=tgt_pad_mask,
                                       memory_key_padding_mask=src_pad_mask,
                                      )

        for linear in self.linear_layers:
            transformer_out = linear(transformer_out)

        return self.out(transformer_out)

    def get_tgt_mask(self, size) -> torch.Tensor:
        """Return a ``(size, size)`` additive causal mask (0 = visible, -inf = hidden)."""
        # Each row allows one more position to be seen.
        # EX for size=5:
        # [[0., -inf, -inf, -inf, -inf],
        #  [0.,   0., -inf, -inf, -inf],
        #  [0.,   0.,   0., -inf, -inf],
        #  [0.,   0.,   0.,   0., -inf],
        #  [0.,   0.,   0.,   0.,   0.]]
        return torch.triu(torch.full((size, size), float('-inf')), diagonal=1)

    def create_pad_mask(self, matrix: torch.Tensor, pad_token: int) -> torch.Tensor:
        """Return a boolean mask that is True wherever ``matrix`` equals ``pad_token``."""
        # If matrix = [1,2,3,0,0,0] where pad_token=0, the result mask is
        # [False, False, False, True, True, True]
        return (matrix == pad_token)

In [10]:
class Training:
    """Training driver with per-epoch validation/test evaluation and early stopping.

    Parameters
    ----------
    epochs : int
        Maximum number of epochs.
    loss : callable
        Loss function called as ``loss(prediction, expected)``.
    optimizer
        Optimizer stepped once per training batch.
    scheduler
        Learning-rate scheduler stepped once per training batch, or ``None``.
    path : str
        Directory where the best model is saved.
    model_name : str
        File stem of the saved checkpoint (``<path>/<model_name>.pt``).
    model_size : int
        Size of the square causal mask requested from ``model.get_tgt_mask``.
    early_stop : bool
        Whether ``fit`` stops once patience is exhausted.
    patience : int
        Number of non-improving epochs tolerated before stopping.

    The loops move tensors to the module-level ``device`` and record the mean
    loss per batch of every epoch in ``results['Train' | 'Validation' | 'Test']``.
    """

    def __init__(self, epochs, loss, optimizer, scheduler, path, model_name='Transformer', model_size=8, early_stop=True, patience=5):

        self.loss = loss
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.epochs = epochs
        self.path = path
        self.model_name = model_name
        self.model_size = model_size
        self.early_stop_flag = early_stop
        self.patience = patience
        # Model currently being fitted; used as the checkpoint fallback.
        self._model = None
        self.clear_results()

    def clear_results(self):
        """Reset the per-epoch loss history."""
        self.results = {
            'Train':[],
            'Validation':[],
            'Test':[],
        }

    def _checkpoint_path(self):
        """Location of the best-model checkpoint."""
        return self.path + f'/{self.model_name}.pt'

    def fit(self, model, train_loader, validation_loader, test_loader):
        """Train ``model`` for up to ``epochs`` epochs, checkpointing the best validation loss."""
        self.clear_results()
        torch.cuda.empty_cache()
        decrease = self.patience
        self._model = model

        model.to(device)
        fit_time = time.time()

        for e in range(self.epochs):
            since = time.time()
            model.train()
            self.train_loop(model, train_loader)
            model.eval()
            self.validation_loop(model, validation_loader)
            self.test_loop(model, test_loader)
            decrease, not_improved = self.early_stopping(validation_loader, decrease, model=model)
            if not_improved == 1 and self.early_stop_flag:
                print('[***] end training ...')
                break
            loss_per_training_batch = self.results['Train'][-1]
            loss_per_validation_batch = self.results['Validation'][-1]
            loss_per_test_batch = self.results['Test'][-1]
            print("Epoch:{}/{}..".format(e+1, self.epochs),
                  "Train Loss: {:.3f}..".format(loss_per_training_batch),
                  "Val Loss: {:.3f}..".format(loss_per_validation_batch),
                  "Test Loss: {:.3f}..".format(loss_per_test_batch),
                  "Time: {:.2f}m".format((time.time()-since)/60))
        print('Total time: {:.2f} m' .format((time.time()- fit_time)/60))

    def _run_epoch(self, model, loader, results_key, training):
        """Run one pass over ``loader`` and append the mean batch loss to ``results[results_key]``.

        When ``training`` is true the optimizer (and scheduler, if any) is
        stepped after every batch; otherwise gradients are disabled.
        """
        mask = model.get_tgt_mask(self.model_size).to(device)
        running_loss = 0.0
        with torch.set_grad_enabled(training):
            for X, y_tgt, y_out in tqdm(loader):
                X, y_tgt, y_out = X.to(device), y_tgt.to(device), y_out.to(device)

                output = model(X, y_tgt, src_mask=mask, tgt_mask=mask)
                # `[None, -1, :]` prepends an axis and then takes index -1 of the
                # ORIGINAL first axis. Batches are stacked on the first axis, so
                # this compares the whole sequence of the LAST sample of the batch.
                # NOTE(review): confirm this is intended; `[:, -1, :]` would
                # instead select the last time step of every sample.
                loss = self.loss(output[None,-1,:], y_out[None,-1,:])

                if training:
                    loss.backward()
                    self.optimizer.step()       # update weights
                    self.optimizer.zero_grad()  # reset gradients
                    # step the learning rate
                    if self.scheduler is not None:
                        self.scheduler.step()
                running_loss += loss.item()

        n_batches = len(loader)
        # An empty loader (e.g. a 0% split) records NaN instead of dividing by zero.
        self.results[results_key].append(running_loss / n_batches if n_batches else float('nan'))

    def train_loop(self, model, train_loader):
        """One optimisation pass over ``train_loader``; records the mean loss under ``'Train'``."""
        self._run_epoch(model, train_loader, 'Train', training=True)

    def validation_loop(self, model, validation_loader):
        """Gradient-free pass over ``validation_loader``; records the mean loss under ``'Validation'``."""
        self._run_epoch(model, validation_loader, 'Validation', training=False)

    def test_loop(self, model, test_loader):
        """Gradient-free pass over ``test_loader``; records the mean loss under ``'Test'``."""
        self._run_epoch(model, test_loader, 'Test', training=False)

    def early_stopping(self, validation_loader, decrease, model=None):
        """Update the patience counter and checkpoint the model on improvement.

        Parameters
        ----------
        validation_loader
            Unused; kept for backward compatibility.
        decrease : int
            Remaining patience before this epoch.
        model : optional
            Model to save when the latest validation loss is the best so far.
            Defaults to the model passed to the last ``fit`` call.

        Returns
        -------
        (decrease, not_improved)
            The updated patience counter and 1 if it dropped below zero, else 0.

        Raises
        ------
        ValueError
            If a checkpoint is due but no model is available.
        """
        if model is None:
            model = self._model
        loss_per_validation_batch = self.results['Validation'][-1]
        min_loss = np.min(self.results['Validation'][:-1] + [np.inf])
        if min_loss >= loss_per_validation_batch:
            print('Loss Decreasing.. {:.3f} >> {:.3f} '.format(min_loss, loss_per_validation_batch))
            decrease = self.patience
            print('saving model...')
            if model is None:
                raise ValueError('early_stopping needs a model to save: pass model=... or call fit() first')
            torch.save(model, self._checkpoint_path())
        else:
            decrease -= 1
        not_improved = 1 if decrease < 0 else 0
        return decrease, not_improved

    def get_best_model(self):
        """Load and return the checkpoint written by ``early_stopping``."""
        # The checkpoint is a fully pickled module, so it must be loaded with
        # weights_only=False. Unpickling executes arbitrary code: only load
        # checkpoints produced by this notebook.
        model = torch.load(self._checkpoint_path(), weights_only=False)
        return model
            
            

In [11]:
# NOTE(review): with tgt_dim=1 every decoder LayerNorm normalises a single
# feature, which always yields the LayerNorm bias; the decoder output would
# then not depend on its inputs. Confirm against the flat loss curve.
model = TSTransformer(
    num_outputs=1,
    src_dim=5,
    tgt_dim=1,
    src_heads=1,
    tgt_heads=1,
    num_encoder_layers=24,
    num_decoder_layers=24,
    dropout_p=0.1,
    dim_feedforward=20,
    num_linear_layers=0,
    norm_first=False,
)
model = model.to(device)

print(model)

# Hyper-parameters and checkpoint location
lr_ = 5e-4
epoch = 1000
weight_decay = 1e-4
path = '.'
model_name = 'TRANSPOSED-MULTI'

loss = torch.nn.MSELoss()
# AdamW is the optimizer in use; plain Adam and SGD (same learning rate) were
# tried as alternatives.
optimizer = torch.optim.AdamW(model.parameters(), lr=lr_, weight_decay=weight_decay)
# One-cycle schedule stepped once per training batch.
sched = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=lr_,
    epochs=epoch,
    steps_per_epoch=len(train_loader),
)
training = Training(
    epoch,
    loss,
    optimizer,
    sched,
    path=path,
    model_name=model_name,
    model_size=dataset.max_sequence,
    early_stop=True,
    patience=20,
)

TSTransformer(
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-23): 24 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=5, out_features=5, bias=True)
        )
        (linear1): Linear(in_features=5, out_features=20, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=20, out_features=5, bias=True)
        (norm1): LayerNorm((5,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((5,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (decoder): TransformerDecoder(
    (layers): ModuleList(
      (0-23): 24 x TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=1, out_features=1, bias=True)
        )
        (multihead_attn): MultiheadAttenti

In [None]:
# Run the training loop, then reload the checkpoint with the lowest validation loss.
training.fit(
    model=model,
    train_loader=train_loader,
    validation_loader=validation_loader,
    test_loader=test_loader,
)
best_model = training.get_best_model()

100%|██████████| 238/238 [00:29<00:00,  8.11it/s]
  return torch._native_multi_head_attention(
100%|██████████| 85/85 [00:02<00:00, 41.94it/s]
100%|██████████| 17/17 [00:00<00:00, 42.32it/s]


Loss Decreasing.. inf >> 0.214 
saving model...
Epoch:1/1000.. Train Loss: 0.175.. Val Loss: 0.214.. Test Loss: 0.202.. Time: 0.53m


100%|██████████| 238/238 [00:27<00:00,  8.58it/s]
100%|██████████| 85/85 [00:02<00:00, 40.81it/s]
100%|██████████| 17/17 [00:00<00:00, 40.01it/s]


Epoch:2/1000.. Train Loss: 0.158.. Val Loss: 0.222.. Test Loss: 0.223.. Time: 0.50m


100%|██████████| 238/238 [00:28<00:00,  8.32it/s]
100%|██████████| 85/85 [00:02<00:00, 42.27it/s]
100%|██████████| 17/17 [00:00<00:00, 41.79it/s]


Loss Decreasing.. 0.214 >> 0.202 
saving model...
Epoch:3/1000.. Train Loss: 0.159.. Val Loss: 0.202.. Test Loss: 0.233.. Time: 0.52m


100%|██████████| 238/238 [00:29<00:00,  8.06it/s]
100%|██████████| 85/85 [00:02<00:00, 39.70it/s]
100%|██████████| 17/17 [00:00<00:00, 40.01it/s]


Epoch:4/1000.. Train Loss: 0.159.. Val Loss: 0.206.. Test Loss: 0.199.. Time: 0.54m


100%|██████████| 238/238 [00:30<00:00,  7.76it/s]
100%|██████████| 85/85 [00:02<00:00, 39.08it/s]
100%|██████████| 17/17 [00:00<00:00, 39.26it/s]


Loss Decreasing.. 0.202 >> 0.196 
saving model...
Epoch:5/1000.. Train Loss: 0.172.. Val Loss: 0.196.. Test Loss: 0.165.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.88it/s]
100%|██████████| 85/85 [00:02<00:00, 39.79it/s]
100%|██████████| 17/17 [00:00<00:00, 39.38it/s]


Epoch:6/1000.. Train Loss: 0.134.. Val Loss: 0.246.. Test Loss: 0.167.. Time: 0.55m


100%|██████████| 238/238 [00:30<00:00,  7.69it/s]
100%|██████████| 85/85 [00:02<00:00, 39.69it/s]
100%|██████████| 17/17 [00:00<00:00, 39.06it/s]


Epoch:7/1000.. Train Loss: 0.164.. Val Loss: 0.271.. Test Loss: 0.156.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.82it/s]
100%|██████████| 85/85 [00:02<00:00, 40.03it/s]
100%|██████████| 17/17 [00:00<00:00, 39.87it/s]


Epoch:8/1000.. Train Loss: 0.164.. Val Loss: 0.235.. Test Loss: 0.129.. Time: 0.55m


100%|██████████| 238/238 [00:31<00:00,  7.60it/s]
100%|██████████| 85/85 [00:02<00:00, 37.35it/s]
100%|██████████| 17/17 [00:00<00:00, 39.21it/s]


Epoch:9/1000.. Train Loss: 0.175.. Val Loss: 0.223.. Test Loss: 0.153.. Time: 0.57m


100%|██████████| 238/238 [00:29<00:00,  8.13it/s]
100%|██████████| 85/85 [00:02<00:00, 38.79it/s]
100%|██████████| 17/17 [00:00<00:00, 38.72it/s]


Epoch:10/1000.. Train Loss: 0.183.. Val Loss: 0.223.. Test Loss: 0.168.. Time: 0.53m


100%|██████████| 238/238 [00:28<00:00,  8.30it/s]
100%|██████████| 85/85 [00:01<00:00, 42.57it/s]
100%|██████████| 17/17 [00:00<00:00, 42.00it/s]


Epoch:11/1000.. Train Loss: 0.175.. Val Loss: 0.219.. Test Loss: 0.192.. Time: 0.52m


100%|██████████| 238/238 [00:28<00:00,  8.29it/s]
100%|██████████| 85/85 [00:02<00:00, 40.07it/s]
100%|██████████| 17/17 [00:00<00:00, 41.52it/s]


Epoch:12/1000.. Train Loss: 0.186.. Val Loss: 0.274.. Test Loss: 0.187.. Time: 0.52m


100%|██████████| 238/238 [00:28<00:00,  8.34it/s]
100%|██████████| 85/85 [00:02<00:00, 42.37it/s]
100%|██████████| 17/17 [00:00<00:00, 42.48it/s]


Epoch:13/1000.. Train Loss: 0.177.. Val Loss: 0.256.. Test Loss: 0.138.. Time: 0.52m


100%|██████████| 238/238 [00:28<00:00,  8.43it/s]
100%|██████████| 85/85 [00:02<00:00, 42.36it/s]
100%|██████████| 17/17 [00:00<00:00, 41.00it/s]


Epoch:14/1000.. Train Loss: 0.177.. Val Loss: 0.242.. Test Loss: 0.170.. Time: 0.51m


100%|██████████| 238/238 [00:28<00:00,  8.38it/s]
100%|██████████| 85/85 [00:02<00:00, 41.72it/s]
100%|██████████| 17/17 [00:00<00:00, 42.11it/s]


Epoch:15/1000.. Train Loss: 0.146.. Val Loss: 0.227.. Test Loss: 0.263.. Time: 0.52m


100%|██████████| 238/238 [00:28<00:00,  8.27it/s]
100%|██████████| 85/85 [00:02<00:00, 42.09it/s]
100%|██████████| 17/17 [00:00<00:00, 42.24it/s]


Epoch:16/1000.. Train Loss: 0.165.. Val Loss: 0.233.. Test Loss: 0.142.. Time: 0.52m


100%|██████████| 238/238 [00:28<00:00,  8.22it/s]
100%|██████████| 85/85 [00:02<00:00, 40.64it/s]
100%|██████████| 17/17 [00:00<00:00, 41.96it/s]


Loss Decreasing.. 0.196 >> 0.175 
saving model...
Epoch:17/1000.. Train Loss: 0.172.. Val Loss: 0.175.. Test Loss: 0.189.. Time: 0.53m


100%|██████████| 238/238 [00:28<00:00,  8.47it/s]
100%|██████████| 85/85 [00:02<00:00, 42.08it/s]
100%|██████████| 17/17 [00:00<00:00, 42.15it/s]


Epoch:18/1000.. Train Loss: 0.169.. Val Loss: 0.248.. Test Loss: 0.183.. Time: 0.51m


100%|██████████| 238/238 [00:25<00:00,  9.51it/s]
100%|██████████| 85/85 [00:02<00:00, 41.77it/s]
100%|██████████| 17/17 [00:00<00:00, 41.90it/s]


Epoch:19/1000.. Train Loss: 0.178.. Val Loss: 0.294.. Test Loss: 0.155.. Time: 0.46m


100%|██████████| 238/238 [00:28<00:00,  8.38it/s]
100%|██████████| 85/85 [00:01<00:00, 42.58it/s]
100%|██████████| 17/17 [00:00<00:00, 42.29it/s]


Epoch:20/1000.. Train Loss: 0.158.. Val Loss: 0.223.. Test Loss: 0.116.. Time: 0.51m


100%|██████████| 238/238 [00:28<00:00,  8.24it/s]
100%|██████████| 85/85 [00:02<00:00, 39.40it/s]
100%|██████████| 17/17 [00:00<00:00, 38.43it/s]


Epoch:21/1000.. Train Loss: 0.157.. Val Loss: 0.218.. Test Loss: 0.165.. Time: 0.53m


100%|██████████| 238/238 [00:30<00:00,  7.91it/s]
100%|██████████| 85/85 [00:01<00:00, 42.73it/s]
100%|██████████| 17/17 [00:00<00:00, 42.43it/s]


Epoch:22/1000.. Train Loss: 0.164.. Val Loss: 0.266.. Test Loss: 0.103.. Time: 0.54m


100%|██████████| 238/238 [00:28<00:00,  8.35it/s]
100%|██████████| 85/85 [00:02<00:00, 41.81it/s]
100%|██████████| 17/17 [00:00<00:00, 42.00it/s]


Epoch:23/1000.. Train Loss: 0.164.. Val Loss: 0.231.. Test Loss: 0.243.. Time: 0.52m


100%|██████████| 238/238 [00:27<00:00,  8.60it/s]
100%|██████████| 85/85 [00:02<00:00, 41.27it/s]
100%|██████████| 17/17 [00:00<00:00, 41.58it/s]


Epoch:24/1000.. Train Loss: 0.188.. Val Loss: 0.240.. Test Loss: 0.117.. Time: 0.50m


100%|██████████| 238/238 [00:29<00:00,  8.01it/s]
100%|██████████| 85/85 [00:02<00:00, 39.32it/s]
100%|██████████| 17/17 [00:00<00:00, 40.07it/s]


Epoch:25/1000.. Train Loss: 0.169.. Val Loss: 0.295.. Test Loss: 0.209.. Time: 0.54m


100%|██████████| 238/238 [00:30<00:00,  7.73it/s]
100%|██████████| 85/85 [00:02<00:00, 37.84it/s]
100%|██████████| 17/17 [00:00<00:00, 38.92it/s]


Epoch:26/1000.. Train Loss: 0.175.. Val Loss: 0.190.. Test Loss: 0.138.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.73it/s]
100%|██████████| 85/85 [00:02<00:00, 39.16it/s]
100%|██████████| 17/17 [00:00<00:00, 37.63it/s]


Epoch:27/1000.. Train Loss: 0.158.. Val Loss: 0.244.. Test Loss: 0.182.. Time: 0.56m


100%|██████████| 238/238 [00:29<00:00,  7.95it/s]
100%|██████████| 85/85 [00:02<00:00, 39.59it/s]
100%|██████████| 17/17 [00:00<00:00, 37.13it/s]


Epoch:28/1000.. Train Loss: 0.160.. Val Loss: 0.218.. Test Loss: 0.189.. Time: 0.54m


100%|██████████| 238/238 [00:31<00:00,  7.66it/s]
100%|██████████| 85/85 [00:02<00:00, 39.15it/s]
100%|██████████| 17/17 [00:00<00:00, 39.37it/s]


Epoch:29/1000.. Train Loss: 0.167.. Val Loss: 0.243.. Test Loss: 0.197.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.88it/s]
100%|██████████| 85/85 [00:02<00:00, 39.51it/s]
100%|██████████| 17/17 [00:00<00:00, 37.52it/s]


Epoch:30/1000.. Train Loss: 0.162.. Val Loss: 0.190.. Test Loss: 0.124.. Time: 0.55m


100%|██████████| 238/238 [00:31<00:00,  7.66it/s]
100%|██████████| 85/85 [00:02<00:00, 39.95it/s]
100%|██████████| 17/17 [00:00<00:00, 39.70it/s]


Epoch:31/1000.. Train Loss: 0.129.. Val Loss: 0.241.. Test Loss: 0.132.. Time: 0.56m


100%|██████████| 238/238 [00:31<00:00,  7.66it/s]
100%|██████████| 85/85 [00:02<00:00, 39.09it/s]
100%|██████████| 17/17 [00:00<00:00, 38.76it/s]


Epoch:32/1000.. Train Loss: 0.155.. Val Loss: 0.328.. Test Loss: 0.135.. Time: 0.56m


100%|██████████| 238/238 [00:29<00:00,  7.96it/s]
100%|██████████| 85/85 [00:02<00:00, 39.97it/s]
100%|██████████| 17/17 [00:00<00:00, 39.28it/s]


Epoch:33/1000.. Train Loss: 0.150.. Val Loss: 0.263.. Test Loss: 0.190.. Time: 0.54m


100%|██████████| 238/238 [00:30<00:00,  7.76it/s]
100%|██████████| 85/85 [00:02<00:00, 39.38it/s]
100%|██████████| 17/17 [00:00<00:00, 39.41it/s]


Epoch:34/1000.. Train Loss: 0.176.. Val Loss: 0.206.. Test Loss: 0.183.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.78it/s]
100%|██████████| 85/85 [00:02<00:00, 38.50it/s]
100%|██████████| 17/17 [00:00<00:00, 38.35it/s]


Epoch:35/1000.. Train Loss: 0.179.. Val Loss: 0.261.. Test Loss: 0.134.. Time: 0.55m


100%|██████████| 238/238 [00:30<00:00,  7.86it/s]
100%|██████████| 85/85 [00:02<00:00, 39.07it/s]
100%|██████████| 17/17 [00:00<00:00, 39.41it/s]


Loss Decreasing.. 0.175 >> 0.171 
saving model...
Epoch:36/1000.. Train Loss: 0.186.. Val Loss: 0.171.. Test Loss: 0.191.. Time: 0.55m


100%|██████████| 238/238 [00:30<00:00,  7.71it/s]
100%|██████████| 85/85 [00:02<00:00, 38.80it/s]
100%|██████████| 17/17 [00:00<00:00, 40.11it/s]


Epoch:37/1000.. Train Loss: 0.172.. Val Loss: 0.207.. Test Loss: 0.175.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.74it/s]
100%|██████████| 85/85 [00:02<00:00, 40.46it/s]
100%|██████████| 17/17 [00:00<00:00, 39.88it/s]


Epoch:38/1000.. Train Loss: 0.149.. Val Loss: 0.259.. Test Loss: 0.203.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.68it/s]
100%|██████████| 85/85 [00:02<00:00, 39.00it/s]
100%|██████████| 17/17 [00:00<00:00, 40.03it/s]


Epoch:39/1000.. Train Loss: 0.167.. Val Loss: 0.263.. Test Loss: 0.225.. Time: 0.56m


100%|██████████| 238/238 [00:30<00:00,  7.79it/s]
100%|██████████| 85/85 [00:02<00:00, 39.92it/s]
100%|██████████| 17/17 [00:00<00:00, 38.23it/s]


Epoch:40/1000.. Train Loss: 0.152.. Val Loss: 0.269.. Test Loss: 0.219.. Time: 0.55m


100%|██████████| 238/238 [00:30<00:00,  7.85it/s]
100%|██████████| 85/85 [00:02<00:00, 39.66it/s]
100%|██████████| 17/17 [00:00<00:00, 39.77it/s]


Epoch:41/1000.. Train Loss: 0.165.. Val Loss: 0.202.. Test Loss: 0.124.. Time: 0.55m


100%|██████████| 238/238 [00:27<00:00,  8.57it/s]
100%|██████████| 85/85 [00:01<00:00, 42.92it/s]
100%|██████████| 17/17 [00:00<00:00, 42.95it/s]


Epoch:42/1000.. Train Loss: 0.173.. Val Loss: 0.268.. Test Loss: 0.149.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.61it/s]
100%|██████████| 85/85 [00:01<00:00, 42.92it/s]
100%|██████████| 17/17 [00:00<00:00, 43.10it/s]


Epoch:43/1000.. Train Loss: 0.139.. Val Loss: 0.190.. Test Loss: 0.176.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.57it/s]
100%|██████████| 85/85 [00:01<00:00, 43.00it/s]
100%|██████████| 17/17 [00:00<00:00, 42.90it/s]


Epoch:44/1000.. Train Loss: 0.171.. Val Loss: 0.219.. Test Loss: 0.201.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.65it/s]
100%|██████████| 85/85 [00:02<00:00, 42.48it/s]
100%|██████████| 17/17 [00:00<00:00, 43.18it/s]


Epoch:45/1000.. Train Loss: 0.175.. Val Loss: 0.191.. Test Loss: 0.086.. Time: 0.50m


100%|██████████| 238/238 [00:28<00:00,  8.45it/s]
100%|██████████| 85/85 [00:02<00:00, 38.59it/s]
100%|██████████| 17/17 [00:00<00:00, 37.13it/s]


Epoch:46/1000.. Train Loss: 0.183.. Val Loss: 0.243.. Test Loss: 0.188.. Time: 0.52m


100%|██████████| 238/238 [00:29<00:00,  8.05it/s]
100%|██████████| 85/85 [00:02<00:00, 37.62it/s]
100%|██████████| 17/17 [00:00<00:00, 39.66it/s]


Epoch:47/1000.. Train Loss: 0.161.. Val Loss: 0.208.. Test Loss: 0.095.. Time: 0.54m


100%|██████████| 238/238 [00:28<00:00,  8.21it/s]
100%|██████████| 85/85 [00:02<00:00, 40.38it/s]
100%|██████████| 17/17 [00:00<00:00, 40.71it/s]


Epoch:48/1000.. Train Loss: 0.178.. Val Loss: 0.253.. Test Loss: 0.189.. Time: 0.53m


100%|██████████| 238/238 [00:27<00:00,  8.67it/s]
100%|██████████| 85/85 [00:01<00:00, 43.00it/s]
100%|██████████| 17/17 [00:00<00:00, 42.15it/s]


Epoch:49/1000.. Train Loss: 0.143.. Val Loss: 0.218.. Test Loss: 0.142.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.61it/s]
100%|██████████| 85/85 [00:02<00:00, 42.44it/s]
100%|██████████| 17/17 [00:00<00:00, 41.83it/s]


Loss Decreasing.. 0.171 >> 0.160 
saving model...
Epoch:50/1000.. Train Loss: 0.173.. Val Loss: 0.160.. Test Loss: 0.164.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.61it/s]
100%|██████████| 85/85 [00:01<00:00, 42.81it/s]
100%|██████████| 17/17 [00:00<00:00, 42.56it/s]


Epoch:51/1000.. Train Loss: 0.171.. Val Loss: 0.214.. Test Loss: 0.133.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.68it/s]
100%|██████████| 85/85 [00:01<00:00, 43.47it/s]
100%|██████████| 17/17 [00:00<00:00, 43.26it/s]


Epoch:52/1000.. Train Loss: 0.161.. Val Loss: 0.205.. Test Loss: 0.217.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.64it/s]
100%|██████████| 85/85 [00:01<00:00, 42.80it/s]
100%|██████████| 17/17 [00:00<00:00, 41.46it/s]


Epoch:53/1000.. Train Loss: 0.170.. Val Loss: 0.188.. Test Loss: 0.125.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.63it/s]
100%|██████████| 85/85 [00:01<00:00, 42.70it/s]
100%|██████████| 17/17 [00:00<00:00, 42.41it/s]


Epoch:54/1000.. Train Loss: 0.159.. Val Loss: 0.233.. Test Loss: 0.118.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.55it/s]
100%|██████████| 85/85 [00:01<00:00, 42.85it/s]
100%|██████████| 17/17 [00:00<00:00, 42.54it/s]


Epoch:55/1000.. Train Loss: 0.166.. Val Loss: 0.236.. Test Loss: 0.161.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.57it/s]
100%|██████████| 85/85 [00:01<00:00, 42.82it/s]
100%|██████████| 17/17 [00:00<00:00, 42.81it/s]


Epoch:56/1000.. Train Loss: 0.153.. Val Loss: 0.225.. Test Loss: 0.193.. Time: 0.50m


100%|██████████| 238/238 [00:27<00:00,  8.66it/s]
 76%|███████▋  | 65/85 [00:01<00:00, 42.73it/s]

In [None]:
# Overlay the curves stored in `training.results`, one line per data split.
# NOTE(review): presumably these are per-epoch losses (the training log prints
# Train/Val/Test loss every epoch) -- confirm against the training class.
fig, ax = plt.subplots()

for split_name in ('Train', 'Validation', 'Test'):
    ax.plot(training.results[split_name], label=split_name)
ax.legend()

## Evaluation and plotting classes

In [None]:
class OSAEvaluator:
    """Evaluation where the model always receives the dataset's own `tgt`.

    NOTE(review): "OSA" presumably stands for one-step-ahead -- confirm.
    """

    def evaluate_OSA(self, dataset, model):
        """Run `model` over every item of `dataset` and report error metrics.

        Each dataset item is unpacked as ``(src, tgt, y_real)``; the model is
        called as ``model(src, tgt)`` and only the last row (``[-1, :]``) of
        both the prediction and ``y_real`` is kept.

        Args:
            dataset: iterable yielding ``(src, tgt, y_real)`` tensors, each
                indexable as 2-D.
            model: callable taking ``(src, tgt)`` and returning a tensor
                indexable as 2-D. If it exposes ``training``/``eval``/``train``
                (as ``nn.Module`` does), it is put in eval mode for the
                duration of the call and restored afterwards.

        Returns:
            Tuple ``(Y_real, Y_pred)`` of CPU tensors built with
            ``torch.vstack``, one row per dataset item.
        """
        torch.cuda.empty_cache()

        # Evaluate in inference mode (e.g. dropout off) but remember the
        # previous mode so the caller's model state is left untouched.
        was_training = getattr(model, 'training', False)
        if was_training:
            model.eval()

        Y_real = []
        Y_pred = []
        try:
            # No autograd graph is needed for scoring; without this every
            # stored prediction would keep its whole graph alive in memory.
            with torch.no_grad():
                for src, tgt, y_real in tqdm(dataset):
                    src, tgt = src.to(device), tgt.to(device)
                    y_pred = model(src, tgt)
                    # Only the final row of each window is scored; move it to
                    # the CPU right away so device memory does not accumulate.
                    Y_real.append(y_real[-1, :].cpu())
                    Y_pred.append(y_pred[-1, :].cpu())
        finally:
            if was_training:
                model.train()

        Y_real = torch.vstack(Y_real)
        Y_pred = torch.vstack(Y_pred)

        mse = mean_squared_error(Y_real, Y_pred)
        print('********** OSA Evaluation summary **********')
        print(f'OSA MSE: {mse}')
        print(f'OSA RMSE: {np.sqrt(mse)}')
        print(f'OSA R2 score: {r2_score(Y_real, Y_pred)}')
        print('********************************************')
        return Y_real, Y_pred
            
class FSEvaluator:
    """Evaluation where the model's own predictions are fed back as `tgt`.

    NOTE(review): "FS" presumably stands for free simulation -- confirm.
    """

    def evaluate_FS(self, dataset, model):
        """Roll the model forward over `dataset`, feeding back its predictions.

        The first item's ``tgt`` seeds a sliding window ``tgt_sim``. After each
        call ``model(src, tgt_sim)`` the window is shifted up by one row and
        its last row is overwritten with the last row of the prediction, so
        later steps never see the dataset's ``tgt`` again.

        Args:
            dataset: iterable yielding ``(src, tgt, y_real)`` tensors, each
                indexable as 2-D.
            model: callable taking ``(src, tgt)`` and returning a tensor
                indexable as 2-D whose last row fits into the last row of
                ``tgt``. If it exposes ``training``/``eval``/``train`` (as
                ``nn.Module`` does), it is put in eval mode for the duration
                of the call and restored afterwards.

        Returns:
            Tuple ``(Y_real, Y_pred)`` of CPU tensors built with
            ``torch.vstack``, one row per dataset item.
        """
        torch.cuda.empty_cache()

        # Evaluate in inference mode (e.g. dropout off) but remember the
        # previous mode so the caller's model state is left untouched.
        was_training = getattr(model, 'training', False)
        if was_training:
            model.eval()

        Y_real = []
        Y_pred = []
        tgt_sim = None
        try:
            # Without no_grad, writing y_pred into tgt_sim would chain every
            # step's autograd graph into the next one and memory would grow
            # over the whole rollout.
            with torch.no_grad():
                for src, tgt, y_real in tqdm(dataset):
                    src = src.to(device)
                    if tgt_sim is None:
                        # Clone: `.to(device)` can return the very same tensor
                        # (e.g. when already on `device`), and the in-place
                        # updates below must never write into dataset storage.
                        tgt_sim = tgt.to(device).clone()
                    y_pred = model(src, tgt_sim)
                    # Snapshot before mutating the window, in case the model
                    # output shares memory with its input.
                    last_pred = y_pred[-1, :].clone()
                    # Slide the window: drop the oldest row, append the new
                    # prediction as the most recent one.
                    tgt_sim[:-1, :] = tgt_sim[1:, :].clone()
                    tgt_sim[-1, :] = last_pred
                    Y_real.append(y_real[-1, :].cpu())
                    Y_pred.append(last_pred.cpu())
        finally:
            if was_training:
                model.train()

        Y_real = torch.vstack(Y_real)
        Y_pred = torch.vstack(Y_pred)

        mse = mean_squared_error(Y_real, Y_pred)
        print('*********** FS Evaluation summary **********')
        print(f'FS MSE: {mse}')
        print(f'FS RMSE: {np.sqrt(mse)}')
        print(f'FS R2 score: {r2_score(Y_real, Y_pred)}')
        print('********************************************')
        return Y_real, Y_pred
    
class Evaluator(OSAEvaluator, FSEvaluator):
    """Convenience class exposing both `evaluate_OSA` and `evaluate_FS`.

    Adds no behaviour of its own; everything is inherited from the two bases.
    """
        

In [None]:
# Score `model` on the test set with both evaluation modes.
# Both calls return the reference values, so `Y_real` is simply assigned twice.
evaluator = Evaluator()
Y_real, Y_pred_OSA = evaluator.evaluate_OSA(dataset=test_dataset, model=model)
Y_real, Y_pred_FS = evaluator.evaluate_FS(dataset=test_dataset, model=model)

In [None]:
# Compare the reference series with the OSA and FS predictions currently held
# in `Y_real`, `Y_pred_OSA` and `Y_pred_FS`.
fig, ax = plt.subplots()
for curve, curve_label in ((Y_real, 'Real'), (Y_pred_OSA, 'OSA'), (Y_pred_FS, 'FS')):
    ax.plot(curve, label=curve_label)
ax.legend()

In [None]:
# Score `best_model` on the test set with both evaluation modes.
# This overwrites `Y_real`, `Y_pred_OSA` and `Y_pred_FS` from any earlier run.
evaluator = Evaluator()
Y_real, Y_pred_OSA = evaluator.evaluate_OSA(dataset=test_dataset, model=best_model)
Y_real, Y_pred_FS = evaluator.evaluate_FS(dataset=test_dataset, model=best_model)

In [None]:
# Compare the reference series with the OSA and FS predictions currently held
# in `Y_real`, `Y_pred_OSA` and `Y_pred_FS`.
fig, ax = plt.subplots()
for curve, curve_label in ((Y_real, 'Real'), (Y_pred_OSA, 'OSA'), (Y_pred_FS, 'FS')):
    ax.plot(curve, label=curve_label)
ax.legend()