<a href="https://colab.research.google.com/github/textnorms/address_text_norm/blob/master/PL_datas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! nvidia-smi

Mon Jun 29 12:32:32 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   51C    P8    30W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install pytorch-lightning --quiet

[K     |████████████████████████████████| 307kB 2.7MB/s 
[K     |████████████████████████████████| 829kB 8.4MB/s 
[K     |████████████████████████████████| 276kB 16.9MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone
  Building wheel for PyYAML (setup.py) ... [?25l[?25hdone


# Imports

In [3]:
! rm -rf *
! git clone -q https://github.com/textnorms/date_text_norm.git
! cp -r date_text_norm/syntetic_data/ .

! pip install -q num2words transformers

[K     |████████████████████████████████| 102kB 2.3MB/s 
[K     |████████████████████████████████| 675kB 11.0MB/s 
[K     |████████████████████████████████| 1.1MB 9.7MB/s 
[K     |████████████████████████████████| 890kB 19.4MB/s 
[K     |████████████████████████████████| 3.8MB 25.2MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [4]:
#Basics
from argparse import Namespace
import matplotlib.pyplot as plt
import random
import pandas as pd
import numpy as np

# Synthetic data generator
from syntetic_data import DateTextGenerator
from syntetic_data import RelativeDateTextGenerator

#Pytorch
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader

#Pytorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import seed_everything


# Transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW

# Sklearn
from sklearn.model_selection import train_test_split

In [5]:
# Assuring reproductibility
manual_seed = 2357 # only primes, cuz I like
def deterministic(rep=True):
    if rep:
        np.random.seed(manual_seed)
        torch.manual_seed(manual_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(manual_seed)
            torch.cuda.manual_seed_all(manual_seed)
        torch.backends.cudnn.enabled = False 
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        print(f'Deterministic experiment, seed: {manual_seed}')
    else:
        print('Random experiment')

deterministic()
seed_everything(2357)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Deterministic experiment, seed: 2357
Using device: cuda


# LightningBase



In [6]:
class LightningBase:

    '''
        hparams needs to contain:
            - lr (float)
            - optimizer (str)
            - optimizer_kwargs (Dict[str,Optional])
            - train_batch_size (int)
            - val_batch_size (int)
            - shuffle_train (bool)
            - num_workers (int)
        Properties needed:
            - train_dataset (Dataset)
            - val_dataset (Dataset)
            - test_dataset (Dataset)
    '''

    def _average_key(self, outputs, key):
        return torch.stack([o[key] for o in outputs]).float().mean()

    def get_dataloader(self, dataset, batch_size, shuffle, num_workers):
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers
        )

    def get_optimizer(self,):
        optimizer_name = self.hparams.optimizer
        lr = self.hparams.lr
        optimizer_hparams = self.hparams.optimizer_kwargs
        optimizer = getattr(torch.optim, optimizer_name)
        return optimizer(self.parameters(), lr=lr, **optimizer_hparams)

    def train_dataloader(self,):
        return self.get_dataloader(
            self.train_dataset,
            batch_size=self.hparams.train_batch_size,
            shuffle=self.hparams.shuffle_train,
            num_workers=self.hparams.num_workers
        )

    def val_dataloader(self,):
        return self.get_dataloader(
            self.valid_dataset,
            batch_size=self.hparams.val_batch_size,
            shuffle=False,
            num_workers=self.hparams.num_workers
        )

    def test_dataloader(self,):
        return self.get_dataloader(
            self.test_dataset,
            batch_size=self.hparams.val_batch_size,
            shuffle=False,
            num_workers=self.hparams.num_workers
        )

    def configure_optimizers(self):
        optimizer = self.get_optimizer()
        return optimizer

# Dataset

In [7]:
# Dataset params
LANGUAGE = 'pt'

# Model params
MODEL_SZ = 't5-small'
TOK = T5Tokenizer.from_pretrained(MODEL_SZ)
MAX_LEN_SRC  = 48
MAX_LEN_TRGT = 12

# Train params
BATCH_SZ = 16
N_EPOCHS = 3
WINDOW   = 7

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…




In [8]:
pd.set_option('display.max_rows',70)

print('Generating absolute and incomplete dates')
dates = DateTextGenerator(start_date='01/01/1921',
                          end_date='31/12/2120',
                          language=LANGUAGE)

print('Generating relative dates')
rel_dates = RelativeDateTextGenerator(n_samples=500, text_noise_rate=0.0,
                                       max_noise_occurences_per_sample=3,
                                       samples_per_method=18,
                                       language=LANGUAGE)

df = dates.generate_date_dataset()
df = df.append(rel_dates.generate_date_dataset(),ignore_index=True)

Generating absolute and incomplete dates
Generating relative dates


In [9]:
df['Target Format'].value_counts()

DD/MM/YYYY    73049
Relative       9000
MM/YYYY        7200
DD/MM          2562
Name: Target Format, dtype: int64

## Function to split the dataset

In [10]:
def split_data(df, test_size=0.2, verbose=True):
    l = list(set(df['Input Pattern'].values))
    num_test = int(len(l)*test_size)
    test_methods = [random.randint(1, len(l)) for _ in range(num_test)]
    print(test_methods)
    df_test = df[df['Input Pattern'].isin(test_methods)]
    print(df_test.shape)
    x_test = df_test.Input.values
    y_test = df_test.Target.values

    df_train = df[~df['Input Pattern'].isin(test_methods)]

    x_train, x_val, y_train, y_val = train_test_split(
        df_train.Input.values,
        df_train.Target.values,
        shuffle=True, 
        test_size=test_size,
        random_state=2357
        )
    if verbose:
        print(f'Date types of test set: {test_methods} with len: {len(test_methods)}')
        print(f'x_train: {len(x_train)}  --  y_train: {len(y_train)}\n\
x_val:   {len(x_val)}  --  y_val:   {len(y_val)}\n\
x_test:  {len(x_test)}  --  y_test:  {len(y_test)}')

    return x_train, y_train, x_val, y_val, x_test, y_test

# creating sets
x_train, y_train, x_val, y_val, x_test, y_test = split_data(df, 
                                                            test_size=0.25, 
                                                            verbose=True)

[41, 8, 15, 15, 2, 26, 24, 36, 6, 20, 25]
(20667, 5)
Date types of test set: [41, 8, 15, 15, 2, 26, 24, 36, 6, 20, 25] with len: 11
x_train: 53358  --  y_train: 53358
x_val:   17786  --  y_val:   17786
x_test:  20667  --  y_test:  20667


In [11]:
class DateDataset(Dataset):
    def __init__(self, data, label, tokenizer, source_max_length, target_max_length):
        self.tokenizer = tokenizer
        self.data = data
        self.label = label
        self.source_max_length = source_max_length
        self.target_max_length = target_max_length
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        source = self.data[idx]
        target = self.label[idx]

        source_tokenized = self.tokenizer.encode_plus(
            f'{source} {self.tokenizer.eos_token}',
            max_length=self.source_max_length,
            pad_to_max_length=True,
            return_tensors='pt')

        target_tokenized = self.tokenizer.encode_plus(
            f'{target} {self.tokenizer.eos_token}',
            max_length=self.target_max_length,
            pad_to_max_length=True,
            return_tensors='pt')

        source_token_ids = source_tokenized['input_ids'].squeeze()
        source_mask = source_tokenized['attention_mask'].squeeze()
        target_token_ids = target_tokenized['input_ids'].squeeze()
        
        return source_token_ids, source_mask, target_token_ids

In [12]:
class DateBase(LightningBase):

    ## AS FUNÇÕES ABAIXO SÃO NECESSÁRIAS PARA O PYTORCH LIGHTNING ##
    
    ## O único método exigido nesta classe é prepare_data
    ## outros métodos podem auxiliares podem ser criados acima

    def prepare_data(self):
        '''
            O dataset deve ser preparado utilizando este método.
            Ao final da execução devemos ter declarado as seguinte variáveis
                - self.train_dataset
                - self.valid_dataset
                - self.test_dataset
        '''
        self.train_dataset = DateDataset(x_train, y_train, TOK, MAX_LEN_SRC, MAX_LEN_TRGT)
        self.valid_dataset = DateDataset(x_val, y_val, TOK, MAX_LEN_SRC, MAX_LEN_TRGT)
        self.test_dataset  = DateDataset(x_test, y_test, TOK, MAX_LEN_SRC, MAX_LEN_TRGT)

# Model

In [13]:
class DateNet(torch.nn.Module):
    def __init__(self):
        super(DateNet, self).__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(MODEL_SZ)
        self.tokenizer = TOK
    
    def forward(self, source_token_ids, source_mask, target_token_ids=None,
                target_mask=None):

        if self.training:
            # Calcular a loss dado os target_token_ids

            loss = self.model.forward(
                input_ids = source_token_ids,
                attention_mask = source_mask,
                lm_labels  = target_token_ids
                )[0]
            
            return loss

        else:

            predicted_token_ids = model.generate(
            source_token_ids.to(device), 
            source_mask.to(device),
            MAX_LEN_TRGT
            )
            
            return predicted_token_ids
 
    @torch.no_grad()    
    def generate(self, token_ids, att_mask, max_len_target):
        predict = self.model.generate(
            input_ids=token_ids, 
            attention_mask=att_mask,
            max_length=max_len_target
            )
        return predict
    
    @torch.no_grad()  
    def generate_example(self, text_input, tokenizer, max_len_source=MAX_LEN_SRC):

        self.model.eval()
        
        example_tokenized = tokenizer.encode_plus(
            f'{text_input} {tokenizer.eos_token}',
            max_length=max_len_source,
            pad_to_max_length=True,
            return_tensors='pt')
            
        example_token_ids = example_tokenized['input_ids']
        example_mask = example_tokenized['attention_mask']

        predicted_example = self.model.generate(
            input_ids=example_token_ids.to(device), 
            attention_mask=example_mask.to(device),
            max_length=MAX_LEN_TRGT
            )

        self.model.train()

        out_text = [tokenizer.decode(text) for text in predicted_example]
        
        return out_text

# Pytorch Lightning

In [14]:
class DatePL(DateNet, pl.LightningModule):

    def _handle_batch(self, batch):
        source_token_ids, source_mask, target_token_ids = batch
        loss = self(source_token_ids, 
                    source_mask, 
                    target_token_ids
                    )
        return loss

    def _handle_eval_batch(self, batch):
        outputs = self._handle_batch(batch)

        return outputs

    def _handle_eval_epoch_end(self, outputs, phase):
        loss_avg = self._average_key(outputs, f'{phase}_loss')
        return loss_avg

    ## AS FUNÇÕES ABAIXO SÃO NECESSÁRIAS PARA O PYTORCH LIGHTNING ##

    def training_step(self, batch, batch_idx):
        outputs = self._handle_batch(batch)
        return {'loss': outputs}

    def validation_step(self, batch, batch_idx):

        source_token_ids = batch[0]
        source_mask = batch[1]
        target_token_ids = batch[2]

        predicted_token_ids = self.forward(source_token_ids, source_mask)

        predicted = [self.tokenizer.decode(token_ids) 
                     for token_ids in predicted_token_ids]

        target = [self.tokenizer.decode(token_ids) 
                     for token_ids in target_token_ids]

        correct_preds = np.sum(np.array(predicted) == np.array(target))

        if batch_idx % 100 == 0:
          print('pred:',predicted)
          print('target:',target)

        return {'val_correct_preds': correct_preds}

    def test_step(self, batch, batch_idx):
        source_token_ids = batch[0]
        source_mask = batch[1]
        target_token_ids = batch[2]

        predicted_token_ids = self.forward(source_token_ids, source_mask)

        predicted = [self.tokenizer.decode(token_ids) 
                     for token_ids in predicted_token_ids]

        target = [self.tokenizer.decode(token_ids) 
                     for token_ids in target_token_ids]

        correct_preds = np.sum(np.array(predicted) == np.array(target))

        if batch_idx % 100 == 0:
          print('pred:',predicted)
          print('target:',target)

        return {'test_correct_preds': correct_preds}

    def validation_epoch_end(self, outputs):

        acc = sum([x['val_correct_preds'] for x in outputs]) / len(self.valid_dataset)
        
        print(f'validation accuracy: {acc}')
        return {'avg_acc_val': acc}

    def test_epoch_end(self, outputs):
        acc = sum([x['test_correct_preds'] for x in outputs]) / len(self.test_dataset)
        return {'avg_acc_test': acc}


# Final Class

In [15]:
class DateTuner(DateBase, DatePL):

    '''
        Classe final responsável por controlar os hiper parâmetros e
        combinar as classes do modelo e dataset;

        default_hparams deve conter sugestões de todos hiper parâmetros
        necessários para se criar o modelo
    '''

    default_hparams = {
        "lr": 5e-4,
        "optimizer": 'Adam',
        "optimizer_kwargs": {},
        "train_batch_size": 16,
        "val_batch_size": 16,
        "shuffle_train": True,
        "num_workers": 4,
        "deterministic": True,
        "seed": 2357,
    }

    def __init__(self, hparams=None):
        super(DatePL, self).__init__()
        self.hparams = self._construct_hparams(hparams)

    def _construct_hparams(self, hparams):
        default_hparams = self.default_hparams.copy()
        if hparams is not None:
            default_hparams.update(hparams)

        if default_hparams['deterministic']:
            pl.seed_everything(default_hparams['seed'])

        return Namespace(**default_hparams)

# Training

In [16]:
# hiper parametros que serão utilizados pelas classes acima
hparams = {
    "train_batch_size": 16,
    "val_batch_size": 16,
    "deterministic": True,
    "seed": 2357
}

# hiper parametros que serão utilizados pela class Trainer do Lightning
trainer_hparams = {
    "gpus": 1,
    "max_epochs": N_EPOCHS, 
    "progress_bar_refresh_rate":100,
}

# atualizamos o primeiro com o segundo para que todos fiquem salvos no arquivo
# de hiper parametros hparams.yaml
hparams.update(trainer_hparams)

In [17]:
try:
  del model
except:
  print('Model already erased, starting a new one!')

model = DateTuner()

Model already erased, starting a new one!


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1197.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=242136741.0, style=ProgressStyle(descri…




## Dev Run

Primeiramente fazemos *fast_dev_run* para ter certeza que não erros no códigos.

O *fast_dev_run* irá rodar um *step* de cada fase: treino, validação e teste.

In [18]:
dev_run = True

if dev_run:
    trainer = pl.Trainer(fast_dev_run=True, **trainer_hparams)
    trainer.fit(model)

Running in fast_dev_run mode: will run a full train, val and test loop using a single batch
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

pred: ['11 do mês dez de 2023', '26 - julho - 1966', '15 15 - 8 - 2104', '01 01 / 12 / 2114', '12-7-1953', '1 / 11 / 1972', '3 / mai / 1973', '27. 2. 2066', 'janeiro dois mil mil e novent', '1925', '04 04. 11. 2026', 'jun-2049', 'trinta de janeiro de dois', '12. 01. 1996', '1929', '27/12/2056']
target: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
validation accuracy: 5.622399640166423e-05



## Fit

Por fim fazemos *fit* no dataset inteiro

In [19]:
trainer = pl.Trainer(**trainer_hparams)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]


In [20]:
trainer.fit(model)


  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

pred: ['11 do mês dez de 2023', '26 - julho - 1966', '15 15 - 8 - 2104', '01 01 / 12 / 2114', '12-7-1953', '1 / 11 / 1972', '3 / mai / 1973', '27. 2. 2066', 'janeiro dois mil mil e novent', '1925', '04 04. 11. 2026', 'jun-2049', 'trinta de janeiro de dois', '12. 01. 1996', '1929', '27/12/2056']
target: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
validation accuracy: 5.622399640166423e-05


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

pred: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
target: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
pred: ['25/11/2011', '20/08/2102', '-465d', '05/2073', '18d', '04/03/1932', '15/08/1937', '28/10/1932', '21/11/2019', '29/09/2007', '24/01/1976', '30/04/2026', '08/12/1926', '18/04/2052', '15/01/2010', '18/04/1991']
target: ['25/11/2011', '20/08/2102', '-465d', '05/2073', '18d', '04/03/1932', '15/08/1937', '28/10/1932', '21/11/2019', '29/09/2007', '24/01/1976', '30/04/2026', '08/12/1926', '18/04/2052', '15/01/2010', '18/04/1991']
pred: ['12/06/1937', '10/05/1924', '11/08/2055', '22/03/2001', '02/02/1980', '04/12/2007', '27/07/2074', '20/08/2090', '

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

pred: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
target: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
pred: ['25/11/2011', '20/08/2102', '-465d', '05/2073', '18d', '04/03/1932', '15/08/1937', '28/10/1932', '21/11/2019', '29/09/2007', '24/01/1976', '30/04/2026', '08/12/1926', '18/04/2052', '15/01/2010', '18/04/1991']
target: ['25/11/2011', '20/08/2102', '-465d', '05/2073', '18d', '04/03/1932', '15/08/1937', '28/10/1932', '21/11/2019', '29/09/2007', '24/01/1976', '30/04/2026', '08/12/1926', '18/04/2052', '15/01/2010', '18/04/1991']
pred: ['12/06/1937', '10/05/1924', '11/08/2055', '22/03/2001', '02/02/1980', '04/12/2007', '27/07/2074', '20/08/2090', '

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

pred: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
target: ['11/10/2023', '26/07/1966', '15/08/2104', '01/12/2114', '12/07/1953', '01/11/1972', '03/05/1973', '27/02/2066', '01/2090', '13/10/1925', '04/11/2026', '06/2049', '30/01/2030', '12/01/1996', '02/1929', '27/12/2056']
pred: ['25/11/2011', '20/08/2102', '-465d', '05/2073', '18d', '04/03/1932', '15/08/1937', '28/10/1932', '21/11/2019', '29/09/2007', '24/01/1976', '30/04/2026', '08/12/1926', '18/04/2052', '15/01/2010', '18/04/1991']
target: ['25/11/2011', '20/08/2102', '-465d', '05/2073', '18d', '04/03/1932', '15/08/1937', '28/10/1932', '21/11/2019', '29/09/2007', '24/01/1976', '30/04/2026', '08/12/1926', '18/04/2052', '15/01/2010', '18/04/1991']
pred: ['12/06/1937', '10/05/1924', '11/08/2055', '22/03/2001', '02/02/1980', '04/12/2007', '27/07/2074', '20/08/2090', '

1

In [21]:
trainer.test(model)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

pred: ['05/01/1921', '14/01/1921', '17/01/1921', '19/01/1921', '21/01/1921', '24/01/1921', '26/01/1921', '27/01/1921', '29/01/1921', '30/01/1921', '31/01/1921', '02/02/1921', '09/03/1921', '17/03/1921', '21/03/1921', '23/03/1921']
target: ['05/01/1921', '14/01/1921', '17/01/1921', '19/01/1921', '21/01/1921', '24/01/1921', '26/01/1921', '27/01/1921', '29/01/1921', '30/01/1921', '31/01/1921', '02/02/1921', '09/03/1921', '17/03/1921', '21/03/1921', '23/03/1921']
pred: ['22/05/1941', '24/05/1941', '26/05/1941', '27/05/1941', '28/05/1941', '31/05/1941', '05/06/1941', '09/06/1941', '10/06/1941', '36/06/1941', '02/07/1941', '09/07/1941', '11/07/1941', '12/07/1941', '10/08/1941', '12/08/1941']
target: ['22/05/1941', '24/05/1941', '26/05/1941', '27/05/1941', '28/05/1941', '31/05/1941', '05/06/1941', '09/06/1941', '10/06/1941', '30/06/1941', '02/07/1941', '09/07/1941', '11/07/1941', '12/07/1941', '10/08/1941', '12/08/1941']
pred: ['27/08/1961', '21/09/1961', '22/09/1961', '23/09/1961', '24/09/19