In [None]:
# model
import torch
import torch.nn as nn

# data 
import torchtext
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator

#training 
import tqdm 

In [None]:
### Random seed for deterministic results
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
device

device(type='cuda', index=0)

### Data 

In [5]:
BATCH_SIZE = 128
max_len = 48

In [6]:
# SRC Field
SRC = Field(
    init_token="<sos>",
    eos_token="<eos>",
    tokenize="spacy",
    tokenizer_language="de",
    batch_first=True,
    lower=True
)

In [7]:
# TRG Field
TRG = Field(
    init_token="<sos>",
    eos_token="<eos>",
    tokenize="spacy",
    tokenizer_language="en",
    batch_first=True,
    lower=True
)

In [8]:
## We're using Multi30K dataset
train, valid, test = Multi30k.splits(exts=(".de", ".en"), fields=(SRC, TRG))

In [9]:
# Bulding the vocab
SRC.build_vocab(train, min_freq=2)
TRG.build_vocab(train, min_freq=2)

In [10]:
# Data loader
train_loader, val_loader, test_loader = BucketIterator.splits(
    datasets=(train, valid, test),
    batch_size=BATCH_SIZE,
    device=device
)

In [11]:
for i, batch in enumerate(train_loader):
    print(batch.src.size())
    print(batch.trg.size())
    break

torch.Size([128, 29])
torch.Size([128, 30])


In [12]:
## Vocan Insepction
print(f'English vocab {len(TRG.vocab)}\nGerman vocab {len(SRC.vocab)}')

English vocab 5893
German vocab 7853


### Seq2Seq Model

#### Encoder

In [13]:
class Encoder(nn.Module):
    
    def __init__(self, hidden_dim, embedding_dim, vocab_size, num_layers=2, dropout=0.25):
        super(Encoder, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.seq = nn.LSTM(
            input_size=embedding_dim, 
            hidden_size=hidden_dim, 
            num_layers=num_layers, 
            batch_first=True, 
            dropout=dropout
        )
        self.dropout = nn.Dropout(p=dropout)
    
    def forward(self, src):
        
        embedded = self.embedding(src)
        
        outputs, (hidden, cell) = self.seq(embedded)
        
        return hidden, cell        

#### Decoder

In [14]:
class Decoder(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=2, dropout=0.25):
        super(Decoder, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.output_dim = vocab_size
        
        self.embedding  = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.seq = nn.LSTM(
            input_size=embedding_dim, 
            hidden_size=hidden_dim, 
            num_layers=num_layers, 
            batch_first=True, 
            dropout=dropout
        )
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)
    
    def forward(self, x, hidden, cell):
        
#         x = x.unsqueeze(1)
        
        embedded = self.dropout(self.embedding(x))
        
        output, (hidden, cell) = self.seq(embedded, (hidden, cell))
        
        prediction = self.fc(output.squeeze(0))
        
        return prediction, hidden, cell    

In [15]:
## Hidden_dim dim and num_layers of encoder and decoder must be sam 

In [17]:
class Seq2Seq(nn.Module):
    
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder  = encoder
        self.decoder = decoder
        
    
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        batch_size = src.size(0)
        trg_len = trg.size(1)
        
        trg_vocab_size = self.decoder.output_dim
        
        hidden, cell = encoder(src)
        
        prediction, _, _ = decoder(trg, hidden, cell)
        
        return prediction 
             

### Training

In [18]:
import torch.utils.tensorboard as tensorboard

In [19]:
encoder = Encoder(hidden_dim=64, embedding_dim=100, vocab_size=len(SRC.vocab)).to(device)
decoder = Decoder(vocab_size=len(TRG.vocab), embedding_dim=100, hidden_dim=64).to(device)
model = Seq2Seq(encoder, decoder).to(device)

In [20]:
lr = 1e-3
epochs = 20
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
writer = tensorboard.SummaryWriter()

In [21]:
###
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX).to(device)

In [22]:
def eval(model, data):
    losses = []
    with torch.no_grad():
        for batch in data:
            outputs = model(batch.src, batch.trg)
            loss = criterion(outputs.view(outputs.shape[0]*outputs.shape[1], outputs.shape[2]), batch.trg.view(-1))
            losses.append(loss.item())
    return sum(losses)/len(losses)
            

In [23]:
steps = 0
epoch_progress = tqdm.tqdm(total=epochs, desc="Epoch", position=0)

for epoch in range(epochs):
    
    step_progress = tqdm.tqdm(total=len(train_loader), )
    train_loss = []
    
    for batch in train_loader:
        src = batch.src
        trg = batch.trg
        
        outputs = model(src, trg)
        
        loss = criterion(outputs.view(outputs.shape[0]*outputs.shape[1], outputs.shape[2]), trg.view(-1))
        loss.backward()
        optimizer.step()
        
        
        if steps%100==0:
            print(f'Epoch {epoch} | Step {steps} | Train_loss {loss.item()} | Val_loss = {eval(model, val_loader)}')
            
        steps += 1
        step_progress.update(1)
    epoch_progress.update(1)    

Epoch:   0%|          | 0/20 [00:00<?, ?it/s]
  0%|          | 0/227 [00:00<?, ?it/s][A
  0%|          | 1/227 [00:00<00:25,  8.91it/s][A
  3%|▎         | 7/227 [00:00<00:18, 11.86it/s][A

Epoch 0 | Step 0 | Train_loss 8.672889709472656 | Val_loss = 8.658742308616638



  6%|▌         | 13/227 [00:00<00:13, 15.49it/s][A
  8%|▊         | 19/227 [00:00<00:10, 19.87it/s][A
 11%|█         | 25/227 [00:00<00:08, 24.48it/s][A
 14%|█▍        | 32/227 [00:00<00:06, 29.97it/s][A
 17%|█▋        | 39/227 [00:00<00:05, 35.83it/s][A
 20%|█▉        | 45/227 [00:00<00:04, 40.65it/s][A
 23%|██▎       | 52/227 [00:00<00:03, 45.70it/s][A
 26%|██▌       | 59/227 [00:01<00:03, 49.99it/s][A
 29%|██▉       | 66/227 [00:01<00:02, 53.74it/s][A
 32%|███▏      | 73/227 [00:01<00:02, 56.43it/s][A
 35%|███▌      | 80/227 [00:01<00:02, 59.66it/s][A
 38%|███▊      | 87/227 [00:01<00:02, 61.34it/s][A
 41%|████▏     | 94/227 [00:01<00:02, 62.77it/s][A
 44%|████▍     | 101/227 [00:01<00:02, 57.39it/s][A
 48%|████▊     | 108/227 [00:01<00:02, 58.52it/s][A

Epoch 0 | Step 100 | Train_loss 6.064271450042725 | Val_loss = 5.928687572479248



 51%|█████     | 115/227 [00:01<00:01, 59.56it/s][A
 54%|█████▎    | 122/227 [00:02<00:01, 61.10it/s][A
 57%|█████▋    | 129/227 [00:02<00:01, 60.61it/s][A
 60%|█████▉    | 136/227 [00:02<00:01, 60.83it/s][A
 63%|██████▎   | 143/227 [00:02<00:01, 61.51it/s][A
 66%|██████▌   | 150/227 [00:02<00:01, 61.64it/s][A
 69%|██████▉   | 157/227 [00:02<00:01, 61.49it/s][A
 72%|███████▏  | 164/227 [00:02<00:01, 61.21it/s][A
 75%|███████▌  | 171/227 [00:02<00:00, 61.30it/s][A
 78%|███████▊  | 178/227 [00:03<00:00, 61.75it/s][A
 81%|████████▏ | 185/227 [00:03<00:00, 62.13it/s][A
 85%|████████▍ | 192/227 [00:03<00:00, 62.78it/s][A
 88%|████████▊ | 199/227 [00:03<00:00, 63.65it/s][A
 91%|█████████ | 206/227 [00:03<00:00, 58.35it/s][A
 94%|█████████▍| 213/227 [00:03<00:00, 60.10it/s][A

Epoch 0 | Step 200 | Train_loss 6.405675888061523 | Val_loss = 6.162975311279297



 97%|█████████▋| 220/227 [00:03<00:00, 60.54it/s][A
Epoch:   5%|▌         | 1/20 [00:03<01:12,  3.82s/it]

100%|██████████| 227/227 [00:03<00:00, 59.43it/s]


  2%|▏         | 4/227 [00:00<00:05, 39.03it/s][A[A

  5%|▍         | 11/227 [00:00<00:04, 45.00it/s][A[A

  8%|▊         | 18/227 [00:00<00:04, 48.80it/s][A[A

 11%|█         | 25/227 [00:00<00:03, 52.67it/s][A[A

 14%|█▍        | 32/227 [00:00<00:03, 55.51it/s][A[A

 17%|█▋        | 39/227 [00:00<00:03, 57.20it/s][A[A

 20%|██        | 46/227 [00:00<00:03, 59.73it/s][A[A

 23%|██▎       | 53/227 [00:00<00:02, 61.10it/s][A[A

 26%|██▋       | 60/227 [00:00<00:02, 61.23it/s][A[A

 30%|██▉       | 67/227 [00:01<00:02, 61.44it/s][A[A

 33%|███▎      | 74/227 [00:01<00:02, 55.56it/s][A[A

 35%|███▌      | 80/227 [00:01<00:02, 56.76it/s][A[A

Epoch 1 | Step 300 | Train_loss 7.65480375289917 | Val_loss = 7.566578388214111




 38%|███▊      | 86/227 [00:01<00:02, 54.65it/s][A[A

 41%|████      | 93/227 [00:01<00:02, 57.39it/s][A[A

 44%|████▍     | 100/227 [00:01<00:02, 58.49it/s][A[A

 47%|████▋     | 106/227 [00:01<00:02, 58.73it/s][A[A

 50%|████▉     | 113/227 [00:01<00:01, 60.30it/s][A[A

 53%|█████▎    | 120/227 [00:02<00:01, 60.90it/s][A[A

 56%|█████▌    | 127/227 [00:02<00:01, 59.18it/s][A[A

 59%|█████▊    | 133/227 [00:02<00:01, 57.02it/s][A[A

 61%|██████    | 139/227 [00:02<00:01, 50.93it/s][A[A

 64%|██████▍   | 145/227 [00:02<00:01, 52.54it/s][A[A

 67%|██████▋   | 152/227 [00:02<00:01, 56.09it/s][A[A

 70%|███████   | 159/227 [00:02<00:01, 58.04it/s][A[A

 73%|███████▎  | 166/227 [00:02<00:01, 60.18it/s][A[A

 76%|███████▌  | 173/227 [00:02<00:00, 59.68it/s][A[A

 79%|███████▉  | 180/227 [00:03<00:00, 48.26it/s][A[A

Epoch 1 | Step 400 | Train_loss 9.703551292419434 | Val_loss = 9.197302460670471




 82%|████████▏ | 187/227 [00:03<00:00, 52.72it/s][A[A

 85%|████████▌ | 194/227 [00:03<00:00, 55.15it/s][A[A

 89%|████████▊ | 201/227 [00:03<00:00, 57.71it/s][A[A

 92%|█████████▏| 208/227 [00:03<00:00, 55.68it/s][A[A

 95%|█████████▌| 216/227 [00:03<00:00, 59.02it/s][A[A

Epoch:  10%|█         | 2/20 [00:07<01:09,  3.85s/it][A
100%|██████████| 227/227 [00:03<00:00, 58.05it/s]

  2%|▏         | 5/227 [00:00<00:04, 47.91it/s][A
  5%|▌         | 12/227 [00:00<00:04, 51.27it/s][A
  8%|▊         | 19/227 [00:00<00:03, 54.68it/s][A
 11%|█▏        | 26/227 [00:00<00:03, 58.09it/s][A
 14%|█▍        | 32/227 [00:00<00:03, 55.33it/s][A
 16%|█▋        | 37/227 [00:00<00:03, 50.87it/s][A
 19%|█▉        | 44/227 [00:00<00:03, 54.86it/s][A
 22%|██▏       | 50/227 [00:00<00:03, 51.74it/s][A
 25%|██▍       | 56/227 [00:01<00:03, 47.65it/s][A

Epoch 2 | Step 500 | Train_loss 9.686502456665039 | Val_loss = 8.989930152893066



 27%|██▋       | 62/227 [00:01<00:03, 50.21it/s][A
 30%|███       | 69/227 [00:01<00:02, 53.72it/s][A
 33%|███▎      | 76/227 [00:01<00:02, 54.61it/s][A
 36%|███▌      | 82/227 [00:01<00:02, 52.35it/s][A
 39%|███▉      | 89/227 [00:01<00:02, 56.21it/s][A
 42%|████▏     | 96/227 [00:01<00:02, 59.21it/s][A
 45%|████▌     | 103/227 [00:01<00:02, 51.94it/s][A
 48%|████▊     | 110/227 [00:02<00:02, 55.16it/s][A
 52%|█████▏    | 117/227 [00:02<00:01, 56.41it/s][A
 55%|█████▍    | 124/227 [00:02<00:01, 59.62it/s][A
 58%|█████▊    | 131/227 [00:02<00:01, 59.20it/s][A
 61%|██████    | 138/227 [00:02<00:01, 62.01it/s][A
 64%|██████▍   | 145/227 [00:02<00:01, 54.68it/s][A
 67%|██████▋   | 151/227 [00:02<00:01, 50.72it/s][A
 70%|██████▉   | 158/227 [00:02<00:01, 54.69it/s][A

Epoch 2 | Step 600 | Train_loss 11.585112571716309 | Val_loss = 11.266267657279968



 73%|███████▎  | 165/227 [00:02<00:01, 57.83it/s][A
 76%|███████▌  | 172/227 [00:03<00:00, 60.50it/s][A
 79%|███████▉  | 179/227 [00:03<00:00, 60.10it/s][A
 82%|████████▏ | 186/227 [00:03<00:00, 56.70it/s][A
 85%|████████▌ | 193/227 [00:03<00:00, 58.77it/s][A
 88%|████████▊ | 200/227 [00:03<00:00, 59.54it/s][A
 91%|█████████ | 207/227 [00:03<00:00, 55.86it/s][A
 94%|█████████▍| 214/227 [00:03<00:00, 57.83it/s][A
Epoch:  15%|█▌        | 3/20 [00:11<01:06,  3.90s/it]

  0%|          | 0/227 [00:00<?, ?it/s][A[A

  2%|▏         | 5/227 [00:00<00:04, 48.56it/s][A[A

  5%|▌         | 12/227 [00:00<00:04, 51.64it/s][A[A

  7%|▋         | 17/227 [00:00<00:04, 49.04it/s][A[A

  9%|▉         | 21/227 [00:00<00:05, 40.41it/s][A[A

 12%|█▏        | 27/227 [00:00<00:04, 44.56it/s][A[A

Epoch 3 | Step 700 | Train_loss 14.427364349365234 | Val_loss = 13.817185997962952




 15%|█▍        | 34/227 [00:00<00:03, 49.40it/s][A[A

 18%|█▊        | 41/227 [00:00<00:03, 52.14it/s][A[A

 21%|██        | 47/227 [00:00<00:03, 53.10it/s][A[A

 24%|██▍       | 54/227 [00:01<00:03, 55.86it/s][A[A

 26%|██▋       | 60/227 [00:01<00:03, 52.84it/s][A[A

 29%|██▉       | 66/227 [00:01<00:03, 48.49it/s][A[A

 32%|███▏      | 72/227 [00:01<00:03, 50.98it/s][A[A

 35%|███▍      | 79/227 [00:01<00:02, 54.66it/s][A[A

 38%|███▊      | 86/227 [00:01<00:02, 56.48it/s][A[A

 41%|████      | 92/227 [00:01<00:02, 56.28it/s][A[A

 44%|████▎     | 99/227 [00:01<00:02, 58.19it/s][A[A

 46%|████▋     | 105/227 [00:01<00:02, 50.22it/s][A[A

 49%|████▉     | 112/227 [00:02<00:02, 53.41it/s][A[A

 52%|█████▏    | 118/227 [00:02<00:01, 55.00it/s][A[A

 55%|█████▍    | 124/227 [00:02<00:02, 46.50it/s][A[A

 57%|█████▋    | 130/227 [00:02<00:01, 49.10it/s][A[A

Epoch 3 | Step 800 | Train_loss 14.674494743347168 | Val_loss = 13.791999816894531




 60%|██████    | 137/227 [00:02<00:01, 52.69it/s][A[A

 63%|██████▎   | 144/227 [00:02<00:01, 55.49it/s][A[A

 66%|██████▌   | 150/227 [00:02<00:01, 48.73it/s][A[A

 69%|██████▉   | 157/227 [00:02<00:01, 52.87it/s][A[A

 72%|███████▏  | 164/227 [00:03<00:01, 55.60it/s][A[A

 75%|███████▍  | 170/227 [00:03<00:01, 54.97it/s][A[A

 78%|███████▊  | 176/227 [00:03<00:00, 52.81it/s][A[A

 81%|████████  | 183/227 [00:03<00:00, 56.07it/s][A[A

 83%|████████▎ | 189/227 [00:03<00:00, 56.58it/s][A[A

 86%|████████▋ | 196/227 [00:03<00:00, 58.80it/s][A[A

 89%|████████▉ | 203/227 [00:03<00:00, 60.67it/s][A[A

 93%|█████████▎| 210/227 [00:03<00:00, 61.94it/s][A[A

 96%|█████████▌| 217/227 [00:03<00:00, 61.96it/s][A[A

Epoch:  20%|██        | 4/20 [00:15<01:03,  3.99s/it][A


100%|██████████| 227/227 [00:04<00:00, 53.89it/s]

Epoch 3 | Step 900 | Train_loss 14.81888484954834 | Val_loss = 14.979406476020813






  2%|▏         | 4/227 [00:00<00:05, 38.83it/s][A[A[A


  4%|▍         | 10/227 [00:00<00:05, 43.13it/s][A[A[A


  7%|▋         | 17/227 [00:00<00:04, 47.09it/s][A[A[A


 10%|█         | 23/227 [00:00<00:04, 49.22it/s][A[A[A


 13%|█▎        | 29/227 [00:00<00:03, 50.29it/s][A[A[A


 15%|█▍        | 34/227 [00:00<00:04, 47.34it/s][A[A[A


 18%|█▊        | 41/227 [00:00<00:03, 50.69it/s][A[A[A


 21%|██        | 48/227 [00:00<00:03, 53.93it/s][A[A[A


 24%|██▍       | 54/227 [00:01<00:03, 53.77it/s][A[A[A


 26%|██▋       | 60/227 [00:01<00:03, 54.52it/s][A[A[A


 29%|██▉       | 66/227 [00:01<00:02, 55.93it/s][A[A[A


 32%|███▏      | 73/227 [00:01<00:02, 58.52it/s][A[A[A


 35%|███▍      | 79/227 [00:01<00:02, 52.76it/s][A[A[A


 38%|███▊      | 86/227 [00:01<00:02, 55.96it/s][A[A[A


 41%|████      | 92/227 [00:01<00:02, 57.06it/s][A[A[A


 43%|████▎     | 98/227 [00:01<00:02, 46.02it/s][A[A[A

Epoch 4 | Step 1000 | Train_loss 15.259500503540039 | Val_loss = 14.552477478981018





 46%|████▌     | 104/227 [00:01<00:02, 48.09it/s][A[A[A


 49%|████▉     | 111/227 [00:02<00:02, 51.78it/s][A[A[A


 52%|█████▏    | 117/227 [00:02<00:02, 49.04it/s][A[A[A


 55%|█████▍    | 124/227 [00:02<00:01, 51.85it/s][A[A[A


 57%|█████▋    | 130/227 [00:02<00:01, 53.67it/s][A[A[A


 60%|█████▉    | 136/227 [00:02<00:01, 49.58it/s][A[A[A


 63%|██████▎   | 143/227 [00:02<00:01, 52.62it/s][A[A[A


 66%|██████▌   | 150/227 [00:02<00:01, 54.62it/s][A[A[A


 69%|██████▊   | 156/227 [00:02<00:01, 53.99it/s][A[A[A


 72%|███████▏  | 163/227 [00:03<00:01, 56.42it/s][A[A[A


 75%|███████▍  | 170/227 [00:03<00:00, 58.86it/s][A[A[A


 78%|███████▊  | 176/227 [00:03<00:00, 59.18it/s][A[A[A


 81%|████████  | 183/227 [00:03<00:00, 60.05it/s][A[A[A


 84%|████████▎ | 190/227 [00:03<00:00, 60.26it/s][A[A[A


 87%|████████▋ | 197/227 [00:03<00:00, 52.04it/s][A[A[A


 89%|████████▉ | 203/227 [00:03<00:00, 53.94it/s][A[A[A

Epoch 4 | Step 1100 | Train_loss 15.567249298095703 | Val_loss = 15.16825270652771





 92%|█████████▏| 209/227 [00:03<00:00, 54.44it/s][A[A[A


 95%|█████████▍| 215/227 [00:03<00:00, 54.94it/s][A[A[A


Epoch:  25%|██▌       | 5/20 [00:20<01:00,  4.04s/it][A[A

  0%|          | 0/227 [00:00<?, ?it/s][A[A

  1%|▏         | 3/227 [00:00<00:08, 27.69it/s][A[A

  4%|▍         | 10/227 [00:00<00:06, 33.67it/s][A[A

  7%|▋         | 17/227 [00:00<00:05, 39.15it/s][A[A

 10%|▉         | 22/227 [00:00<00:05, 40.14it/s][A[A

 12%|█▏        | 27/227 [00:00<00:04, 42.37it/s][A[A

 15%|█▍        | 33/227 [00:00<00:04, 45.66it/s][A[A

 18%|█▊        | 40/227 [00:00<00:03, 50.06it/s][A[A

 20%|██        | 46/227 [00:00<00:03, 49.69it/s][A[A

 23%|██▎       | 52/227 [00:00<00:03, 51.43it/s][A[A

 26%|██▌       | 59/227 [00:01<00:03, 55.79it/s][A[A

 29%|██▊       | 65/227 [00:01<00:02, 56.19it/s][A[A

 31%|███▏      | 71/227 [00:01<00:03, 47.67it/s][A[A

 34%|███▍      | 78/227 [00:01<00:02, 52.35it/s][A[A

Epoch 5 | Step 1200 | Train_loss 13.17887020111084 | Val_loss = 13.521368622779846




 38%|███▊      | 86/227 [00:01<00:02, 56.56it/s][A[A

 41%|████      | 93/227 [00:01<00:02, 57.29it/s][A[A

 44%|████▎     | 99/227 [00:01<00:02, 54.40it/s][A[A

 47%|████▋     | 106/227 [00:01<00:02, 57.80it/s][A[A

 50%|████▉     | 113/227 [00:02<00:01, 60.79it/s][A[A

 53%|█████▎    | 120/227 [00:02<00:01, 61.86it/s][A[A

 56%|█████▌    | 127/227 [00:02<00:01, 63.29it/s][A[A

 59%|█████▉    | 134/227 [00:02<00:01, 63.19it/s][A[A

 62%|██████▏   | 141/227 [00:02<00:01, 63.09it/s][A[A

 65%|██████▌   | 148/227 [00:02<00:01, 63.93it/s][A[A

 68%|██████▊   | 155/227 [00:02<00:01, 64.91it/s][A[A

 71%|███████▏  | 162/227 [00:02<00:01, 62.67it/s][A[A

 74%|███████▍  | 169/227 [00:02<00:01, 56.48it/s][A[A

 78%|███████▊  | 176/227 [00:03<00:00, 58.31it/s][A[A

Epoch 5 | Step 1300 | Train_loss 12.460677146911621 | Val_loss = 12.443233847618103




 81%|████████  | 183/227 [00:03<00:00, 60.51it/s][A[A

 84%|████████▎ | 190/227 [00:03<00:00, 55.46it/s][A[A

 86%|████████▋ | 196/227 [00:03<00:00, 53.06it/s][A[A

 89%|████████▉ | 203/227 [00:03<00:00, 55.96it/s][A[A

 93%|█████████▎| 210/227 [00:03<00:00, 58.11it/s][A[A

 96%|█████████▌| 217/227 [00:03<00:00, 60.92it/s][A[A

Epoch:  30%|███       | 6/20 [00:24<00:56,  4.01s/it][A



100%|██████████| 227/227 [00:03<00:00, 57.68it/s][A




  1%|▏         | 3/227 [00:00<00:08, 27.63it/s][A[A[A[A



  4%|▎         | 8/227 [00:00<00:07, 31.19it/s][A[A[A[A



  6%|▌         | 13/227 [00:00<00:06, 34.40it/s][A[A[A[A



  8%|▊         | 18/227 [00:00<00:05, 37.11it/s][A[A[A[A



 11%|█         | 24/227 [00:00<00:04, 40.88it/s][A[A[A[A



 14%|█▍        | 32/227 [00:00<00:04, 46.54it/s][A[A[A[A



 17%|█▋        | 39/227 [00:00<00:03, 47.03it/s][A[A[A[A



 20%|██        | 46/227 [00:00<00:03, 52.14it/s][A[A[A[A

Epoch 6 | Step 1400 | Train_loss 12.546931266784668 | Val_loss = 12.54689347743988






 23%|██▎       | 53/227 [00:01<00:03, 55.23it/s][A[A[A[A



 26%|██▋       | 60/227 [00:01<00:02, 58.92it/s][A[A[A[A



 30%|██▉       | 67/227 [00:01<00:02, 61.26it/s][A[A[A[A



 33%|███▎      | 74/227 [00:01<00:02, 63.59it/s][A[A[A[A



 36%|███▌      | 81/227 [00:01<00:02, 63.57it/s][A[A[A[A



 39%|███▉      | 88/227 [00:01<00:02, 62.96it/s][A[A[A[A



 42%|████▏     | 95/227 [00:01<00:02, 63.58it/s][A[A[A[A



 45%|████▍     | 102/227 [00:01<00:02, 60.47it/s][A[A[A[A



 48%|████▊     | 109/227 [00:01<00:01, 61.83it/s][A[A[A[A



 51%|█████     | 116/227 [00:02<00:01, 62.95it/s][A[A[A[A



 54%|█████▍    | 123/227 [00:02<00:01, 62.50it/s][A[A[A[A



 57%|█████▋    | 130/227 [00:02<00:01, 62.78it/s][A[A[A[A



 60%|██████    | 137/227 [00:02<00:01, 62.47it/s][A[A[A[A



 63%|██████▎   | 144/227 [00:02<00:01, 57.69it/s][A[A[A[A



 67%|██████▋   | 151/227 [00:02<00:01, 59.86it/s][A[A[A[A

Epoch 6 | Step 1500 | Train_loss 10.587894439697266 | Val_loss = 10.439477443695068






 70%|██████▉   | 158/227 [00:02<00:01, 60.21it/s][A[A[A[A



 73%|███████▎  | 165/227 [00:02<00:01, 61.93it/s][A[A[A[A



 76%|███████▌  | 172/227 [00:02<00:00, 63.84it/s][A[A[A[A



 79%|███████▉  | 179/227 [00:03<00:00, 64.65it/s][A[A[A[A



 82%|████████▏ | 187/227 [00:03<00:00, 66.69it/s][A[A[A[A



 85%|████████▌ | 194/227 [00:03<00:00, 66.56it/s][A[A[A[A



 89%|████████▊ | 201/227 [00:03<00:00, 65.82it/s][A[A[A[A



 92%|█████████▏| 208/227 [00:03<00:00, 66.19it/s][A[A[A[A



 95%|█████████▍| 215/227 [00:03<00:00, 66.58it/s][A[A[A[A



Epoch:  35%|███▌      | 7/20 [00:27<00:51,  3.93s/it][A[A[A

100%|██████████| 227/227 [00:03<00:00, 60.71it/s]


  3%|▎         | 6/227 [00:00<00:04, 54.17it/s][A[A

  5%|▌         | 12/227 [00:00<00:04, 51.50it/s][A[A

  8%|▊         | 19/227 [00:00<00:03, 55.05it/s][A[A

Epoch 7 | Step 1600 | Train_loss 8.285905838012695 | Val_loss = 8.163893699645996




 11%|█▏        | 26/227 [00:00<00:03, 56.97it/s][A[A

 15%|█▍        | 33/227 [00:00<00:03, 58.79it/s][A[A

 18%|█▊        | 40/227 [00:00<00:03, 61.62it/s][A[A

 21%|██        | 47/227 [00:00<00:02, 63.13it/s][A[A

 24%|██▍       | 54/227 [00:00<00:02, 64.14it/s][A[A

 27%|██▋       | 61/227 [00:00<00:02, 65.36it/s][A[A

 30%|██▉       | 68/227 [00:01<00:02, 66.28it/s][A[A

 33%|███▎      | 75/227 [00:01<00:02, 66.47it/s][A[A

 36%|███▌      | 82/227 [00:01<00:02, 65.86it/s][A[A

 40%|███▉      | 90/227 [00:01<00:02, 67.68it/s][A[A

 43%|████▎     | 97/227 [00:01<00:01, 67.34it/s][A[A

 46%|████▌     | 104/227 [00:01<00:01, 64.60it/s][A[A

 49%|████▉     | 111/227 [00:01<00:01, 64.36it/s][A[A

 52%|█████▏    | 118/227 [00:01<00:01, 59.17it/s][A[A

 56%|█████▌    | 126/227 [00:01<00:01, 62.35it/s][A[A

Epoch 7 | Step 1700 | Train_loss 6.5888895988464355 | Val_loss = 6.337394416332245




 59%|█████▊    | 133/227 [00:02<00:01, 63.45it/s][A[A
100%|██████████| 227/227 [00:22<00:00, 60.26it/s][A

 62%|██████▏   | 140/227 [00:02<00:01, 64.19it/s][A[A

 65%|██████▍   | 147/227 [00:02<00:01, 65.18it/s][A[A

 68%|██████▊   | 154/227 [00:02<00:01, 65.79it/s][A[A

 71%|███████   | 161/227 [00:02<00:01, 65.79it/s][A[A

 74%|███████▍  | 168/227 [00:02<00:00, 66.42it/s][A[A

 77%|███████▋  | 175/227 [00:02<00:00, 66.69it/s][A[A

 80%|████████  | 182/227 [00:02<00:00, 64.88it/s][A[A

 84%|████████▎ | 190/227 [00:02<00:00, 66.38it/s][A[A

 87%|████████▋ | 197/227 [00:03<00:00, 66.65it/s][A[A

 90%|████████▉ | 204/227 [00:03<00:00, 66.48it/s][A[A

 93%|█████████▎| 211/227 [00:03<00:00, 67.05it/s][A[A

 96%|█████████▌| 218/227 [00:03<00:00, 60.90it/s][A[A

 99%|█████████▉| 225/227 [00:03<00:00, 63.19it/s][A[A

Epoch 7 | Step 1800 | Train_loss 5.786288738250732 | Val_loss = 5.667757630348206


Epoch:  40%|████      | 8/20 [00:31<00:45,  3.82s/it]



  0%|          | 0/227 [00:00<?, ?it/s][A[A[A[A



  2%|▏         | 4/227 [00:00<00:05, 39.71it/s][A[A[A[A



  5%|▍         | 11/227 [00:00<00:04, 45.11it/s][A[A[A[A



  8%|▊         | 18/227 [00:00<00:04, 49.48it/s][A[A[A[A



 11%|█         | 25/227 [00:00<00:03, 53.68it/s][A[A[A[A



 14%|█▍        | 32/227 [00:00<00:03, 57.67it/s][A[A[A[A



 17%|█▋        | 39/227 [00:00<00:03, 60.49it/s][A[A[A[A



 20%|██        | 46/227 [00:00<00:02, 62.47it/s][A[A[A[A



 23%|██▎       | 53/227 [00:00<00:02, 61.59it/s][A[A[A[A



 26%|██▋       | 60/227 [00:00<00:02, 61.92it/s][A[A[A[A



 30%|██▉       | 67/227 [00:01<00:02, 62.44it/s][A[A[A[A



 33%|███▎      | 74/227 [00:01<00:02, 63.92it/s][A[A[A[A



 36%|███▌      | 81/227 [00:01<00:02, 63.59it/s][A[A[A[A



 39%|███▉      | 88/227 [00:01<00:02, 51.67it/s][A[A[A[A



 41%|████▏     | 94/227 [00:01<00:02, 52.31it/s][A[A[A

Epoch 8 | Step 1900 | Train_loss 5.352069854736328 | Val_loss = 5.300309896469116






 44%|████▍     | 101/227 [00:01<00:02, 55.44it/s][A[A[A[A



 48%|████▊     | 108/227 [00:01<00:02, 58.42it/s][A[A[A[A



 51%|█████     | 115/227 [00:01<00:01, 59.31it/s][A[A[A[A



 54%|█████▎    | 122/227 [00:02<00:01, 61.79it/s][A[A[A[A



 57%|█████▋    | 129/227 [00:02<00:01, 60.96it/s][A[A[A[A



 60%|█████▉    | 136/227 [00:02<00:01, 54.51it/s][A[A[A[A



 63%|██████▎   | 142/227 [00:02<00:01, 55.64it/s][A[A[A[A



 66%|██████▌   | 149/227 [00:02<00:01, 57.43it/s][A[A[A[A



 68%|██████▊   | 155/227 [00:02<00:01, 57.23it/s][A[A[A[A



 71%|███████▏  | 162/227 [00:02<00:01, 59.16it/s][A[A[A[A



 74%|███████▍  | 169/227 [00:02<00:00, 61.75it/s][A[A[A[A



 78%|███████▊  | 176/227 [00:02<00:00, 63.17it/s][A[A[A[A



 81%|████████  | 183/227 [00:03<00:00, 64.35it/s][A[A[A[A



 84%|████████▎ | 190/227 [00:03<00:00, 58.78it/s][A[A[A[A



 87%|████████▋ | 197/227 [00:03<00:00, 61.32it/s][A[A[A[A

Epoch 8 | Step 2000 | Train_loss 5.165165901184082 | Val_loss = 4.907677173614502






 90%|████████▉ | 204/227 [00:03<00:00, 61.77it/s][A[A[A[A



 93%|█████████▎| 212/227 [00:03<00:00, 64.29it/s][A[A[A[A



 96%|█████████▋| 219/227 [00:03<00:00, 64.59it/s][A[A[A[A



Epoch:  45%|████▌     | 9/20 [00:35<00:41,  3.79s/it][A[A[A




100%|██████████| 227/227 [00:03<00:00, 60.79it/s][A[A





  2%|▏         | 5/227 [00:00<00:04, 49.35it/s][A[A[A[A[A




  5%|▌         | 12/227 [00:00<00:04, 53.12it/s][A[A[A[A[A




  8%|▊         | 19/227 [00:00<00:03, 55.99it/s][A[A[A[A[A




 11%|█▏        | 26/227 [00:00<00:03, 57.86it/s][A[A[A[A[A




 15%|█▍        | 33/227 [00:00<00:03, 60.60it/s][A[A[A[A[A




 18%|█▊        | 40/227 [00:00<00:03, 61.82it/s][A[A[A[A[A




 21%|██        | 47/227 [00:00<00:02, 62.54it/s][A[A[A[A[A




 24%|██▍       | 54/227 [00:00<00:02, 63.24it/s][A[A[A[A[A




 27%|██▋       | 61/227 [00:01<00:02, 57.61it/s][A[A[A[A[A




 30%|██▉       | 68/227 [00:01<00:02, 59.62it/s][A[A[A[A[A

Epoch 9 | Step 2100 | Train_loss 4.805555820465088 | Val_loss = 4.646418750286102







 33%|███▎      | 75/227 [00:01<00:02, 61.77it/s][A[A[A[A[A




 36%|███▌      | 82/227 [00:01<00:02, 62.27it/s][A[A[A[A[A




 39%|███▉      | 89/227 [00:01<00:02, 63.29it/s][A[A[A[A[A




 42%|████▏     | 96/227 [00:01<00:02, 64.68it/s][A[A[A[A[A




 45%|████▌     | 103/227 [00:01<00:01, 64.53it/s][A[A[A[A[A




 48%|████▊     | 110/227 [00:01<00:01, 64.50it/s][A[A[A[A[A




 52%|█████▏    | 117/227 [00:01<00:01, 65.03it/s][A[A[A[A[A




 55%|█████▍    | 124/227 [00:01<00:01, 64.49it/s][A[A[A[A[A




 58%|█████▊    | 131/227 [00:02<00:01, 61.45it/s][A[A[A[A[A




 61%|██████    | 138/227 [00:02<00:01, 63.42it/s][A[A[A[A[A




 64%|██████▍   | 145/227 [00:02<00:01, 64.33it/s][A[A[A[A[A




 67%|██████▋   | 152/227 [00:02<00:01, 65.58it/s][A[A[A[A[A




 70%|███████   | 159/227 [00:02<00:01, 59.57it/s][A[A[A[A[A




 73%|███████▎  | 166/227 [00:02<00:00, 61.55it/s][A[A[A[A[A

Epoch 9 | Step 2200 | Train_loss 4.635091304779053 | Val_loss = 4.533062279224396







 76%|███████▌  | 173/227 [00:02<00:00, 62.46it/s][A[A[A[A[A




 79%|███████▉  | 180/227 [00:02<00:00, 62.58it/s][A[A[A[A[A




 82%|████████▏ | 187/227 [00:02<00:00, 63.60it/s][A[A[A[A[A




 85%|████████▌ | 194/227 [00:03<00:00, 64.35it/s][A[A[A[A[A




 89%|████████▊ | 201/227 [00:03<00:00, 65.69it/s][A[A[A[A[A




 92%|█████████▏| 208/227 [00:03<00:00, 66.43it/s][A[A[A[A[A




 95%|█████████▍| 215/227 [00:03<00:00, 65.60it/s][A[A[A[A[A




Epoch:  50%|█████     | 10/20 [00:38<00:37,  3.73s/it]A[A[A[A



100%|██████████| 227/227 [00:03<00:00, 63.53it/s][A




  2%|▏         | 5/227 [00:00<00:04, 48.02it/s][A[A[A[A



  5%|▍         | 11/227 [00:00<00:04, 51.07it/s][A[A[A[A



  8%|▊         | 18/227 [00:00<00:03, 55.14it/s][A[A[A[A



 11%|█         | 25/227 [00:00<00:03, 58.75it/s][A[A[A[A



 14%|█▎        | 31/227 [00:00<00:03, 54.40it/s][A[A[A[A



 17%|█▋        | 38/227 [00:00<00:03, 57.97it/s][A[A[A[A

Epoch 10 | Step 2300 | Train_loss 4.539700984954834 | Val_loss = 4.3557738065719604






 20%|█▉        | 45/227 [00:00<00:02, 60.86it/s][A[A[A[A



 23%|██▎       | 52/227 [00:00<00:02, 61.62it/s][A[A[A[A



 26%|██▌       | 59/227 [00:00<00:02, 62.92it/s][A[A[A[A



 29%|██▉       | 66/227 [00:01<00:02, 64.04it/s][A[A[A[A



 32%|███▏      | 73/227 [00:01<00:02, 65.41it/s][A[A[A[A



 35%|███▌      | 80/227 [00:01<00:02, 64.62it/s][A[A[A[A


100%|██████████| 227/227 [00:24<00:00, 56.31it/s][A[A[A



 38%|███▊      | 87/227 [00:01<00:02, 64.04it/s][A[A[A[A



 41%|████▏     | 94/227 [00:01<00:02, 65.21it/s][A[A[A[A



 44%|████▍     | 101/227 [00:01<00:01, 64.08it/s][A[A[A[A



 48%|████▊     | 109/227 [00:01<00:01, 66.35it/s][A[A[A[A



 51%|█████     | 116/227 [00:01<00:01, 65.14it/s][A[A[A[A



 54%|█████▍    | 123/227 [00:01<00:01, 65.59it/s][A[A[A[A



 57%|█████▋    | 130/227 [00:02<00:01, 65.21it/s][A[A[A[A



 60%|██████    | 137/227 [00:02<00:01, 60.01it/s][A[A[A[A



 63%|██████▎   | 144/227 [00:02<0

Epoch 10 | Step 2400 | Train_loss 4.329449653625488 | Val_loss = 4.221425622701645






 67%|██████▋   | 151/227 [00:02<00:01, 63.22it/s][A[A[A[A



 70%|██████▉   | 158/227 [00:02<00:01, 64.77it/s][A[A[A[A



 73%|███████▎  | 165/227 [00:02<00:00, 65.71it/s][A[A[A[A



 76%|███████▌  | 172/227 [00:02<00:00, 66.62it/s][A[A[A[A



 79%|███████▉  | 179/227 [00:02<00:00, 66.39it/s][A[A[A[A



 82%|████████▏ | 186/227 [00:02<00:00, 67.05it/s][A[A[A[A



 85%|████████▌ | 193/227 [00:03<00:00, 67.61it/s][A[A[A[A



 88%|████████▊ | 200/227 [00:03<00:00, 64.14it/s][A[A[A[A



 91%|█████████ | 207/227 [00:03<00:00, 56.64it/s][A[A[A[A



 94%|█████████▍| 213/227 [00:03<00:00, 50.11it/s][A[A[A[A



 96%|█████████▋| 219/227 [00:03<00:00, 50.84it/s][A[A[A[A



Epoch:  55%|█████▌    | 11/20 [00:42<00:33,  3.71s/it]A[A[A




  0%|          | 0/227 [00:00<?, ?it/s][A[A[A[A[A




  2%|▏         | 4/227 [00:00<00:07, 29.31it/s][A[A[A[A[A




  5%|▍         | 11/227 [00:00<00:06, 35.44it/s][A[A[A[A[A

Epoch 11 | Step 2500 | Train_loss 4.236222743988037 | Val_loss = 4.1081763207912445







  8%|▊         | 18/227 [00:00<00:05, 41.17it/s][A[A[A[A[A




 11%|█         | 25/227 [00:00<00:04, 46.40it/s][A[A[A[A[A




 14%|█▍        | 32/227 [00:00<00:03, 50.42it/s][A[A[A[A[A




 17%|█▋        | 39/227 [00:00<00:03, 53.87it/s][A[A[A[A[A




 20%|██        | 46/227 [00:00<00:03, 55.54it/s][A[A[A[A[A




 23%|██▎       | 52/227 [00:00<00:03, 50.40it/s][A[A[A[A[A




 26%|██▌       | 58/227 [00:01<00:03, 49.56it/s][A[A[A[A[A




 29%|██▊       | 65/227 [00:01<00:03, 53.25it/s][A[A[A[A[A




 32%|███▏      | 72/227 [00:01<00:02, 56.00it/s][A[A[A[A[A




 35%|███▍      | 79/227 [00:01<00:02, 57.74it/s][A[A[A[A[A




 38%|███▊      | 86/227 [00:01<00:02, 58.91it/s][A[A[A[A[A




 41%|████      | 92/227 [00:01<00:02, 58.87it/s][A[A[A[A[A




 44%|████▎     | 99/227 [00:01<00:02, 61.57it/s][A[A[A[A[A




 47%|████▋     | 106/227 [00:01<00:02, 56.85it/s][A[A[A[A[A




 50%|████▉     | 113/227 [00:01<00:01, 58.

Epoch 11 | Step 2600 | Train_loss 4.072806358337402 | Val_loss = 4.006423652172089







 53%|█████▎    | 120/227 [00:02<00:01, 59.44it/s][A[A[A[A[A




 56%|█████▌    | 127/227 [00:02<00:01, 62.14it/s][A[A[A[A[A




 59%|█████▉    | 134/227 [00:02<00:01, 62.16it/s][A[A[A[A[A




 62%|██████▏   | 141/227 [00:02<00:01, 62.84it/s][A[A[A[A[A




 65%|██████▌   | 148/227 [00:02<00:01, 62.72it/s][A[A[A[A[A




 68%|██████▊   | 155/227 [00:02<00:01, 63.20it/s][A[A[A[A[A




 71%|███████▏  | 162/227 [00:02<00:01, 62.30it/s][A[A[A[A[A




 74%|███████▍  | 169/227 [00:02<00:00, 64.20it/s][A[A[A[A[A




 78%|███████▊  | 176/227 [00:02<00:00, 65.19it/s][A[A[A[A[A




 81%|████████  | 183/227 [00:03<00:00, 65.81it/s][A[A[A[A[A




 84%|████████▎ | 190/227 [00:03<00:00, 65.59it/s][A[A[A[A[A




 87%|████████▋ | 197/227 [00:03<00:00, 66.00it/s][A[A[A[A[A




 90%|████████▉ | 204/227 [00:03<00:00, 60.23it/s][A[A[A[A[A




 93%|█████████▎| 211/227 [00:03<00:00, 60.90it/s][A[A[A[A[A

Epoch 11 | Step 2700 | Train_loss 4.051764965057373 | Val_loss = 3.914465695619583







 96%|█████████▌| 218/227 [00:03<00:00, 61.99it/s][A[A[A[A[A




Epoch:  60%|██████    | 12/20 [00:46<00:29,  3.72s/it]A[A[A[A





100%|██████████| 227/227 [00:03<00:00, 60.46it/s][A[A[A






  2%|▏         | 5/227 [00:00<00:04, 47.49it/s][A[A[A[A[A[A





  5%|▌         | 12/227 [00:00<00:04, 51.67it/s][A[A[A[A[A[A





  8%|▊         | 18/227 [00:00<00:03, 53.71it/s][A[A[A[A[A[A





 11%|█         | 25/227 [00:00<00:03, 55.90it/s][A[A[A[A[A[A





 14%|█▍        | 32/227 [00:00<00:03, 58.49it/s][A[A[A[A[A[A





 17%|█▋        | 39/227 [00:00<00:03, 60.40it/s][A[A[A[A[A[A





 20%|██        | 46/227 [00:00<00:02, 62.59it/s][A[A[A[A[A[A





 23%|██▎       | 53/227 [00:00<00:02, 62.51it/s][A[A[A[A[A[A





 26%|██▋       | 60/227 [00:00<00:02, 63.05it/s][A[A[A[A[A[A





 30%|██▉       | 67/227 [00:01<00:02, 62.81it/s][A[A[A[A[A[A





 33%|███▎      | 74/227 [00:01<00:02, 63.63it/s][A[A[A[A[A[A





 

Epoch 12 | Step 2800 | Train_loss 3.944195508956909 | Val_loss = 3.8701398074626923








 41%|████▏     | 94/227 [00:01<00:02, 59.23it/s][A[A[A[A[A[A





 44%|████▍     | 101/227 [00:01<00:02, 59.93it/s][A[A[A[A[A[A





 48%|████▊     | 108/227 [00:01<00:01, 60.83it/s][A[A[A[A[A[A





 51%|█████     | 115/227 [00:01<00:01, 62.52it/s][A[A[A[A[A[A





 54%|█████▎    | 122/227 [00:01<00:01, 63.19it/s][A[A[A[A[A[A





 57%|█████▋    | 129/227 [00:02<00:01, 63.43it/s][A[A[A[A[A[A





 60%|█████▉    | 136/227 [00:02<00:01, 63.75it/s][A[A[A[A[A[A





 63%|██████▎   | 143/227 [00:02<00:01, 65.13it/s][A[A[A[A[A[A





 66%|██████▌   | 150/227 [00:02<00:01, 65.92it/s][A[A[A[A[A[A





 69%|██████▉   | 157/227 [00:02<00:01, 65.39it/s][A[A[A[A[A[A





 72%|███████▏  | 164/227 [00:02<00:00, 66.07it/s][A[A[A[A[A[A





 75%|███████▌  | 171/227 [00:02<00:00, 65.47it/s][A[A[A[A[A[A





 78%|███████▊  | 178/227 [00:02<00:00, 59.80it/s][A[A[A[A[A[A





 81%|████████▏ | 185/227 [00:02<00:00, 62.09it

Epoch 12 | Step 2900 | Train_loss 3.8093655109405518 | Val_loss = 3.838913321495056








 85%|████████▍ | 192/227 [00:03<00:00, 63.84it/s][A[A[A[A[A[A





 88%|████████▊ | 199/227 [00:03<00:00, 64.93it/s][A[A[A[A[A[A





 91%|█████████ | 206/227 [00:03<00:00, 63.47it/s][A[A[A[A[A[A





 94%|█████████▍| 213/227 [00:03<00:00, 64.33it/s][A[A[A[A[A[A





 97%|█████████▋| 220/227 [00:03<00:00, 65.40it/s][A[A[A[A[A[A





Epoch:  65%|██████▌   | 13/20 [00:49<00:25,  3.69s/it]A[A[A[A[A




100%|██████████| 227/227 [00:03<00:00, 62.84it/s][A[A





  2%|▏         | 5/227 [00:00<00:04, 49.45it/s][A[A[A[A[A




  6%|▌         | 13/227 [00:00<00:03, 54.23it/s][A[A[A[A[A

100%|██████████| 227/227 [00:22<00:00, 63.19it/s][A[A




  9%|▉         | 20/227 [00:00<00:03, 57.41it/s][A[A[A[A[A




 12%|█▏        | 27/227 [00:00<00:03, 59.04it/s][A[A[A[A[A




 15%|█▍        | 33/227 [00:00<00:03, 59.18it/s][A[A[A[A[A




 18%|█▊        | 40/227 [00:00<00:03, 61.05it/s][A[A[A[A[A




 21%|██        | 47/227 [00:00<00

Epoch 13 | Step 3000 | Train_loss 3.9606306552886963 | Val_loss = 3.7875111997127533







 30%|██▉       | 67/227 [00:01<00:02, 60.69it/s][A[A[A[A[A




 33%|███▎      | 74/227 [00:01<00:02, 61.29it/s][A[A[A[A[A




 36%|███▌      | 81/227 [00:01<00:02, 62.11it/s][A[A[A[A[A




 39%|███▉      | 88/227 [00:01<00:02, 62.69it/s][A[A[A[A[A




 42%|████▏     | 95/227 [00:01<00:02, 63.57it/s][A[A[A[A[A




 45%|████▍     | 102/227 [00:01<00:01, 63.48it/s][A[A[A[A[A




 48%|████▊     | 109/227 [00:01<00:01, 64.76it/s][A[A[A[A[A




 51%|█████     | 116/227 [00:01<00:01, 63.81it/s][A[A[A[A[A




 54%|█████▍    | 123/227 [00:01<00:01, 63.80it/s][A[A[A[A[A




 57%|█████▋    | 130/227 [00:02<00:01, 65.00it/s][A[A[A[A[A




 60%|██████    | 137/227 [00:02<00:01, 65.10it/s][A[A[A[A[A




 63%|██████▎   | 144/227 [00:02<00:01, 66.10it/s][A[A[A[A[A




 67%|██████▋   | 151/227 [00:02<00:01, 58.54it/s][A[A[A[A[A




 70%|██████▉   | 158/227 [00:02<00:01, 60.79it/s][A[A[A[A[A

Epoch 13 | Step 3100 | Train_loss 3.813363552093506 | Val_loss = 3.718378961086273







 73%|███████▎  | 165/227 [00:02<00:00, 62.16it/s][A[A[A[A[A




 76%|███████▌  | 172/227 [00:02<00:00, 63.05it/s][A[A[A[A[A




 79%|███████▉  | 179/227 [00:02<00:00, 63.15it/s][A[A[A[A[A




 82%|████████▏ | 186/227 [00:02<00:00, 62.99it/s][A[A[A[A[A




 85%|████████▌ | 193/227 [00:03<00:00, 63.79it/s][A[A[A[A[A




 88%|████████▊ | 200/227 [00:03<00:00, 64.19it/s][A[A[A[A[A




 91%|█████████ | 207/227 [00:03<00:00, 60.54it/s][A[A[A[A[A




 94%|█████████▍| 214/227 [00:03<00:00, 62.55it/s][A[A[A[A[A




Epoch:  70%|███████   | 14/20 [00:53<00:22,  3.67s/it]A[A[A[A





  0%|          | 0/227 [00:00<?, ?it/s][A[A[A[A[A[A





  2%|▏         | 5/227 [00:00<00:04, 47.52it/s][A[A[A[A[A[A





  5%|▌         | 12/227 [00:00<00:04, 51.77it/s][A[A[A[A[A[A





  8%|▊         | 19/227 [00:00<00:03, 54.98it/s][A[A[A[A[A[A





 11%|█         | 24/227 [00:00<00:03, 50.98it/s][A[A[A[A[A[A





 14%|█▎        | 31/227

Epoch 14 | Step 3200 | Train_loss 3.7566399574279785 | Val_loss = 3.64483842253685








 17%|█▋        | 38/227 [00:00<00:03, 58.62it/s][A[A[A[A[A[A





 20%|█▉        | 45/227 [00:00<00:03, 59.95it/s][A[A[A[A[A[A





 23%|██▎       | 52/227 [00:00<00:02, 61.96it/s][A[A[A[A[A[A





 26%|██▌       | 59/227 [00:00<00:02, 62.41it/s][A[A[A[A[A[A





 29%|██▉       | 66/227 [00:01<00:02, 62.92it/s][A[A[A[A[A[A





 32%|███▏      | 73/227 [00:01<00:02, 63.01it/s][A[A[A[A[A[A





 35%|███▌      | 80/227 [00:01<00:02, 64.56it/s][A[A[A[A[A[A





 38%|███▊      | 87/227 [00:01<00:02, 64.19it/s][A[A[A[A[A[A





 42%|████▏     | 95/227 [00:01<00:01, 66.07it/s][A[A[A[A[A[A





 45%|████▍     | 102/227 [00:01<00:01, 64.93it/s][A[A[A[A[A[A





 48%|████▊     | 109/227 [00:01<00:01, 65.83it/s][A[A[A[A[A[A





 51%|█████     | 116/227 [00:01<00:01, 66.40it/s][A[A[A[A[A[A





 54%|█████▍    | 123/227 [00:01<00:01, 59.54it/s][A[A[A[A[A[A





 57%|█████▋    | 130/227 [00:02<00:01, 61.01it/s][A[

Epoch 14 | Step 3300 | Train_loss 3.730405807495117 | Val_loss = 3.59549018740654








 60%|██████    | 137/227 [00:02<00:01, 62.08it/s][A[A[A[A[A[A





 63%|██████▎   | 144/227 [00:02<00:01, 62.06it/s][A[A[A[A[A[A





 67%|██████▋   | 151/227 [00:02<00:01, 63.31it/s][A[A[A[A[A[A





 70%|██████▉   | 158/227 [00:02<00:01, 63.61it/s][A[A[A[A[A[A





 73%|███████▎  | 165/227 [00:02<00:00, 64.74it/s][A[A[A[A[A[A





 76%|███████▌  | 172/227 [00:02<00:00, 65.08it/s][A[A[A[A[A[A





 79%|███████▉  | 179/227 [00:02<00:00, 65.18it/s][A[A[A[A[A[A





 82%|████████▏ | 186/227 [00:02<00:00, 63.57it/s][A[A[A[A[A[A





 85%|████████▌ | 193/227 [00:03<00:00, 63.03it/s][A[A[A[A[A[A





 88%|████████▊ | 200/227 [00:03<00:00, 64.26it/s][A[A[A[A[A[A





 91%|█████████ | 207/227 [00:03<00:00, 63.56it/s][A[A[A[A[A[A





 94%|█████████▍| 214/227 [00:03<00:00, 64.66it/s][A[A[A[A[A[A





Epoch:  75%|███████▌  | 15/20 [00:56<00:18,  3.66s/it]A[A[A[A[A






100%|██████████| 227/227 [00:03<00:00, 62.40

Epoch 14 | Step 3400 | Train_loss 3.6670186519622803 | Val_loss = 3.5466843247413635









  5%|▌         | 12/227 [00:00<00:04, 51.41it/s][A[A[A[A[A[A[A






  8%|▊         | 19/227 [00:00<00:03, 55.44it/s][A[A[A[A[A[A[A






 11%|█▏        | 26/227 [00:00<00:03, 58.39it/s][A[A[A[A[A[A[A






 15%|█▍        | 33/227 [00:00<00:03, 59.36it/s][A[A[A[A[A[A[A






 18%|█▊        | 40/227 [00:00<00:03, 61.41it/s][A[A[A[A[A[A[A






 20%|██        | 46/227 [00:00<00:03, 59.47it/s][A[A[A[A[A[A[A






 23%|██▎       | 53/227 [00:00<00:02, 61.87it/s][A[A[A[A[A[A[A






 26%|██▋       | 60/227 [00:00<00:02, 63.81it/s][A[A[A[A[A[A[A






 30%|██▉       | 67/227 [00:01<00:02, 63.96it/s][A[A[A[A[A[A[A






 33%|███▎      | 74/227 [00:01<00:02, 64.79it/s][A[A[A[A[A[A[A






 36%|███▌      | 81/227 [00:01<00:02, 66.02it/s][A[A[A[A[A[A[A






 39%|███▉      | 88/227 [00:01<00:02, 66.87it/s][A[A[A[A[A[A[A






 42%|████▏     | 95/227 [00:01<00:01, 66.81it/s][A[A[A[A[A[A[A






 45%|

Epoch 15 | Step 3500 | Train_loss 3.670837163925171 | Val_loss = 3.497759997844696









 51%|█████     | 116/227 [00:01<00:01, 63.57it/s][A[A[A[A[A[A[A






 54%|█████▍    | 123/227 [00:01<00:01, 64.02it/s][A[A[A[A[A[A[A






 57%|█████▋    | 130/227 [00:02<00:01, 65.38it/s][A[A[A[A[A[A[A






 60%|██████    | 137/227 [00:02<00:01, 66.13it/s][A[A[A[A[A[A[A






 63%|██████▎   | 144/227 [00:02<00:01, 65.42it/s][A[A[A[A[A[A[A






 67%|██████▋   | 151/227 [00:02<00:01, 65.47it/s][A[A[A[A[A[A[A






 70%|██████▉   | 158/227 [00:02<00:01, 65.38it/s][A[A[A[A[A[A[A






 73%|███████▎  | 165/227 [00:02<00:00, 65.12it/s][A[A[A[A[A[A[A






 76%|███████▌  | 172/227 [00:02<00:00, 66.23it/s][A[A[A[A[A[A[A






 79%|███████▉  | 179/227 [00:02<00:00, 65.00it/s][A[A[A[A[A[A[A






 82%|████████▏ | 186/227 [00:02<00:00, 60.24it/s][A[A[A[A[A[A[A



100%|██████████| 227/227 [00:21<00:00, 55.08it/s][A[A[A[A






 85%|████████▌ | 193/227 [00:03<00:00, 57.91it/s][A[A[A[A[A[A[A






 88%

Epoch 15 | Step 3600 | Train_loss 3.6319830417633057 | Val_loss = 3.465664714574814









 93%|█████████▎| 211/227 [00:03<00:00, 51.15it/s][A[A[A[A[A[A[A






 96%|█████████▌| 217/227 [00:03<00:00, 52.37it/s][A[A[A[A[A[A[A






Epoch:  80%|████████  | 16/20 [01:00<00:14,  3.67s/it]A[A[A[A[A[A





  0%|          | 0/227 [00:00<?, ?it/s][A[A[A[A[A[A





  2%|▏         | 4/227 [00:00<00:05, 37.25it/s][A[A[A[A[A[A





  4%|▍         | 10/227 [00:00<00:05, 41.85it/s][A[A[A[A[A[A





  7%|▋         | 15/227 [00:00<00:04, 42.82it/s][A[A[A[A[A[A





  8%|▊         | 19/227 [00:00<00:04, 41.73it/s][A[A[A[A[A[A





 11%|█         | 24/227 [00:00<00:04, 43.32it/s][A[A[A[A[A[A





 13%|█▎        | 30/227 [00:00<00:04, 46.96it/s][A[A[A[A[A[A





 15%|█▌        | 35/227 [00:00<00:04, 43.68it/s][A[A[A[A[A[A





 19%|█▊        | 42/227 [00:00<00:03, 48.38it/s][A[A[A[A[A[A





 22%|██▏       | 49/227 [00:00<00:03, 52.07it/s][A[A[A[A[A[A





 24%|██▍       | 55/227 [00:01<00:03, 49.93it/s][A[

Epoch 16 | Step 3700 | Train_loss 3.6317250728607178 | Val_loss = 3.440155267715454








 35%|███▌      | 80/227 [00:01<00:03, 46.64it/s][A[A[A[A[A[A





 38%|███▊      | 86/227 [00:01<00:02, 49.16it/s][A[A[A[A[A[A





 41%|████      | 92/227 [00:01<00:02, 47.80it/s][A[A[A[A[A[A





 44%|████▎     | 99/227 [00:01<00:02, 51.70it/s][A[A[A[A[A[A





 46%|████▋     | 105/227 [00:02<00:02, 50.11it/s][A[A[A[A[A[A





 49%|████▉     | 111/227 [00:02<00:02, 50.01it/s][A[A[A[A[A[A





 52%|█████▏    | 117/227 [00:02<00:02, 46.73it/s][A[A[A[A[A[A





 54%|█████▎    | 122/227 [00:02<00:02, 47.42it/s][A[A[A[A[A[A





 56%|█████▋    | 128/227 [00:02<00:01, 50.42it/s][A[A[A[A[A[A





 59%|█████▉    | 134/227 [00:02<00:01, 52.95it/s][A[A[A[A[A[A





 62%|██████▏   | 140/227 [00:02<00:01, 51.03it/s][A[A[A[A[A[A





 64%|██████▍   | 146/227 [00:02<00:01, 48.30it/s][A[A[A[A[A[A





 67%|██████▋   | 153/227 [00:03<00:01, 51.93it/s][A[A[A[A[A[A





 70%|███████   | 159/227 [00:03<00:01, 53.55it/s]

Epoch 16 | Step 3800 | Train_loss 3.4945507049560547 | Val_loss = 3.4018774330615997








 80%|███████▉  | 181/227 [00:03<00:01, 45.06it/s][A[A[A[A[A[A





 82%|████████▏ | 187/227 [00:03<00:00, 47.54it/s][A[A[A[A[A[A





 85%|████████▌ | 193/227 [00:03<00:00, 47.70it/s][A[A[A[A[A[A





 87%|████████▋ | 198/227 [00:04<00:00, 46.85it/s][A[A[A[A[A[A





 90%|████████▉ | 204/227 [00:04<00:00, 49.73it/s][A[A[A[A[A[A





 93%|█████████▎| 210/227 [00:04<00:00, 52.27it/s][A[A[A[A[A[A





 95%|█████████▌| 216/227 [00:04<00:00, 52.20it/s][A[A[A[A[A[A





Epoch:  85%|████████▌ | 17/20 [01:05<00:11,  3.95s/it]A[A[A[A[A







100%|██████████| 227/227 [00:04<00:00, 49.36it/s][A[A[A[A[A








  2%|▏         | 5/227 [00:00<00:05, 42.55it/s][A[A[A[A[A[A[A[A







  4%|▍         | 10/227 [00:00<00:05, 42.12it/s][A[A[A[A[A[A[A[A







  7%|▋         | 15/227 [00:00<00:04, 43.85it/s][A[A[A[A[A[A[A[A







 10%|▉         | 22/227 [00:00<00:04, 48.62it/s][A[A[A[A[A[A[A[A







 13%|█▎        | 

Epoch 17 | Step 3900 | Train_loss 3.4162533283233643 | Val_loss = 3.3584722578525543










 25%|██▌       | 57/227 [00:01<00:03, 44.32it/s][A[A[A[A[A[A[A[A







 28%|██▊       | 63/227 [00:01<00:03, 47.92it/s][A[A[A[A[A[A[A[A







 30%|███       | 69/227 [00:01<00:03, 50.86it/s][A[A[A[A[A[A[A[A







 33%|███▎      | 75/227 [00:01<00:02, 52.37it/s][A[A[A[A[A[A[A[A







 36%|███▌      | 82/227 [00:01<00:02, 54.91it/s][A[A[A[A[A[A[A[A







 39%|███▉      | 88/227 [00:01<00:02, 52.19it/s][A[A[A[A[A[A[A[A







 41%|████▏     | 94/227 [00:01<00:02, 49.79it/s][A[A[A[A[A[A[A[A







 44%|████▍     | 101/227 [00:01<00:02, 51.77it/s][A[A[A[A[A[A[A[A







 47%|████▋     | 107/227 [00:02<00:02, 51.86it/s][A[A[A[A[A[A[A[A







 50%|████▉     | 113/227 [00:02<00:02, 48.94it/s][A[A[A[A[A[A[A[A







 52%|█████▏    | 119/227 [00:02<00:02, 50.76it/s][A[A[A[A[A[A[A[A







 55%|█████▌    | 125/227 [00:02<00:01, 52.62it/s][A[A[A[A[A[A[A[A







 58%|█████▊    | 131/227 [0

Epoch 17 | Step 4000 | Train_loss 3.410477876663208 | Val_loss = 3.3140670359134674










 69%|██████▊   | 156/227 [00:03<00:01, 52.71it/s][A[A[A[A[A[A[A[A







 71%|███████▏  | 162/227 [00:03<00:01, 47.83it/s][A[A[A[A[A[A[A[A







 74%|███████▍  | 169/227 [00:03<00:01, 51.80it/s][A[A[A[A[A[A[A[A







 77%|███████▋  | 175/227 [00:03<00:00, 53.04it/s][A[A[A[A[A[A[A[A







 80%|███████▉  | 181/227 [00:03<00:00, 47.91it/s][A[A[A[A[A[A[A[A







 82%|████████▏ | 187/227 [00:03<00:00, 49.13it/s][A[A[A[A[A[A[A[A







 85%|████████▌ | 193/227 [00:03<00:00, 51.63it/s][A[A[A[A[A[A[A[A







 88%|████████▊ | 199/227 [00:03<00:00, 53.81it/s][A[A[A[A[A[A[A[A







 91%|█████████ | 206/227 [00:04<00:00, 55.89it/s][A[A[A[A[A[A[A[A







 93%|█████████▎| 212/227 [00:04<00:00, 55.05it/s][A[A[A[A[A[A[A[A







 96%|█████████▌| 218/227 [00:04<00:00, 51.59it/s][A[A[A[A[A[A[A[A







Epoch:  90%|█████████ | 18/20 [01:09<00:08,  4.09s/it]A[A[A[A[A[A[A





100%|██████████| 227/2

Epoch 18 | Step 4100 | Train_loss 3.3781042098999023 | Val_loss = 3.2758699357509613








 11%|█▏        | 26/227 [00:00<00:05, 37.00it/s][A[A[A[A[A[A





 14%|█▍        | 32/227 [00:00<00:04, 40.59it/s][A[A[A[A[A[A





 17%|█▋        | 39/227 [00:00<00:04, 45.20it/s][A[A[A[A[A[A





 19%|█▉        | 44/227 [00:00<00:03, 46.29it/s][A[A[A[A[A[A





 22%|██▏       | 49/227 [00:01<00:03, 45.64it/s][A[A[A[A[A[A





 24%|██▍       | 55/227 [00:01<00:03, 48.72it/s][A[A[A[A[A[A





 27%|██▋       | 61/227 [00:01<00:03, 46.32it/s][A[A[A[A[A[A





 29%|██▉       | 66/227 [00:01<00:03, 46.51it/s][A[A[A[A[A[A





 32%|███▏      | 73/227 [00:01<00:03, 50.46it/s][A[A[A[A[A[A





 35%|███▍      | 79/227 [00:01<00:02, 52.20it/s][A[A[A[A[A[A





 38%|███▊      | 86/227 [00:01<00:02, 54.75it/s][A[A[A[A[A[A





 41%|████      | 93/227 [00:01<00:02, 56.78it/s][A[A[A[A[A[A





 44%|████▎     | 99/227 [00:01<00:02, 57.58it/s][A[A[A[A[A[A





 46%|████▋     | 105/227 [00:02<00:02, 48.32it/s][A[A[A

Epoch 18 | Step 4200 | Train_loss 3.2187132835388184 | Val_loss = 3.251780182123184








 57%|█████▋    | 129/227 [00:02<00:01, 50.73it/s][A[A[A[A[A[A





 59%|█████▉    | 135/227 [00:02<00:01, 52.60it/s][A[A[A[A[A[A





 62%|██████▏   | 141/227 [00:02<00:01, 54.25it/s][A[A[A[A[A[A





 65%|██████▍   | 147/227 [00:02<00:01, 55.10it/s][A[A[A[A[A[A





 67%|██████▋   | 153/227 [00:02<00:01, 55.86it/s][A[A[A[A[A[A





 70%|███████   | 159/227 [00:03<00:01, 52.27it/s][A[A[A[A[A[A





 73%|███████▎  | 165/227 [00:03<00:01, 49.45it/s][A[A[A[A[A[A





 75%|███████▌  | 171/227 [00:03<00:01, 51.78it/s][A[A[A[A[A[A





 78%|███████▊  | 177/227 [00:03<00:01, 45.80it/s][A[A[A[A[A[A





 81%|████████  | 183/227 [00:03<00:00, 48.42it/s][A[A[A[A[A[A





 83%|████████▎ | 189/227 [00:03<00:00, 50.52it/s][A[A[A[A[A[A





 86%|████████▌ | 195/227 [00:03<00:00, 51.93it/s][A[A[A[A[A[A





 89%|████████▊ | 201/227 [00:03<00:00, 52.49it/s][A[A[A[A[A[A





 91%|█████████ | 207/227 [00:04<00:00, 53.55i

Epoch 18 | Step 4300 | Train_loss 3.2907485961914062 | Val_loss = 3.2042770385742188


Epoch:  95%|█████████▌| 19/20 [01:14<00:04,  4.23s/it]







  0%|          | 0/227 [00:00<?, ?it/s][A[A[A[A[A[A[A[A







  2%|▏         | 5/227 [00:00<00:04, 46.28it/s][A[A[A[A[A[A[A[A







  5%|▍         | 11/227 [00:00<00:04, 48.34it/s][A[A[A[A[A[A[A[A







  7%|▋         | 17/227 [00:00<00:04, 50.22it/s][A[A[A[A[A[A[A[A







 10%|█         | 23/227 [00:00<00:03, 51.53it/s][A[A[A[A[A[A[A[A







 13%|█▎        | 29/227 [00:00<00:03, 52.49it/s][A[A[A[A[A[A[A[A







 15%|█▌        | 35/227 [00:00<00:03, 53.29it/s][A[A[A[A[A[A[A[A







 18%|█▊        | 41/227 [00:00<00:03, 54.45it/s][A[A[A[A[A[A[A[A







 21%|██        | 47/227 [00:00<00:03, 49.46it/s][A[A[A[A[A[A[A[A







 23%|██▎       | 52/227 [00:01<00:03, 49.44it/s][A[A[A[A[A[A[A[A







 26%|██▌       | 59/227 [00:01<00:03, 53.21it/s][A[A[A[A[A[A[A[A







 29%|██▊       | 65/227 [00:01<00:03, 49.85it/s][A[A[A[A[A[A[

Epoch 19 | Step 4400 | Train_loss 3.138442277908325 | Val_loss = 3.1581051349639893










 44%|████▍     | 100/227 [00:01<00:02, 45.06it/s][A[A[A[A[A[A[A[A







 47%|████▋     | 106/227 [00:02<00:02, 47.90it/s][A[A[A[A[A[A[A[A







 50%|████▉     | 113/227 [00:02<00:02, 51.02it/s][A[A[A[A[A[A[A[A







 52%|█████▏    | 119/227 [00:02<00:02, 47.32it/s][A[A[A[A[A[A[A[A







 56%|█████▌    | 126/227 [00:02<00:01, 51.50it/s][A[A[A[A[A[A[A[A







 58%|█████▊    | 132/227 [00:02<00:01, 53.39it/s][A[A[A[A[A[A[A[A







 61%|██████    | 138/227 [00:02<00:01, 49.11it/s][A[A[A[A[A[A[A[A







 63%|██████▎   | 144/227 [00:02<00:01, 51.17it/s][A[A[A[A[A[A[A[A







 66%|██████▌   | 150/227 [00:02<00:01, 53.25it/s][A[A[A[A[A[A[A[A







 69%|██████▉   | 157/227 [00:03<00:01, 55.36it/s][A[A[A[A[A[A[A[A







 72%|███████▏  | 164/227 [00:03<00:01, 56.92it/s][A[A[A[A[A[A[A[A







 75%|███████▍  | 170/227 [00:03<00:00, 57.24it/s][A[A[A[A[A[A[A[A







 78%|███████▊  | 176

Epoch 19 | Step 4500 | Train_loss 3.21132755279541 | Val_loss = 3.1368328332901










 88%|████████▊ | 199/227 [00:03<00:00, 48.14it/s][A[A[A[A[A[A[A[A







 90%|█████████ | 205/227 [00:03<00:00, 50.60it/s][A[A[A[A[A[A[A[A







 93%|█████████▎| 211/227 [00:04<00:00, 46.72it/s][A[A[A[A[A[A[A[A







 96%|█████████▌| 217/227 [00:04<00:00, 48.87it/s][A[A[A[A[A[A[A[A







Epoch: 100%|██████████| 20/20 [01:18<00:00,  4.29s/it]A[A[A[A[A[A[A






100%|██████████| 227/227 [00:23<00:00, 54.75it/s][A[A[A[A[A[A[A





100%|██████████| 227/227 [00:20<00:00, 46.03it/s][A[A[A[A[A[A







100%|██████████| 227/227 [00:15<00:00, 51.40it/s][A[A[A[A[A[A[A[A

In [95]:
loss = criterion(prediction, trg)

In [96]:
loss.item()

8.685587882995605