In [73]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchtext
from torchtext import data
from torchtext import datasets
from torch.autograd import Variable

import spacy, random
import numpy as np
from tqdm import tqdm

# Some utility functions
def tokenize_de(text):
    return [tok.text for tok in spacy_de.tokenizer(text)]

def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]

global USE_CUDA
USE_CUDA = torch.cuda.is_available()
DEVICE = 0 if USE_CUDA else -1
MAX_LEN = 20
MIN_FREQ = 5
BATCH_SIZE = 64

In [2]:
spacy_de = spacy.load('de')
spacy_en = spacy.load('en')

DE = data.Field(tokenize=tokenize_de)
EN = data.Field(tokenize=tokenize_en, init_token = '<s>', eos_token = '</s>') # only target needs BOS/EOS
train, val, test = datasets.IWSLT.splits(exts=('.de', '.en'), fields=(DE, EN), filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and len(vars(x)['trg']) <= MAX_LEN)

DE.build_vocab(train.src, min_freq=MIN_FREQ)
EN.build_vocab(train.trg, min_freq=MIN_FREQ)

train_iter, val_iter = data.BucketIterator.splits((train, val), batch_size=BATCH_SIZE, device=DEVICE, repeat=False, sort_key=lambda x: len(x.src))

def str_to_tensor(string, src_lang = DE):
    string = string.split()
    word_ids = [src_lang.vocab.stoi[word] for word in string]
    word_tensor = Variable(torch.LongTensor(word_ids))
    return word_tensor
    
def tensor_to_kaggle(tensor, trg_lang = EN):
    return '|'.join([trg_lang.vocab.itos[word_id] for word_id in tensor])
    
def tensor_to_str(tensor, trg_lang = EN):
    return ' '.join([trg_lang.vocab.itos[word_id] for word_id in tensor])

In [336]:
class Encoder(nn.Module):
    """ Module for encoding source sentences """
    def __init__(self, src_vsize, hidden_dim, n_layers = 1):
        super(Encoder, self).__init__()
        
        self.src_vsize = src_vsize
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.embeddings = nn.Embedding(src_vsize, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers = n_layers, batch_first = False)
        
    def forward(self, src_words):
        embedded = self.embeddings(src_words)
        out, hdn = self.lstm(embedded)
        return out, hdn

class Attention(nn.Module):
    """ Module for computing attention. Concat method omitted due to poor performance in paper
    and slow computation time due to the incompatibility of the concatenation operations with
    efficiency optimization via batch operations
    """
    def __init__(self, method, hidden_dim, batch_size = 1):
        super(Attention, self).__init__()
        
        self.method = method
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_dim, self.hidden_dim)

        elif self.method != 'dot':
            print("No method chosen from ['general', 'dot']. Defaulting to 'dot'.")
            self.method = 'dot'

    def forward(self, hidden, encoder_outputs):
        """ Get attention weights """
        attn_energies = self.score(hidden, encoder_outputs)
        return F.softmax(attn_energies, dim = 2)
    
    def score(self, decoder_outputs, encoder_outputs):
        """ 
        Dot (fastest method): a dot product between the decoder hidden state and the encoder final hidden state 
        General: a dot product between the decoder hidden state and a linear transform of the encoder final hidden state
        """
        
        if self.method == 'general':
            energy = self.attn(encoder_outputs.transpose(1,2)).transpose(1,2)
            energy = torch.bmm(decoder_outputs, energy)
            return energy

        elif self.method == 'dot':
            energy = torch.bmm(decoder_outputs, encoder_outputs)
            return energy

class AttentionDecoder(nn.Module):
    def __init__(self, hidden_dim, trg_vsize, n_layers = 1, dropout_p=0.1, method = 'dot'):
        super(AttentionDecoder, self).__init__()
        
        # Define parameters
        self.hidden_dim = hidden_dim
        self.trg_vsize = trg_vsize
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        
        # Define layers
        self.embedding = nn.Embedding(trg_vsize, hidden_dim)
        self.dropout = nn.Dropout(dropout_p)
        self.attn = Attention(method, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, n_layers, dropout=dropout_p)
        self.downsample = nn.Linear(hidden_dim * 2, hidden_dim)
        self.proj = nn.Linear(hidden_dim, trg_vsize)
    
    def forward(self, trg_words, hidden, encoder_outputs):
        # Get the embedding of the current input word (last output word)
        embedded = self.embedding(trg_words) 
        embedded = self.dropout(embedded)

        # Get hidden state from input word and last hidden state
        decoder_outputs, new_hidden = self.lstm(embedded, hidden)

        # Calculate attention from current lstm out and encoder output
        attn_weights = self.attn(decoder_outputs.transpose(0,1), encoder_outputs.transpose(0,1).transpose(1,2))
        context = torch.bmm(attn_weights, encoder_outputs.transpose(0, 1)).transpose(0, 1)

        # (Luong eq. 5)
        concat_input = torch.cat((decoder_outputs, context), 2)
        concat_output = F.tanh(self.downsample(concat_input))

        # (Luong eq. 6, no more softmax)
        output = self.proj(concat_output)
        return output, new_hidden, attn_weights.transpose(0,1) 
        
class LuongAttention(nn.Module):
    """ Super class """
    def __init__(self, src_vsize, trg_vsize, hidden_dim, n_layers = 1, method = 'dot'):
        super(LuongAttention, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.encoder = Encoder(src_vsize, hidden_dim)
        self.decoder = AttentionDecoder(hidden_dim, trg_vsize, method = method)

In [None]:
class Trainer:
    def __init__(self, train_iter, val_iter):
        """ Initialize trainer class with Torchtext iterators """
        self.train_iter = train_iter
        self.val_iter = val_iter
        
    def train(self, num_epochs, model, lr = 1, clip = 5):
        """ Train model using SGD """
        parameters = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.SGD(params = parameters, lr=lr)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, range(8, num_epochs), gamma=0.50, last_epoch=-1)
        best_ppl = 100
        
        # Binary masking
        weight = torch.cuda.FloatTensor(len(EN.vocab.itos)).fill_(1) if USE_CUDA else torch.FloatTensor(len(EN.vocab.itos)).fill_(1)
        self.padding_id = EN.vocab.stoi[EN.pad_token]
        weight[self.padding_id] = 0
        weight = Variable(weight)
        if USE_CUDA: 
            weight = weight.cuda()

        criterion = nn.CrossEntropyLoss(weight = weight, size_average = False)
        
        all_losses = []
        for epoch in tqdm(range(1, num_epoc4hs + 1)):
            scheduler.step()
            model.train()
            epoch_loss = []
            for batch in tqdm(self.train_iter):
                
                optimizer.zero_grad()

                batch_loss = self.train_batch(batch, criterion, model, teacher_forcing_ratio=0)
                batch_loss.backward()

                nn.utils.clip_grad_norm(model.parameters(), clip)

                optimizer.step()

                epoch_loss.append(batch_loss.data[0])
                                                
                if len(epoch_loss) % 200 == 0:
                    step = len(epoch_loss)
                    cur_loss = np.mean(epoch_loss)
                    train_ppl = np.exp(np.mean(epoch_loss))
                    print('Step: {0} | Loss: {1} | Train PPL: {2}'.format(step, cur_loss, train_ppl))
                    print('Wie würde eine solche Zukunft aussehen ? -->', self.translate('Wie würde eine solche Zukunft aussehen ?', model)[0])
                    print('Als ich in meinen 20ern war , hatte ich meine erste Psychotherapie-Patientin . -->', self.translate('Als ich in meinen 20ern war , hatte ich meine erste Psychotherapie-Patientin .', model)[0])
                    print('Ich war Doktorandin und studierte Klinische Psychologie in Berkeley . -->', self.translate('Ich war Doktorandin und studierte Klinische Psychologie in Berkeley .', model)[0])
                    print('Sie war eine 26-jÃ¤hrige Frau namens Alex . -->', self.translate('Sie war eine 26-jÃ¤hrige Frau namens Alex .', model)[0])
                    print('Und als ich das hÃ¶rte , war ich erleichtert . -->', self.translate('Und als ich das hÃ¶rte , war ich erleichtert .', model)[0])

            epoch_loss = np.mean(epoch_loss)
            train_ppl = np.exp(epoch_loss)
            val_ppl = self.validate(criterion, model)

            print('Epoch: {0} | Loss: {1} | Train PPL: {2} | Val PPL: {3}'.format(epoch, epoch_loss, train_ppl, val_ppl))
            all_losses.append(epoch_loss)
            
            # early stopping
            if val_ppl < best_ppl:
                best_ppl = val_ppl
                self.best_model = model
                
        best_model = self.best_model
        torch.save(best_model.cpu(), best_model.__class__.__name__ + ".pth")
        return best_model.cpu(), all_losses     
                
    def train_batch(self, batch, criterion, model, teacher_forcing_ratio = 0):
        """ Compute training batch using teacher forcing """
        # Initialize batch loss to zero
        target_length = batch.trg.size()[0]

        # Run words through encoder
        encoder_outputs, decoder_hidden = model.encoder(batch.src)

        # Use teacher forcing or not
        use_teacher_forcing = random.random() > teacher_forcing_ratio
        if use_teacher_forcing:

            # With teacher forcing, we use the previous true target as the next word input.
            # This allows us to batch the softmax, resulting in large speed-ups.
            shift = Variable(torch.LongTensor(batch.batch_size).fill_(self.padding_id)).unsqueeze(0)
            if USE_CUDA:
                shift = shift.cuda()

            # Get outputs for batch, using encoder hidden as initialization for decoder hidden
            decoder_outputs, decoder_hidden, decoder_attn = model.decoder(batch.trg, decoder_hidden, encoder_outputs)

            # Reshape outputs, add shift tensor to targets
            preds = decoder_outputs.view(target_length * batch.batch_size, -1)
            targets = torch.cat((batch.trg[1:], shift), dim = 0).view(-1)

            # Compute loss in a batch (more efficient than loop)
            loss = F.cross_entropy(preds, targets)
            return loss
            
        else:
            
            # Without teacher forcing: use network's own prediction as the next input
            # Prepare input and output variables
            loss = 0
            decoder_inputs = batch.trg[0, :].unsqueeze(0)
            
            for trg_word_idx in range(target_length-1):
                decoder_output, decoder_hidden, decoder_attn = model.decoder(decoder_inputs, decoder_hidden, encoder_outputs)

                # Get most likely word index (highest value) from output
                topk_probs, topk_words_idx = decoder_output.data.topk(1, dim = 2)

                # Chosen words are next input
                decoder_inputs = Variable(topk_words_idx).squeeze(2)

                # Compute loss for all words in batch
                num_words = batch.trg[trg_word_idx+1, :].ne(self.padding_id).sum().float()
                loss += (criterion(decoder_output.squeeze(0), batch.trg[trg_word_idx+1, :]) / num_words) if num_words.data[0] > 0 else 0

            loss /= batch.batch_size
            return loss
    
    def translate(self, string, model, maxlength = None):  
        """ Predict translation for an input string """
        # Make string a tensor
        tensor = str_to_tensor(string)
        tensor = tensor.unsqueeze(1)
        if USE_CUDA:
            tensor = tensor.cuda()

        # Run words through encoder
        encoder_outputs, decoder_hidden = model.encoder(tensor)

        # First token must always start of sentence <s>
        decoder_inputs = Variable(torch.LongTensor([EN.vocab.stoi[EN.init_token]])).unsqueeze(0)
        if USE_CUDA: 
            decoder_inputs = decoder_inputs.cuda()

        # if no maxlength, let it be 3*length original
        maxlength = maxlength if maxlength else 3 * tensor.shape[0]
        out_string = []

        # Predict words until an <eos> token or maxlength,
        for trg_word_idx in range(maxlength):
            decoder_output, decoder_hidden, decoder_attn = model.decoder(decoder_inputs, decoder_hidden, encoder_outputs)

            # Get most likely word index (highest value) from output
            prob_dist = F.log_softmax(decoder_output, dim = 2)
            top_probs, top_word_idx = prob_dist.data.topk(1, dim = 2)
            ni = top_word_idx.squeeze(0)

            decoder_inputs = Variable(ni) # Chosen word is next input
            out_string.append(ni[0][0])

            # Stop at end of sentence (not necessary when using known targets)
            if ni[0][0] == EN.vocab.stoi[EN.eos_token]: 
                break

        out_string = tensor_to_str(out_string)
        return out_string, decoder_attn
    
    def evaluate_kaggle(self, string, model, ngrams = 3, context = 0, top_k = 100):
        """ Beam search the best starting trigrams for Kaggle input sentences """
        # Convert string to tensor for embedding lookups
        tensor = str_to_tensor(string)
        tensor = tensor.unsqueeze(1)
        if USE_CUDA:
            tensor = tensor.cuda()

        # Run words through encoder to get init hidden for decoder
        encoder_outputs, encoder_hidden = model.encoder(tensor)

        # Start collecting hiddens, prepare initial input variables
        decoder_inputs = Variable(torch.LongTensor([EN.vocab.stoi[EN.init_token]])).unsqueeze(0)
        if USE_CUDA: 
            decoder_inputs = decoder_inputs.cuda()

        # Compute the top K first words, so that we have something to work with
        decoder_output, decoder_hidden, dec_attn = model.decoder(decoder_inputs, encoder_hidden, encoder_outputs)
        prob_dist = F.log_softmax(decoder_output, dim = 2)
        top_probs, top_word_idx = prob_dist.data.topk(top_k, dim = 2)
        decoder_inputs = Variable(top_word_idx)
        if USE_CUDA:
            decoder_inputs = decoder_inputs.cuda()

        # Begin table to keep our outputs, output_probs
        outputs = [[word] for word in list(decoder_inputs.data[0][0])]
        output_probs = list(top_probs[0][0])

        # For using the correct hidden to predict next word. Initially it is 100x copy
        all_hiddens = [decoder_hidden for _ in range(top_k)]

        # Get top_k beams for 
        for trg_word_idx in range(1, ngrams+context):
            beam_search_idx, beam_search_probs = [], []
            for k in range(top_k):
                decoder_output, new_hdn, dec_attn = model.decoder(decoder_inputs[:, :, k], all_hiddens[k], encoder_outputs)
                prob_dist = F.log_softmax(decoder_output, dim = 2)
                top_probs, top_word_idx = prob_dist.data.topk(top_k, dim = 2)
                beam_search_idx.append(list(top_word_idx[0][0]))
                beam_search_probs.append(list(top_probs[0][0]))
                all_hiddens[k] = new_hdn

            # Top K words idx
            next_word_idx = np.argsort(np.hstack(beam_search_probs))[::-1][:top_k] 

            # Backpointers to the input word that each top word was drawn from
            back_pointers = [int(np.floor(word / top_k)) for word in next_word_idx] 

            # Update output list with new decoder inputs and their corresponding probabilities
            next_words = [np.hstack(beam_search_idx)[ids] for ids in next_word_idx]
            next_probs = [np.hstack(beam_search_probs)[ids] for ids in next_word_idx]
            decoder_inputs = Variable(torch.LongTensor([int(word) for word in next_words])).unsqueeze(0).unsqueeze(0)
            if USE_CUDA:
                decoder_inputs = decoder_inputs.cuda()

            # update hiddens, outputs
            all_hiddens = [all_hiddens[pointer] for pointer in back_pointers]
            outputs = [outputs[pointer] + [word] for pointer, word in zip(back_pointers, next_words)]
            output_probs = [output_probs[pointer] + new_p for pointer, new_p in zip(back_pointers, next_probs)]

        prob_sort_idx = np.argsort(output_probs)[::-1]
        outputs = [outputs[idx] for idx in prob_sort_idx]
        outputs = [output[:ngrams] for output in outputs]
        out = [tensor_to_kaggle(tsr) for tsr in outputs]
        return ' '.join(out)
        
    def validate(self, criterion, model):
        """ Compute validation set perplexity """
        loss = []
        for batch in tqdm(self.val_iter):
            batch_loss = self.train_batch(batch, criterion, model)
            loss.append(batch_loss.data[0])
        
        val_ppl = np.exp(np.mean(loss))
        return val_ppl
    
    def write_kaggle(self, test_file, model):
        """ Write outputs to kaggle """
        with open(test_file, 'r') as fh:
            datasource = fh.read().splitlines()
            
        print('Evaluating on {0}...'.format(test_file))
        with open('output.txt', 'w') as fh:
            fh.write('id,word\n')
            for idx, string in tqdm(enumerate(datasource)):
                output = self.evaluate_kaggle(string, model)
                output = str(idx+1) + ',' + self.escape_kaggle(output) + '\n'
                fh.write(output)
        print('File saved.')
        
    def escape_kaggle(self, l):
        """ So kaggle doesn't yell at you when submitting results """
        return l.replace("\"", "<quote>").replace(",", "<comma>")

In [None]:
model = LuongAttention(src_vsize = len(DE.vocab.itos), trg_vsize = len(EN.vocab.itos), hidden_dim = 200, n_layers = 2)
trainer = Trainer(train_iter, val_iter)
if USE_CUDA: model = model.cuda()
print('Using cuda: ', np.all([param.is_cuda for param in model.parameters()]))
model, all_losses = trainer.train(20, model)
if USE_CUDA: model = model.cuda() # don't know why but this will throw errors on GPU otherwise if not re-cuda'd
trainer.write_kaggle('../data/source_test.txt', model)
torch.save(model.cpu(), model.__class__.__name__ + ".pth")

  0%|          | 0/12 [00:00<?, ?it/s]
  0%|          | 0/1861 [00:00<?, ?it/s][A

Using cuda:  False



  0%|          | 1/1861 [00:01<49:25,  1.59s/it][A
  0%|          | 2/1861 [00:02<38:45,  1.25s/it][A
  0%|          | 3/1861 [00:03<33:55,  1.10s/it][A
  0%|          | 4/1861 [00:03<30:33,  1.01it/s][A
  0%|          | 5/1861 [00:04<29:28,  1.05it/s][A
  0%|          | 6/1861 [00:05<28:32,  1.08it/s][A
  0%|          | 7/1861 [00:06<27:15,  1.13it/s][A
  0%|          | 8/1861 [00:07<27:04,  1.14it/s][A
  0%|          | 9/1861 [00:07<26:26,  1.17it/s][A
  1%|          | 10/1861 [00:08<25:50,  1.19it/s][A
  1%|          | 11/1861 [00:09<25:47,  1.20it/s][A
  1%|          | 12/1861 [00:10<26:08,  1.18it/s][A
  1%|          | 13/1861 [00:10<25:54,  1.19it/s][A
  1%|          | 14/1861 [00:11<25:05,  1.23it/s][A
  1%|          | 15/1861 [00:12<25:07,  1.22it/s][A
  1%|          | 16/1861 [00:12<24:56,  1.23it/s][A
  1%|          | 17/1861 [00:13<24:59,  1.23it/s][A
  1%|          | 18/1861 [00:14<24:55,  1.23it/s][A
  1%|          | 19/1861 [00:15<24:45,  1.24it/s][A
 

Step: 100 | Loss: 4.535990056991577 | Train PPL: 93.31585760205613
Wie würde eine solche Zukunft aussehen ? --> And , , , , , , , , . </s>



  5%|▌         | 101/1861 [01:12<21:07,  1.39it/s][A
  5%|▌         | 102/1861 [01:13<21:10,  1.38it/s][A
  6%|▌         | 103/1861 [01:14<21:08,  1.39it/s][A
  6%|▌         | 104/1861 [01:15<21:09,  1.38it/s][A
  6%|▌         | 105/1861 [01:15<21:10,  1.38it/s][A
  6%|▌         | 106/1861 [01:16<21:11,  1.38it/s][A
  6%|▌         | 107/1861 [01:17<21:07,  1.38it/s][A
  6%|▌         | 108/1861 [01:17<21:05,  1.39it/s][A
  6%|▌         | 109/1861 [01:18<21:05,  1.38it/s][A
  6%|▌         | 110/1861 [01:19<21:05,  1.38it/s][A
  6%|▌         | 111/1861 [01:20<21:02,  1.39it/s][A
  6%|▌         | 112/1861 [01:20<21:03,  1.38it/s][A
  6%|▌         | 113/1861 [01:21<20:59,  1.39it/s][A
  6%|▌         | 114/1861 [01:22<20:59,  1.39it/s][A
  6%|▌         | 115/1861 [01:22<20:57,  1.39it/s][A
  6%|▌         | 116/1861 [01:23<20:56,  1.39it/s][A
  6%|▋         | 117/1861 [01:24<20:56,  1.39it/s][A
  6%|▋         | 118/1861 [01:25<20:57,  1.39it/s][A
  6%|▋         | 119/1861 [

Step: 200 | Loss: 4.02700559079647 | Train PPL: 56.09269503646751
Wie würde eine solche Zukunft aussehen ? --> And I the <unk> , I the <unk> . </s>



 11%|█         | 201/1861 [02:25<19:59,  1.38it/s][A
 11%|█         | 202/1861 [02:25<19:58,  1.38it/s][A
 11%|█         | 203/1861 [02:26<19:57,  1.38it/s][A
 11%|█         | 204/1861 [02:27<19:56,  1.38it/s][A
 11%|█         | 205/1861 [02:28<19:56,  1.38it/s][A
 11%|█         | 206/1861 [02:28<19:54,  1.39it/s][A
 11%|█         | 207/1861 [02:29<19:54,  1.38it/s][A
 11%|█         | 208/1861 [02:30<19:53,  1.38it/s][A
 11%|█         | 209/1861 [02:30<19:52,  1.39it/s][A
 11%|█▏        | 210/1861 [02:31<19:51,  1.39it/s][A
 11%|█▏        | 211/1861 [02:32<19:50,  1.39it/s][A
 11%|█▏        | 212/1861 [02:32<19:50,  1.39it/s][A
 11%|█▏        | 213/1861 [02:33<19:46,  1.39it/s][A
 11%|█▏        | 214/1861 [02:34<19:46,  1.39it/s][A
 12%|█▏        | 215/1861 [02:34<19:44,  1.39it/s][A
 12%|█▏        | 216/1861 [02:35<19:42,  1.39it/s][A
 12%|█▏        | 217/1861 [02:35<19:40,  1.39it/s][A
 12%|█▏        | 218/1861 [02:36<19:38,  1.39it/s][A
 12%|█▏        | 219/1861 [

Step: 300 | Loss: 3.785667298237483 | Train PPL: 44.06506529053575
Wie würde eine solche Zukunft aussehen ? --> What you do n't do n't do ? </s>



 16%|█▌        | 301/1861 [03:36<18:41,  1.39it/s][A
 16%|█▌        | 302/1861 [03:37<18:41,  1.39it/s][A
 16%|█▋        | 303/1861 [03:37<18:40,  1.39it/s][A
 16%|█▋        | 304/1861 [03:38<18:40,  1.39it/s][A
 16%|█▋        | 305/1861 [03:39<18:40,  1.39it/s][A
 16%|█▋        | 306/1861 [03:40<18:40,  1.39it/s][A
 16%|█▋        | 307/1861 [03:41<18:39,  1.39it/s][A
 17%|█▋        | 308/1861 [03:42<18:39,  1.39it/s][A
 17%|█▋        | 309/1861 [03:42<18:38,  1.39it/s][A
 17%|█▋        | 310/1861 [03:43<18:37,  1.39it/s][A
 17%|█▋        | 311/1861 [03:44<18:37,  1.39it/s][A
 17%|█▋        | 312/1861 [03:44<18:36,  1.39it/s][A
 17%|█▋        | 313/1861 [03:45<18:36,  1.39it/s][A
 17%|█▋        | 314/1861 [03:46<18:35,  1.39it/s][A
 17%|█▋        | 315/1861 [03:47<18:35,  1.39it/s][A
 17%|█▋        | 316/1861 [03:48<18:35,  1.38it/s][A
 17%|█▋        | 317/1861 [03:48<18:34,  1.39it/s][A
 17%|█▋        | 318/1861 [03:49<18:34,  1.38it/s][A
 17%|█▋        | 319/1861 [

Step: 400 | Loss: 3.6129940193891525 | Train PPL: 37.07689574382876
Wie würde eine solche Zukunft aussehen ? --> What we do n't do n't do ? </s>



 22%|██▏       | 401/1861 [04:48<17:31,  1.39it/s][A
 22%|██▏       | 402/1861 [04:49<17:30,  1.39it/s][A
 22%|██▏       | 403/1861 [04:50<17:29,  1.39it/s][A
 22%|██▏       | 404/1861 [04:50<17:28,  1.39it/s][A
 22%|██▏       | 405/1861 [04:51<17:27,  1.39it/s][A
 22%|██▏       | 406/1861 [04:52<17:26,  1.39it/s][A
 22%|██▏       | 407/1861 [04:52<17:25,  1.39it/s][A
 22%|██▏       | 408/1861 [04:53<17:25,  1.39it/s][A
 22%|██▏       | 409/1861 [04:54<17:24,  1.39it/s][A
 22%|██▏       | 410/1861 [04:54<17:23,  1.39it/s][A
 22%|██▏       | 411/1861 [04:55<17:22,  1.39it/s][A
 22%|██▏       | 412/1861 [04:56<17:21,  1.39it/s][A
 22%|██▏       | 413/1861 [04:56<17:20,  1.39it/s][A
 22%|██▏       | 414/1861 [04:57<17:20,  1.39it/s][A
 22%|██▏       | 415/1861 [04:58<17:20,  1.39it/s][A
 22%|██▏       | 416/1861 [04:59<17:20,  1.39it/s][A
 22%|██▏       | 417/1861 [04:59<17:18,  1.39it/s][A
 22%|██▏       | 418/1861 [05:00<17:18,  1.39it/s][A
 23%|██▎       | 419/1861 [

Step: 500 | Loss: 3.490727802157402 | Train PPL: 32.80981807510426 | Val PPL: 17.194137870216718
Wie würde eine solche Zukunft aussehen ? --> What 's a lot of the world ? </s>



 27%|██▋       | 501/1861 [06:02<16:25,  1.38it/s][A
 27%|██▋       | 502/1861 [06:03<16:23,  1.38it/s][A
 27%|██▋       | 503/1861 [06:04<16:23,  1.38it/s][A
 27%|██▋       | 504/1861 [06:05<16:22,  1.38it/s][A
 27%|██▋       | 505/1861 [06:05<16:22,  1.38it/s][A
 27%|██▋       | 506/1861 [06:06<16:21,  1.38it/s][A
 27%|██▋       | 507/1861 [06:07<16:21,  1.38it/s][A
 27%|██▋       | 508/1861 [06:08<16:20,  1.38it/s][A
 27%|██▋       | 509/1861 [06:08<16:19,  1.38it/s][A
 27%|██▋       | 510/1861 [06:09<16:19,  1.38it/s][A
 27%|██▋       | 511/1861 [06:10<16:19,  1.38it/s][A
 28%|██▊       | 512/1861 [06:11<16:18,  1.38it/s][A
 28%|██▊       | 513/1861 [06:11<16:17,  1.38it/s][A
 28%|██▊       | 514/1861 [06:12<16:16,  1.38it/s][A
 28%|██▊       | 515/1861 [06:13<16:16,  1.38it/s][A
 28%|██▊       | 516/1861 [06:14<16:15,  1.38it/s][A
 28%|██▊       | 517/1861 [06:14<16:14,  1.38it/s][A
 28%|██▊       | 518/1861 [06:15<16:14,  1.38it/s][A
 28%|██▊       | 519/1861 [

Step: 600 | Loss: 3.3945387574036916 | Train PPL: 29.800904857728952
Wie würde eine solche Zukunft aussehen ? --> What 's a lot of the <unk> ? </s>



 32%|███▏      | 601/1861 [07:14<15:10,  1.38it/s][A
 32%|███▏      | 602/1861 [07:14<15:09,  1.38it/s][A
 32%|███▏      | 603/1861 [07:15<15:09,  1.38it/s][A
 32%|███▏      | 604/1861 [07:16<15:08,  1.38it/s][A
 33%|███▎      | 605/1861 [07:17<15:07,  1.38it/s][A
 33%|███▎      | 606/1861 [07:18<15:07,  1.38it/s][A
 33%|███▎      | 607/1861 [07:18<15:06,  1.38it/s][A
 33%|███▎      | 608/1861 [07:19<15:06,  1.38it/s][A
 33%|███▎      | 609/1861 [07:20<15:05,  1.38it/s][A
 33%|███▎      | 610/1861 [07:21<15:04,  1.38it/s][A
 33%|███▎      | 611/1861 [07:21<15:03,  1.38it/s][A
 33%|███▎      | 612/1861 [07:22<15:02,  1.38it/s][A
 33%|███▎      | 613/1861 [07:23<15:02,  1.38it/s][A
 33%|███▎      | 614/1861 [07:24<15:01,  1.38it/s][A
 33%|███▎      | 615/1861 [07:24<15:01,  1.38it/s][A
 33%|███▎      | 616/1861 [07:25<15:00,  1.38it/s][A
 33%|███▎      | 617/1861 [07:26<14:59,  1.38it/s][A
 33%|███▎      | 618/1861 [07:27<14:59,  1.38it/s][A
 33%|███▎      | 619/1861 [

Step: 700 | Loss: 3.3131849087987626 | Train PPL: 27.472483639027292
Wie würde eine solche Zukunft aussehen ? --> How do you have a lot of the world ? </s>



 38%|███▊      | 701/1861 [08:26<13:57,  1.38it/s][A
 38%|███▊      | 702/1861 [08:27<13:57,  1.38it/s][A
 38%|███▊      | 703/1861 [08:27<13:56,  1.38it/s][A
 38%|███▊      | 704/1861 [08:28<13:55,  1.39it/s][A
 38%|███▊      | 705/1861 [08:29<13:54,  1.39it/s][A
 38%|███▊      | 706/1861 [08:29<13:54,  1.38it/s][A
 38%|███▊      | 707/1861 [08:30<13:53,  1.38it/s][A
 38%|███▊      | 708/1861 [08:31<13:52,  1.38it/s][A
 38%|███▊      | 709/1861 [08:32<13:52,  1.38it/s][A
 38%|███▊      | 710/1861 [08:32<13:50,  1.39it/s][A
 38%|███▊      | 711/1861 [08:33<13:50,  1.38it/s][A
 38%|███▊      | 712/1861 [08:34<13:49,  1.38it/s][A
 38%|███▊      | 713/1861 [08:34<13:48,  1.39it/s][A
 38%|███▊      | 714/1861 [08:35<13:47,  1.39it/s][A
 38%|███▊      | 715/1861 [08:35<13:47,  1.39it/s][A
 38%|███▊      | 716/1861 [08:36<13:46,  1.39it/s][A
 39%|███▊      | 717/1861 [08:37<13:45,  1.39it/s][A
 39%|███▊      | 718/1861 [08:37<13:44,  1.39it/s][A
 39%|███▊      | 719/1861 [

Step: 800 | Loss: 3.2451553720980884 | Train PPL: 25.665697483720013
Wie würde eine solche Zukunft aussehen ? --> What does a lot of the same ? </s>



 43%|████▎     | 801/1861 [09:37<12:44,  1.39it/s][A
 43%|████▎     | 802/1861 [09:38<12:44,  1.39it/s][A
 43%|████▎     | 803/1861 [09:39<12:43,  1.39it/s][A
 43%|████▎     | 804/1861 [09:40<12:42,  1.39it/s][A
 43%|████▎     | 805/1861 [09:40<12:41,  1.39it/s][A
 43%|████▎     | 806/1861 [09:41<12:41,  1.39it/s][A
 43%|████▎     | 807/1861 [09:42<12:40,  1.39it/s][A
 43%|████▎     | 808/1861 [09:42<12:39,  1.39it/s][A
 43%|████▎     | 809/1861 [09:43<12:38,  1.39it/s][A
 44%|████▎     | 810/1861 [09:44<12:38,  1.39it/s][A
 44%|████▎     | 811/1861 [09:45<12:37,  1.39it/s][A
 44%|████▎     | 812/1861 [09:45<12:36,  1.39it/s][A
 44%|████▎     | 813/1861 [09:46<12:36,  1.39it/s][A
 44%|████▎     | 814/1861 [09:47<12:35,  1.39it/s][A
 44%|████▍     | 815/1861 [09:48<12:34,  1.39it/s][A
 44%|████▍     | 816/1861 [09:48<12:34,  1.39it/s][A
 44%|████▍     | 817/1861 [09:49<12:33,  1.39it/s][A
 44%|████▍     | 818/1861 [09:50<12:32,  1.39it/s][A
 44%|████▍     | 819/1861 [

Step: 900 | Loss: 3.1853006286753547 | Train PPL: 24.174554877945674
Wie würde eine solche Zukunft aussehen ? --> What do you have a lot of the other . </s>



 48%|████▊     | 901/1861 [10:50<11:33,  1.39it/s][A
 48%|████▊     | 902/1861 [10:50<11:32,  1.39it/s][A
 49%|████▊     | 903/1861 [10:51<11:31,  1.39it/s][A
 49%|████▊     | 904/1861 [10:52<11:30,  1.39it/s][A
 49%|████▊     | 905/1861 [10:53<11:30,  1.39it/s][A
 49%|████▊     | 906/1861 [10:54<11:29,  1.38it/s][A
 49%|████▊     | 907/1861 [10:54<11:28,  1.39it/s][A
 49%|████▉     | 908/1861 [10:55<11:28,  1.39it/s][A
 49%|████▉     | 909/1861 [10:56<11:27,  1.39it/s][A
 49%|████▉     | 910/1861 [10:57<11:26,  1.39it/s][A
 49%|████▉     | 911/1861 [10:57<11:26,  1.38it/s][A
 49%|████▉     | 912/1861 [10:58<11:25,  1.38it/s][A
 49%|████▉     | 913/1861 [10:59<11:24,  1.38it/s][A
 49%|████▉     | 914/1861 [11:00<11:24,  1.38it/s][A
 49%|████▉     | 915/1861 [11:01<11:23,  1.38it/s][A
 49%|████▉     | 916/1861 [11:02<11:23,  1.38it/s][A
 49%|████▉     | 917/1861 [11:02<11:22,  1.38it/s][A
 49%|████▉     | 918/1861 [11:03<11:21,  1.38it/s][A
 49%|████▉     | 919/1861 [

Step: 1000 | Loss: 3.125590346932411 | Train PPL: 22.773335294577585 | Val PPL: 11.76765564337716
Wie würde eine solche Zukunft aussehen ? --> What do you have a lot of the world ? </s>



 54%|█████▍    | 1001/1861 [12:09<10:26,  1.37it/s][A
 54%|█████▍    | 1002/1861 [12:09<10:25,  1.37it/s][A
 54%|█████▍    | 1003/1861 [12:10<10:24,  1.37it/s][A
 54%|█████▍    | 1004/1861 [12:11<10:24,  1.37it/s][A
 54%|█████▍    | 1005/1861 [12:11<10:23,  1.37it/s][A
 54%|█████▍    | 1006/1861 [12:12<10:22,  1.37it/s][A
 54%|█████▍    | 1007/1861 [12:13<10:21,  1.37it/s][A
 54%|█████▍    | 1008/1861 [12:13<10:20,  1.37it/s][A
 54%|█████▍    | 1009/1861 [12:14<10:20,  1.37it/s][A
 54%|█████▍    | 1010/1861 [12:15<10:19,  1.37it/s][A
 54%|█████▍    | 1011/1861 [12:15<10:18,  1.37it/s][A
 54%|█████▍    | 1012/1861 [12:16<10:17,  1.37it/s][A
 54%|█████▍    | 1013/1861 [12:17<10:17,  1.37it/s][A
 54%|█████▍    | 1014/1861 [12:17<10:16,  1.37it/s][A
 55%|█████▍    | 1015/1861 [12:18<10:15,  1.37it/s][A
 55%|█████▍    | 1016/1861 [12:19<10:14,  1.37it/s][A
 55%|█████▍    | 1017/1861 [12:20<10:14,  1.37it/s][A
 55%|█████▍    | 1018/1861 [12:20<10:13,  1.37it/s][A
 55%|████

Step: 1100 | Loss: 3.073961719382893 | Train PPL: 21.627414924297685
Wie würde eine solche Zukunft aussehen ? --> What does a lot of people like ? </s>



 59%|█████▉    | 1101/1861 [13:19<09:12,  1.38it/s][A
 59%|█████▉    | 1102/1861 [13:20<09:11,  1.38it/s][A
 59%|█████▉    | 1103/1861 [13:21<09:10,  1.38it/s][A
 59%|█████▉    | 1104/1861 [13:22<09:09,  1.38it/s][A
 59%|█████▉    | 1105/1861 [13:22<09:09,  1.38it/s][A
 59%|█████▉    | 1106/1861 [13:23<09:08,  1.38it/s][A
 59%|█████▉    | 1107/1861 [13:24<09:07,  1.38it/s][A
 60%|█████▉    | 1108/1861 [13:25<09:07,  1.38it/s][A
 60%|█████▉    | 1109/1861 [13:25<09:06,  1.38it/s][A
 60%|█████▉    | 1110/1861 [13:26<09:05,  1.38it/s][A
 60%|█████▉    | 1111/1861 [13:26<09:04,  1.38it/s][A
 60%|█████▉    | 1112/1861 [13:27<09:03,  1.38it/s][A
 60%|█████▉    | 1113/1861 [13:28<09:03,  1.38it/s][A
 60%|█████▉    | 1114/1861 [13:28<09:02,  1.38it/s][A
 60%|█████▉    | 1115/1861 [13:29<09:01,  1.38it/s][A
 60%|█████▉    | 1116/1861 [13:30<09:01,  1.38it/s][A
 60%|██████    | 1117/1861 [13:31<09:00,  1.38it/s][A
 60%|██████    | 1118/1861 [13:31<08:59,  1.38it/s][A
 60%|████

Step: 1200 | Loss: 3.028526108612617 | Train PPL: 20.666749586369473
Wie würde eine solche Zukunft aussehen ? --> How do you have a lot of people ? </s>



 65%|██████▍   | 1201/1861 [14:30<07:58,  1.38it/s][A
 65%|██████▍   | 1202/1861 [14:30<07:57,  1.38it/s][A
 65%|██████▍   | 1203/1861 [14:31<07:56,  1.38it/s][A
 65%|██████▍   | 1204/1861 [14:32<07:56,  1.38it/s][A
 65%|██████▍   | 1205/1861 [14:33<07:55,  1.38it/s][A
 65%|██████▍   | 1206/1861 [14:33<07:54,  1.38it/s][A
 65%|██████▍   | 1207/1861 [14:34<07:53,  1.38it/s][A
 65%|██████▍   | 1208/1861 [14:35<07:53,  1.38it/s][A
 65%|██████▍   | 1209/1861 [14:36<07:52,  1.38it/s][A
 65%|██████▌   | 1210/1861 [14:36<07:51,  1.38it/s][A
 65%|██████▌   | 1211/1861 [14:37<07:51,  1.38it/s][A
 65%|██████▌   | 1212/1861 [14:38<07:50,  1.38it/s][A
 65%|██████▌   | 1213/1861 [14:39<07:49,  1.38it/s][A
 65%|██████▌   | 1214/1861 [14:39<07:48,  1.38it/s][A
 65%|██████▌   | 1215/1861 [14:40<07:48,  1.38it/s][A
 65%|██████▌   | 1216/1861 [14:41<07:47,  1.38it/s][A
 65%|██████▌   | 1217/1861 [14:42<07:46,  1.38it/s][A
 65%|██████▌   | 1218/1861 [14:42<07:46,  1.38it/s][A
 66%|████

Step: 1300 | Loss: 2.9862069393579778 | Train PPL: 19.810397762000584
Wie würde eine solche Zukunft aussehen ? --> What do you have a lot of the future ? </s>



 70%|██████▉   | 1301/1861 [15:40<06:44,  1.38it/s][A
 70%|██████▉   | 1302/1861 [15:40<06:43,  1.38it/s][A
 70%|███████   | 1303/1861 [15:41<06:43,  1.38it/s][A
 70%|███████   | 1304/1861 [15:42<06:42,  1.38it/s][A
 70%|███████   | 1305/1861 [15:43<06:41,  1.38it/s][A
 70%|███████   | 1306/1861 [15:43<06:41,  1.38it/s][A
 70%|███████   | 1307/1861 [15:44<06:40,  1.38it/s][A
 70%|███████   | 1308/1861 [15:45<06:39,  1.38it/s][A
 70%|███████   | 1309/1861 [15:45<06:38,  1.38it/s][A
 70%|███████   | 1310/1861 [15:46<06:38,  1.38it/s][A
 70%|███████   | 1311/1861 [15:46<06:37,  1.38it/s][A
 70%|███████   | 1312/1861 [15:47<06:36,  1.38it/s][A
 71%|███████   | 1313/1861 [15:48<06:35,  1.38it/s][A
 71%|███████   | 1314/1861 [15:48<06:35,  1.38it/s][A
 71%|███████   | 1315/1861 [15:49<06:34,  1.38it/s][A
 71%|███████   | 1316/1861 [15:50<06:33,  1.38it/s][A
 71%|███████   | 1317/1861 [15:51<06:32,  1.38it/s][A
 71%|███████   | 1318/1861 [15:51<06:32,  1.38it/s][A
 71%|████

Step: 1400 | Loss: 2.9478858681661744 | Train PPL: 19.065603890706008
Wie würde eine solche Zukunft aussehen ? --> What does a lot of people like this ? </s>



 75%|███████▌  | 1401/1861 [16:53<05:32,  1.38it/s][A
 75%|███████▌  | 1402/1861 [16:54<05:32,  1.38it/s][A
 75%|███████▌  | 1403/1861 [16:55<05:31,  1.38it/s][A
 75%|███████▌  | 1404/1861 [16:55<05:30,  1.38it/s][A
 75%|███████▌  | 1405/1861 [16:56<05:29,  1.38it/s][A
 76%|███████▌  | 1406/1861 [16:57<05:29,  1.38it/s][A
 76%|███████▌  | 1407/1861 [16:58<05:28,  1.38it/s][A
 76%|███████▌  | 1408/1861 [16:58<05:27,  1.38it/s][A
 76%|███████▌  | 1409/1861 [16:59<05:27,  1.38it/s][A
 76%|███████▌  | 1410/1861 [17:00<05:26,  1.38it/s][A
 76%|███████▌  | 1411/1861 [17:01<05:25,  1.38it/s][A
 76%|███████▌  | 1412/1861 [17:01<05:24,  1.38it/s][A
 76%|███████▌  | 1413/1861 [17:02<05:24,  1.38it/s][A
 76%|███████▌  | 1414/1861 [17:03<05:23,  1.38it/s][A
 76%|███████▌  | 1415/1861 [17:04<05:22,  1.38it/s][A
 76%|███████▌  | 1416/1861 [17:04<05:22,  1.38it/s][A
 76%|███████▌  | 1417/1861 [17:05<05:21,  1.38it/s][A
 76%|███████▌  | 1418/1861 [17:06<05:20,  1.38it/s][A
 76%|████

Step: 1500 | Loss: 2.910657858888308 | Train PPL: 18.368878723199504 | Val PPL: 9.506536512499661
Wie würde eine solche Zukunft aussehen ? --> How do you have a <unk> of the future ? </s>



 81%|████████  | 1501/1861 [18:08<04:21,  1.38it/s][A
 81%|████████  | 1502/1861 [18:09<04:20,  1.38it/s][A
 81%|████████  | 1503/1861 [18:10<04:19,  1.38it/s][A
 81%|████████  | 1504/1861 [18:10<04:18,  1.38it/s][A
 81%|████████  | 1505/1861 [18:11<04:18,  1.38it/s][A
 81%|████████  | 1506/1861 [18:12<04:17,  1.38it/s][A
 81%|████████  | 1507/1861 [18:13<04:16,  1.38it/s][A
 81%|████████  | 1508/1861 [18:14<04:16,  1.38it/s][A
 81%|████████  | 1509/1861 [18:15<04:15,  1.38it/s][A
 81%|████████  | 1510/1861 [18:15<04:14,  1.38it/s][A
 81%|████████  | 1511/1861 [18:16<04:14,  1.38it/s][A
 81%|████████  | 1512/1861 [18:17<04:13,  1.38it/s][A
 81%|████████▏ | 1513/1861 [18:18<04:12,  1.38it/s][A
 81%|████████▏ | 1514/1861 [18:19<04:11,  1.38it/s][A
 81%|████████▏ | 1515/1861 [18:19<04:11,  1.38it/s][A
 81%|████████▏ | 1516/1861 [18:20<04:10,  1.38it/s][A
 82%|████████▏ | 1517/1861 [18:20<04:09,  1.38it/s][A
 82%|████████▏ | 1518/1861 [18:21<04:08,  1.38it/s][A
 82%|████

Step: 1600 | Loss: 2.875532158873975 | Train PPL: 17.734859373506733
Wie würde eine solche Zukunft aussehen ? --> How do you have a <unk> of the future ? </s>



 86%|████████▌ | 1601/1861 [19:23<03:08,  1.38it/s][A
 86%|████████▌ | 1602/1861 [19:24<03:08,  1.38it/s][A
 86%|████████▌ | 1603/1861 [19:24<03:07,  1.38it/s][A
 86%|████████▌ | 1604/1861 [19:25<03:06,  1.38it/s][A
 86%|████████▌ | 1605/1861 [19:26<03:06,  1.38it/s][A
 86%|████████▋ | 1606/1861 [19:27<03:05,  1.38it/s][A
 86%|████████▋ | 1607/1861 [19:27<03:04,  1.38it/s][A
 86%|████████▋ | 1608/1861 [19:28<03:03,  1.38it/s][A
 86%|████████▋ | 1609/1861 [19:29<03:03,  1.38it/s][A
 87%|████████▋ | 1610/1861 [19:30<03:02,  1.38it/s][A
 87%|████████▋ | 1611/1861 [19:30<03:01,  1.38it/s][A
 87%|████████▋ | 1612/1861 [19:31<03:00,  1.38it/s][A
 87%|████████▋ | 1613/1861 [19:32<03:00,  1.38it/s][A
 87%|████████▋ | 1614/1861 [19:33<02:59,  1.38it/s][A
 87%|████████▋ | 1615/1861 [19:34<02:58,  1.38it/s][A
 87%|████████▋ | 1616/1861 [19:34<02:58,  1.38it/s][A
 87%|████████▋ | 1617/1861 [19:35<02:57,  1.38it/s][A
 87%|████████▋ | 1618/1861 [19:36<02:56,  1.38it/s][A
 87%|████

Step: 1700 | Loss: 2.843920246783425 | Train PPL: 17.182995254379385
Wie würde eine solche Zukunft aussehen ? --> How do you do ? </s>



 91%|█████████▏| 1701/1861 [20:36<01:56,  1.38it/s][A
 91%|█████████▏| 1702/1861 [20:37<01:55,  1.38it/s][A
 92%|█████████▏| 1703/1861 [20:38<01:54,  1.38it/s][A
 92%|█████████▏| 1704/1861 [20:39<01:54,  1.38it/s][A
 92%|█████████▏| 1705/1861 [20:40<01:53,  1.37it/s][A
 92%|█████████▏| 1706/1861 [20:40<01:52,  1.37it/s][A
 92%|█████████▏| 1707/1861 [20:41<01:52,  1.37it/s][A
 92%|█████████▏| 1708/1861 [20:42<01:51,  1.37it/s][A
 92%|█████████▏| 1709/1861 [20:43<01:50,  1.37it/s][A
 92%|█████████▏| 1710/1861 [20:43<01:49,  1.37it/s][A
 92%|█████████▏| 1711/1861 [20:44<01:49,  1.38it/s][A
 92%|█████████▏| 1712/1861 [20:45<01:48,  1.38it/s][A
 92%|█████████▏| 1713/1861 [20:45<01:47,  1.37it/s][A
 92%|█████████▏| 1714/1861 [20:46<01:46,  1.37it/s][A
 92%|█████████▏| 1715/1861 [20:47<01:46,  1.38it/s][A
 92%|█████████▏| 1716/1861 [20:47<01:45,  1.38it/s][A
 92%|█████████▏| 1717/1861 [20:48<01:44,  1.38it/s][A
 92%|█████████▏| 1718/1861 [20:49<01:43,  1.38it/s][A
 92%|████

Step: 1800 | Loss: 2.814336743354797 | Train PPL: 16.68210759149612
Wie würde eine solche Zukunft aussehen ? --> How do you do a <unk> ? </s>



 97%|█████████▋| 1801/1861 [21:48<00:43,  1.38it/s][A
 97%|█████████▋| 1802/1861 [21:49<00:42,  1.38it/s][A
 97%|█████████▋| 1803/1861 [21:49<00:42,  1.38it/s][A
 97%|█████████▋| 1804/1861 [21:50<00:41,  1.38it/s][A
 97%|█████████▋| 1805/1861 [21:51<00:40,  1.38it/s][A
 97%|█████████▋| 1806/1861 [21:51<00:39,  1.38it/s][A
 97%|█████████▋| 1807/1861 [21:52<00:39,  1.38it/s][A
 97%|█████████▋| 1808/1861 [21:53<00:38,  1.38it/s][A
 97%|█████████▋| 1809/1861 [21:54<00:37,  1.38it/s][A
 97%|█████████▋| 1810/1861 [21:55<00:37,  1.38it/s][A
 97%|█████████▋| 1811/1861 [21:56<00:36,  1.38it/s][A
 97%|█████████▋| 1812/1861 [21:57<00:35,  1.38it/s][A
 97%|█████████▋| 1813/1861 [21:57<00:34,  1.38it/s][A
 97%|█████████▋| 1814/1861 [21:58<00:34,  1.38it/s][A
 98%|█████████▊| 1815/1861 [21:59<00:33,  1.38it/s][A
 98%|█████████▊| 1816/1861 [22:00<00:32,  1.38it/s][A
 98%|█████████▊| 1817/1861 [22:00<00:31,  1.38it/s][A
 98%|█████████▊| 1818/1861 [22:01<00:31,  1.38it/s][A
 98%|████

Epoch: 1 | Loss: 2.797065874186336 | Train PPL: 16.39646682608504 | Val PPL: 8.609231101431288



  0%|          | 1/1861 [00:00<29:04,  1.07it/s][A
  0%|          | 2/1861 [00:01<25:59,  1.19it/s][A
  0%|          | 3/1861 [00:02<25:18,  1.22it/s][A
  0%|          | 4/1861 [00:03<23:15,  1.33it/s][A
  0%|          | 5/1861 [00:03<23:24,  1.32it/s][A
  0%|          | 6/1861 [00:04<23:30,  1.32it/s][A
  0%|          | 7/1861 [00:05<23:44,  1.30it/s][A
  0%|          | 8/1861 [00:06<23:49,  1.30it/s][A
  0%|          | 9/1861 [00:06<23:32,  1.31it/s][A
  1%|          | 10/1861 [00:07<23:46,  1.30it/s][A
  1%|          | 11/1861 [00:08<23:49,  1.29it/s][A
  1%|          | 12/1861 [00:09<23:28,  1.31it/s][A
  1%|          | 13/1861 [00:09<23:25,  1.32it/s][A
  1%|          | 14/1861 [00:10<23:08,  1.33it/s][A
  1%|          | 15/1861 [00:11<23:08,  1.33it/s][A
  1%|          | 16/1861 [00:11<22:41,  1.36it/s][A
  1%|          | 17/1861 [00:12<22:48,  1.35it/s][A
  1%|          | 18/1861 [00:13<22:54,  1.34it/s][A
  1%|          | 19/1861 [00:14<22:55,  1.34it/s][A
 

Step: 100 | Loss: 2.1821311110258104 | Train PPL: 8.86517882214785
Wie würde eine solche Zukunft aussehen ? --> How would it be a <unk> ? </s>



  5%|▌         | 101/1861 [01:11<20:45,  1.41it/s][A
  5%|▌         | 102/1861 [01:12<20:46,  1.41it/s][A
  6%|▌         | 103/1861 [01:12<20:44,  1.41it/s][A
  6%|▌         | 104/1861 [01:13<20:45,  1.41it/s][A
  6%|▌         | 105/1861 [01:14<20:44,  1.41it/s][A
  6%|▌         | 106/1861 [01:15<20:43,  1.41it/s][A
  6%|▌         | 107/1861 [01:15<20:44,  1.41it/s][A
  6%|▌         | 108/1861 [01:16<20:47,  1.41it/s][A
  6%|▌         | 109/1861 [01:17<20:43,  1.41it/s][A
  6%|▌         | 110/1861 [01:17<20:36,  1.42it/s][A
  6%|▌         | 111/1861 [01:18<20:32,  1.42it/s][A
  6%|▌         | 112/1861 [01:18<20:33,  1.42it/s][A
  6%|▌         | 113/1861 [01:19<20:34,  1.42it/s][A
  6%|▌         | 114/1861 [01:20<20:30,  1.42it/s][A
  6%|▌         | 115/1861 [01:21<20:30,  1.42it/s][A
  6%|▌         | 116/1861 [01:21<20:31,  1.42it/s][A
  6%|▋         | 117/1861 [01:22<20:32,  1.42it/s][A
  6%|▋         | 118/1861 [01:23<20:29,  1.42it/s][A
  6%|▋         | 119/1861 [

Step: 200 | Loss: 2.183070149719715 | Train PPL: 8.87350747794325
Wie würde eine solche Zukunft aussehen ? --> How do you do a <unk> . </s>



 11%|█         | 201/1861 [02:24<19:54,  1.39it/s][A
 11%|█         | 202/1861 [02:25<19:55,  1.39it/s][A
 11%|█         | 203/1861 [02:26<19:54,  1.39it/s][A
 11%|█         | 204/1861 [02:27<19:57,  1.38it/s][A
 11%|█         | 205/1861 [02:28<19:57,  1.38it/s][A
 11%|█         | 206/1861 [02:29<20:00,  1.38it/s][A
 11%|█         | 207/1861 [02:30<19:59,  1.38it/s][A
 11%|█         | 208/1861 [02:30<19:56,  1.38it/s][A
 11%|█         | 209/1861 [02:31<19:54,  1.38it/s][A
 11%|█▏        | 210/1861 [02:31<19:54,  1.38it/s][A
 11%|█▏        | 211/1861 [02:32<19:55,  1.38it/s][A
 11%|█▏        | 212/1861 [02:33<19:53,  1.38it/s][A
 11%|█▏        | 213/1861 [02:34<19:52,  1.38it/s][A
 11%|█▏        | 214/1861 [02:34<19:49,  1.38it/s][A
 12%|█▏        | 215/1861 [02:35<19:49,  1.38it/s][A
 12%|█▏        | 216/1861 [02:35<19:47,  1.39it/s][A
 12%|█▏        | 217/1861 [02:36<19:47,  1.38it/s][A
 12%|█▏        | 218/1861 [02:37<19:47,  1.38it/s][A
 12%|█▏        | 219/1861 [

Step: 300 | Loss: 2.1851071812709173 | Train PPL: 8.891601515451532
Wie würde eine solche Zukunft aussehen ? --> How do you do a <unk> . </s>



 16%|█▌        | 301/1861 [03:36<18:41,  1.39it/s][A
 16%|█▌        | 302/1861 [03:37<18:41,  1.39it/s][A
 16%|█▋        | 303/1861 [03:37<18:40,  1.39it/s][A
 16%|█▋        | 304/1861 [03:38<18:40,  1.39it/s][A
 16%|█▋        | 305/1861 [03:39<18:38,  1.39it/s][A
 16%|█▋        | 306/1861 [03:40<18:38,  1.39it/s][A
 16%|█▋        | 307/1861 [03:40<18:38,  1.39it/s][A
 17%|█▋        | 308/1861 [03:41<18:38,  1.39it/s][A
 17%|█▋        | 309/1861 [03:42<18:37,  1.39it/s][A
 17%|█▋        | 310/1861 [03:43<18:36,  1.39it/s][A
 17%|█▋        | 311/1861 [03:43<18:35,  1.39it/s][A
 17%|█▋        | 312/1861 [03:44<18:35,  1.39it/s][A
 17%|█▋        | 313/1861 [03:45<18:34,  1.39it/s][A
 17%|█▋        | 314/1861 [03:46<18:34,  1.39it/s][A
 17%|█▋        | 315/1861 [03:46<18:33,  1.39it/s][A
 17%|█▋        | 316/1861 [03:47<18:33,  1.39it/s][A
 17%|█▋        | 317/1861 [03:48<18:33,  1.39it/s][A
 17%|█▋        | 318/1861 [03:49<18:32,  1.39it/s][A
 17%|█▋        | 319/1861 [

Step: 400 | Loss: 2.1735948640853167 | Train PPL: 8.789825541223852
Wie würde eine solche Zukunft aussehen ? --> How would we do about that ? </s>



 22%|██▏       | 401/1861 [04:50<17:37,  1.38it/s][A
 22%|██▏       | 402/1861 [04:51<17:37,  1.38it/s][A
 22%|██▏       | 403/1861 [04:52<17:36,  1.38it/s][A
 22%|██▏       | 404/1861 [04:52<17:35,  1.38it/s][A
 22%|██▏       | 405/1861 [04:53<17:35,  1.38it/s][A
 22%|██▏       | 406/1861 [04:54<17:35,  1.38it/s][A
 22%|██▏       | 407/1861 [04:55<17:35,  1.38it/s][A
 22%|██▏       | 408/1861 [04:56<17:34,  1.38it/s][A
 22%|██▏       | 409/1861 [04:56<17:34,  1.38it/s][A
 22%|██▏       | 410/1861 [04:57<17:33,  1.38it/s][A
 22%|██▏       | 411/1861 [04:58<17:32,  1.38it/s][A
 22%|██▏       | 412/1861 [04:58<17:31,  1.38it/s][A
 22%|██▏       | 413/1861 [04:59<17:30,  1.38it/s][A
 22%|██▏       | 414/1861 [05:00<17:29,  1.38it/s][A
 22%|██▏       | 415/1861 [05:01<17:28,  1.38it/s][A
 22%|██▏       | 416/1861 [05:01<17:27,  1.38it/s][A
 22%|██▏       | 417/1861 [05:01<17:25,  1.38it/s][A
 22%|██▏       | 418/1861 [05:02<17:25,  1.38it/s][A
 23%|██▎       | 419/1861 [

Step: 500 | Loss: 2.163469339251518 | Train PPL: 8.701273021369419 | Val PPL: 7.674458972070044
Wie würde eine solche Zukunft aussehen ? --> How would it like to be a <unk> ? </s>



 27%|██▋       | 501/1861 [06:07<16:36,  1.36it/s][A
 27%|██▋       | 502/1861 [06:08<16:36,  1.36it/s][A
 27%|██▋       | 503/1861 [06:08<16:36,  1.36it/s][A
 27%|██▋       | 504/1861 [06:09<16:35,  1.36it/s][A
 27%|██▋       | 505/1861 [06:10<16:34,  1.36it/s][A
 27%|██▋       | 506/1861 [06:11<16:33,  1.36it/s][A
 27%|██▋       | 507/1861 [06:11<16:33,  1.36it/s][A
 27%|██▋       | 508/1861 [06:12<16:32,  1.36it/s][A
 27%|██▋       | 509/1861 [06:13<16:31,  1.36it/s][A
 27%|██▋       | 510/1861 [06:14<16:31,  1.36it/s][A
 27%|██▋       | 511/1861 [06:14<16:30,  1.36it/s][A
 28%|██▊       | 512/1861 [06:15<16:28,  1.36it/s][A
 28%|██▊       | 513/1861 [06:16<16:28,  1.36it/s][A
 28%|██▊       | 514/1861 [06:16<16:27,  1.36it/s][A
 28%|██▊       | 515/1861 [06:17<16:27,  1.36it/s][A
 28%|██▊       | 516/1861 [06:18<16:26,  1.36it/s][A
 28%|██▊       | 517/1861 [06:19<16:26,  1.36it/s][A
 28%|██▊       | 518/1861 [06:20<16:25,  1.36it/s][A
 28%|██▊       | 519/1861 [

Step: 600 | Loss: 2.148891545236111 | Train PPL: 8.575347742029521
Wie würde eine solche Zukunft aussehen ? --> How would it be to ? </s>



 32%|███▏      | 601/1861 [07:20<15:24,  1.36it/s][A
 32%|███▏      | 602/1861 [07:21<15:22,  1.36it/s][A
 32%|███▏      | 603/1861 [07:22<15:22,  1.36it/s][A
 32%|███▏      | 604/1861 [07:22<15:21,  1.36it/s][A
 33%|███▎      | 605/1861 [07:23<15:20,  1.36it/s][A
 33%|███▎      | 606/1861 [07:23<15:19,  1.37it/s][A
 33%|███▎      | 607/1861 [07:24<15:17,  1.37it/s][A
 33%|███▎      | 608/1861 [07:25<15:17,  1.37it/s][A
 33%|███▎      | 609/1861 [07:25<15:16,  1.37it/s][A
 33%|███▎      | 610/1861 [07:26<15:15,  1.37it/s][A
 33%|███▎      | 611/1861 [07:27<15:15,  1.37it/s][A
 33%|███▎      | 612/1861 [07:28<15:14,  1.37it/s][A
 33%|███▎      | 613/1861 [07:28<15:13,  1.37it/s][A
 33%|███▎      | 614/1861 [07:29<15:12,  1.37it/s][A
 33%|███▎      | 615/1861 [07:30<15:11,  1.37it/s][A
 33%|███▎      | 616/1861 [07:30<15:11,  1.37it/s][A
 33%|███▎      | 617/1861 [07:31<15:10,  1.37it/s][A
 33%|███▎      | 618/1861 [07:32<15:10,  1.37it/s][A
 33%|███▎      | 619/1861 [

Step: 700 | Loss: 2.1419844489438193 | Train PPL: 8.516321075540128
Wie würde eine solche Zukunft aussehen ? --> How would you do ? </s>



 38%|███▊      | 701/1861 [08:35<14:12,  1.36it/s][A
 38%|███▊      | 702/1861 [08:36<14:11,  1.36it/s][A
 38%|███▊      | 703/1861 [08:36<14:10,  1.36it/s][A
 38%|███▊      | 704/1861 [08:37<14:10,  1.36it/s][A
 38%|███▊      | 705/1861 [08:38<14:09,  1.36it/s][A
 38%|███▊      | 706/1861 [08:38<14:08,  1.36it/s][A
 38%|███▊      | 707/1861 [08:39<14:08,  1.36it/s][A
 38%|███▊      | 708/1861 [08:40<14:07,  1.36it/s][A
 38%|███▊      | 709/1861 [08:41<14:06,  1.36it/s][A
 38%|███▊      | 710/1861 [08:41<14:05,  1.36it/s][A
 38%|███▊      | 711/1861 [08:42<14:04,  1.36it/s][A
 38%|███▊      | 712/1861 [08:42<14:03,  1.36it/s][A
 38%|███▊      | 713/1861 [08:43<14:02,  1.36it/s][A
 38%|███▊      | 714/1861 [08:44<14:01,  1.36it/s][A
 38%|███▊      | 715/1861 [08:44<14:01,  1.36it/s][A
 38%|███▊      | 716/1861 [08:45<13:59,  1.36it/s][A
 39%|███▊      | 717/1861 [08:45<13:58,  1.36it/s][A
 39%|███▊      | 718/1861 [08:46<13:57,  1.36it/s][A
 39%|███▊      | 719/1861 [

Step: 800 | Loss: 2.1367889547348025 | Train PPL: 8.472189321118544
Wie würde eine solche Zukunft aussehen ? --> How would it be to be a <unk> ? </s>



 43%|████▎     | 801/1861 [09:44<12:53,  1.37it/s][A
 43%|████▎     | 802/1861 [09:45<12:52,  1.37it/s][A
 43%|████▎     | 803/1861 [09:45<12:52,  1.37it/s][A
 43%|████▎     | 804/1861 [09:46<12:51,  1.37it/s][A
 43%|████▎     | 805/1861 [09:47<12:50,  1.37it/s][A
 43%|████▎     | 806/1861 [09:48<12:49,  1.37it/s][A
 43%|████▎     | 807/1861 [09:48<12:49,  1.37it/s][A
 43%|████▎     | 808/1861 [09:49<12:48,  1.37it/s][A
 43%|████▎     | 809/1861 [09:50<12:47,  1.37it/s][A
 44%|████▎     | 810/1861 [09:50<12:46,  1.37it/s][A
 44%|████▎     | 811/1861 [09:51<12:46,  1.37it/s][A
 44%|████▎     | 812/1861 [09:52<12:45,  1.37it/s][A
 44%|████▎     | 813/1861 [09:53<12:44,  1.37it/s][A
 44%|████▎     | 814/1861 [09:53<12:43,  1.37it/s][A
 44%|████▍     | 815/1861 [09:54<12:43,  1.37it/s][A
 44%|████▍     | 816/1861 [09:55<12:42,  1.37it/s][A
 44%|████▍     | 817/1861 [09:56<12:41,  1.37it/s][A
 44%|████▍     | 818/1861 [09:56<12:40,  1.37it/s][A
 44%|████▍     | 819/1861 [

Step: 900 | Loss: 2.1308959382772446 | Train PPL: 8.422409391052767
Wie würde eine solche Zukunft aussehen ? --> How would you get to look at this ? </s>



 48%|████▊     | 901/1861 [13:18<14:11,  1.13it/s][A
 48%|████▊     | 902/1861 [13:19<14:10,  1.13it/s][A
 49%|████▊     | 903/1861 [13:20<14:09,  1.13it/s][A
 49%|████▊     | 904/1861 [13:21<14:08,  1.13it/s][A
 49%|████▊     | 905/1861 [13:22<14:07,  1.13it/s][A
 49%|████▊     | 906/1861 [13:23<14:06,  1.13it/s][A
 49%|████▊     | 907/1861 [13:24<14:06,  1.13it/s][A
 49%|████▉     | 908/1861 [13:25<14:05,  1.13it/s][A
 49%|████▉     | 909/1861 [13:25<14:04,  1.13it/s][A
 49%|████▉     | 910/1861 [13:26<14:03,  1.13it/s][A
 49%|████▉     | 911/1861 [13:27<14:02,  1.13it/s][A
 49%|████▉     | 912/1861 [13:28<14:01,  1.13it/s][A
 49%|████▉     | 913/1861 [13:29<14:00,  1.13it/s][A
 49%|████▉     | 914/1861 [13:29<13:59,  1.13it/s][A
 49%|████▉     | 915/1861 [13:30<13:57,  1.13it/s][A
 49%|████▉     | 916/1861 [13:31<13:57,  1.13it/s][A
 49%|████▉     | 917/1861 [13:32<13:56,  1.13it/s][A
 49%|████▉     | 918/1861 [13:33<13:55,  1.13it/s][A
 49%|████▉     | 919/1861 [

Step: 1000 | Loss: 2.1231987075805665 | Train PPL: 8.357829026780134 | Val PPL: 7.021349596636095
Wie würde eine solche Zukunft aussehen ? --> How would you do a future ? </s>



 54%|█████▍    | 1001/1861 [16:24<14:06,  1.02it/s][A
 54%|█████▍    | 1002/1861 [16:25<14:04,  1.02it/s][A
 54%|█████▍    | 1003/1861 [16:26<14:03,  1.02it/s][A
 54%|█████▍    | 1004/1861 [16:26<14:02,  1.02it/s][A
 54%|█████▍    | 1005/1861 [16:27<14:01,  1.02it/s][A
 54%|█████▍    | 1006/1861 [16:27<13:59,  1.02it/s][A
 54%|█████▍    | 1007/1861 [16:28<13:58,  1.02it/s][A
 54%|█████▍    | 1008/1861 [16:29<13:57,  1.02it/s][A
 54%|█████▍    | 1009/1861 [16:30<13:56,  1.02it/s][A
 54%|█████▍    | 1010/1861 [16:30<13:54,  1.02it/s][A
 54%|█████▍    | 1011/1861 [16:31<13:53,  1.02it/s][A
 54%|█████▍    | 1012/1861 [16:36<13:56,  1.02it/s][A
 54%|█████▍    | 1013/1861 [16:42<13:59,  1.01it/s][A
 54%|█████▍    | 1014/1861 [16:43<13:58,  1.01it/s][A
 55%|█████▍    | 1015/1861 [16:44<13:57,  1.01it/s][A
 55%|█████▍    | 1016/1861 [16:45<13:56,  1.01it/s][A
 55%|█████▍    | 1017/1861 [16:45<13:54,  1.01it/s][A
 55%|█████▍    | 1018/1861 [16:46<13:53,  1.01it/s][A
 55%|████

Step: 1100 | Loss: 2.1148627254637806 | Train PPL: 8.28844789517987
Wie würde eine solche Zukunft aussehen ? --> How would a sense of that . </s>



 59%|█████▉    | 1101/1861 [19:22<13:22,  1.06s/it][A
 59%|█████▉    | 1102/1861 [19:26<13:23,  1.06s/it][A
 59%|█████▉    | 1103/1861 [19:29<13:23,  1.06s/it][A
 59%|█████▉    | 1104/1861 [19:30<13:22,  1.06s/it][A
 59%|█████▉    | 1105/1861 [19:31<13:21,  1.06s/it][A
 59%|█████▉    | 1106/1861 [19:31<13:19,  1.06s/it][A
 59%|█████▉    | 1107/1861 [19:32<13:18,  1.06s/it][A
 60%|█████▉    | 1108/1861 [19:33<13:17,  1.06s/it][A
 60%|█████▉    | 1109/1861 [19:33<13:15,  1.06s/it][A
 60%|█████▉    | 1110/1861 [19:34<13:14,  1.06s/it][A
 60%|█████▉    | 1111/1861 [19:35<13:13,  1.06s/it][A
 60%|█████▉    | 1112/1861 [19:35<13:11,  1.06s/it][A
 60%|█████▉    | 1113/1861 [19:36<13:10,  1.06s/it][A
 60%|█████▉    | 1114/1861 [19:37<13:09,  1.06s/it][A
 60%|█████▉    | 1115/1861 [19:38<13:08,  1.06s/it][A
 60%|█████▉    | 1116/1861 [19:39<13:07,  1.06s/it][A
 60%|██████    | 1117/1861 [19:40<13:06,  1.06s/it][A
 60%|██████    | 1118/1861 [19:41<13:05,  1.06s/it][A
 60%|████

Step: 1200 | Loss: 2.106573175614079 | Train PPL: 8.22002438504421
Wie würde eine solche Zukunft aussehen ? --> How would a sense of what ? </s>



 65%|██████▍   | 1201/1861 [27:16<14:59,  1.36s/it][A
 65%|██████▍   | 1202/1861 [27:17<14:57,  1.36s/it][A
 65%|██████▍   | 1203/1861 [27:24<14:59,  1.37s/it][A
 65%|██████▍   | 1204/1861 [27:28<14:59,  1.37s/it][A
 65%|██████▍   | 1205/1861 [27:29<14:57,  1.37s/it][A
 65%|██████▍   | 1206/1861 [27:29<14:55,  1.37s/it][A
 65%|██████▍   | 1207/1861 [27:30<14:54,  1.37s/it][A
 65%|██████▍   | 1208/1861 [27:30<14:52,  1.37s/it][A
 65%|██████▍   | 1209/1861 [27:31<14:50,  1.37s/it][A
 65%|██████▌   | 1210/1861 [27:32<14:49,  1.37s/it][A
 65%|██████▌   | 1211/1861 [27:33<14:47,  1.37s/it][A
 65%|██████▌   | 1212/1861 [27:34<14:45,  1.36s/it][A
 65%|██████▌   | 1213/1861 [27:34<14:44,  1.36s/it][A
 65%|██████▌   | 1214/1861 [27:35<14:42,  1.36s/it][A
 65%|██████▌   | 1215/1861 [27:36<14:40,  1.36s/it][A
 65%|██████▌   | 1216/1861 [27:37<14:39,  1.36s/it][A
 65%|██████▌   | 1217/1861 [27:38<14:37,  1.36s/it][A
 65%|██████▌   | 1218/1861 [27:39<14:35,  1.36s/it][A
 66%|████

Step: 1300 | Loss: 2.0958432326179284 | Train PPL: 8.132295496647743
Wie würde eine solche Zukunft aussehen ? --> How would a look at this future ? </s>



 70%|██████▉   | 1301/1861 [1:20:16<34:33,  3.70s/it][A
 70%|██████▉   | 1302/1861 [1:20:17<34:28,  3.70s/it][A
 70%|███████   | 1303/1861 [1:20:18<34:23,  3.70s/it][A
 70%|███████   | 1304/1861 [1:20:19<34:18,  3.70s/it][A
 70%|███████   | 1305/1861 [1:20:20<34:13,  3.69s/it][A
 70%|███████   | 1306/1861 [1:20:21<34:08,  3.69s/it][A
 70%|███████   | 1307/1861 [1:20:22<34:03,  3.69s/it][A
 70%|███████   | 1308/1861 [1:20:22<33:59,  3.69s/it][A
 70%|███████   | 1309/1861 [1:20:23<33:54,  3.69s/it][A
 70%|███████   | 1310/1861 [1:20:24<33:49,  3.68s/it][A
 70%|███████   | 1311/1861 [1:20:25<33:44,  3.68s/it][A
 70%|███████   | 1312/1861 [1:20:26<33:39,  3.68s/it][A
 71%|███████   | 1313/1861 [1:20:27<33:34,  3.68s/it][A
 71%|███████   | 1314/1861 [1:20:27<33:29,  3.67s/it][A
 71%|███████   | 1315/1861 [1:20:28<33:24,  3.67s/it][A
 71%|███████   | 1316/1861 [1:20:29<33:20,  3.67s/it][A
 71%|███████   | 1317/1861 [1:20:30<33:15,  3.67s/it][A
 71%|███████   | 1318/1861 [1:

Step: 1400 | Loss: 2.0882845390907354 | Train PPL: 8.071057697755379
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 75%|███████▌  | 1401/1861 [1:31:21<29:59,  3.91s/it][A
 75%|███████▌  | 1402/1861 [1:31:21<29:54,  3.91s/it][A
 75%|███████▌  | 1403/1861 [1:31:22<29:49,  3.91s/it][A
 75%|███████▌  | 1404/1861 [1:31:23<29:44,  3.91s/it][A
 75%|███████▌  | 1405/1861 [1:31:23<29:39,  3.90s/it][A
 76%|███████▌  | 1406/1861 [1:31:24<29:34,  3.90s/it][A
 76%|███████▌  | 1407/1861 [1:31:27<29:30,  3.90s/it][A
 76%|███████▌  | 1408/1861 [1:31:33<29:27,  3.90s/it][A
 76%|███████▌  | 1409/1861 [1:31:36<29:23,  3.90s/it][A
 76%|███████▌  | 1410/1861 [1:31:37<29:18,  3.90s/it][A
 76%|███████▌  | 1411/1861 [1:31:37<29:13,  3.90s/it][A
 76%|███████▌  | 1412/1861 [1:31:38<29:08,  3.89s/it][A
 76%|███████▌  | 1413/1861 [1:31:39<29:03,  3.89s/it][A
 76%|███████▌  | 1414/1861 [1:31:39<28:58,  3.89s/it][A
 76%|███████▌  | 1415/1861 [1:31:40<28:53,  3.89s/it][A
 76%|███████▌  | 1416/1861 [1:31:41<28:48,  3.89s/it][A
 76%|███████▌  | 1417/1861 [1:31:42<28:44,  3.88s/it][A
 76%|███████▌  | 1418/1861 [1:

Step: 1500 | Loss: 2.0792711791594822 | Train PPL: 7.9986372159241315 | Val PPL: 6.465233851872871
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 81%|████████  | 1501/1861 [1:38:32<23:38,  3.94s/it][A
 81%|████████  | 1502/1861 [1:38:33<23:33,  3.94s/it][A
 81%|████████  | 1503/1861 [1:38:34<23:28,  3.93s/it][A
 81%|████████  | 1504/1861 [1:38:35<23:24,  3.93s/it][A
 81%|████████  | 1505/1861 [1:38:36<23:19,  3.93s/it][A
 81%|████████  | 1506/1861 [1:38:36<23:14,  3.93s/it][A
 81%|████████  | 1507/1861 [1:38:37<23:10,  3.93s/it][A
 81%|████████  | 1508/1861 [1:38:38<23:05,  3.92s/it][A
 81%|████████  | 1509/1861 [1:38:39<23:00,  3.92s/it][A
 81%|████████  | 1510/1861 [1:38:39<22:56,  3.92s/it][A
 81%|████████  | 1511/1861 [1:38:40<22:51,  3.92s/it][A
 81%|████████  | 1512/1861 [1:38:41<22:46,  3.92s/it][A
 81%|████████▏ | 1513/1861 [1:38:42<22:42,  3.91s/it][A
 81%|████████▏ | 1514/1861 [1:38:43<22:37,  3.91s/it][A
 81%|████████▏ | 1515/1861 [1:38:43<22:32,  3.91s/it][A
 81%|████████▏ | 1516/1861 [1:38:44<22:28,  3.91s/it][A
 82%|████████▏ | 1517/1861 [1:38:45<22:23,  3.91s/it][A
 82%|████████▏ | 1518/1861 [1:

Step: 1600 | Loss: 2.0732803915627303 | Train PPL: 7.950862326791479
Wie würde eine solche Zukunft aussehen ? --> How would a <unk> and look like ? </s>



 86%|████████▌ | 1601/1861 [1:39:47<16:12,  3.74s/it][A
 86%|████████▌ | 1602/1861 [1:39:48<16:08,  3.74s/it][A
 86%|████████▌ | 1603/1861 [1:39:49<16:03,  3.74s/it][A
 86%|████████▌ | 1604/1861 [1:39:50<15:59,  3.73s/it][A
 86%|████████▌ | 1605/1861 [1:39:50<15:55,  3.73s/it][A
 86%|████████▋ | 1606/1861 [1:39:51<15:51,  3.73s/it][A
 86%|████████▋ | 1607/1861 [1:39:52<15:47,  3.73s/it][A
 86%|████████▋ | 1608/1861 [1:39:52<15:42,  3.73s/it][A
 86%|████████▋ | 1609/1861 [1:39:53<15:38,  3.72s/it][A
 87%|████████▋ | 1610/1861 [1:39:54<15:34,  3.72s/it][A
 87%|████████▋ | 1611/1861 [1:39:55<15:30,  3.72s/it][A
 87%|████████▋ | 1612/1861 [1:39:55<15:26,  3.72s/it][A
 87%|████████▋ | 1613/1861 [1:39:56<15:21,  3.72s/it][A
 87%|████████▋ | 1614/1861 [1:39:57<15:17,  3.72s/it][A
 87%|████████▋ | 1615/1861 [1:39:57<15:13,  3.71s/it][A
 87%|████████▋ | 1616/1861 [1:39:58<15:09,  3.71s/it][A
 87%|████████▋ | 1617/1861 [1:39:59<15:05,  3.71s/it][A
 87%|████████▋ | 1618/1861 [1:

Step: 1700 | Loss: 2.0651883598110254 | Train PPL: 7.8867833107684575
Wie würde eine solche Zukunft aussehen ? --> How would a <unk> and what ? </s>



 91%|█████████▏| 1701/1861 [1:41:05<09:30,  3.57s/it][A
 91%|█████████▏| 1702/1861 [1:41:06<09:26,  3.56s/it][A
 92%|█████████▏| 1703/1861 [1:41:07<09:22,  3.56s/it][A
 92%|█████████▏| 1704/1861 [1:41:07<09:19,  3.56s/it][A
 92%|█████████▏| 1705/1861 [1:41:09<09:15,  3.56s/it][A
 92%|█████████▏| 1706/1861 [1:41:10<09:11,  3.56s/it][A
 92%|█████████▏| 1707/1861 [1:41:12<09:07,  3.56s/it][A
 92%|█████████▏| 1708/1861 [1:41:13<09:04,  3.56s/it][A
 92%|█████████▏| 1709/1861 [1:41:14<09:00,  3.55s/it][A
 92%|█████████▏| 1710/1861 [1:41:14<08:56,  3.55s/it][A
 92%|█████████▏| 1711/1861 [1:41:15<08:52,  3.55s/it][A
 92%|█████████▏| 1712/1861 [1:41:16<08:48,  3.55s/it][A
 92%|█████████▏| 1713/1861 [1:41:17<08:45,  3.55s/it][A
 92%|█████████▏| 1714/1861 [1:41:18<08:41,  3.55s/it][A
 92%|█████████▏| 1715/1861 [1:41:19<08:37,  3.54s/it][A
 92%|█████████▏| 1716/1861 [1:41:20<08:33,  3.54s/it][A
 92%|█████████▏| 1717/1861 [1:41:21<08:30,  3.54s/it][A
 92%|█████████▏| 1718/1861 [1:

Step: 1800 | Loss: 2.0567624891632135 | Train PPL: 7.820609472368057
Wie würde eine solche Zukunft aussehen ? --> How would a <unk> that look like ? </s>



 97%|█████████▋| 1801/1861 [1:42:26<03:24,  3.41s/it][A
 97%|█████████▋| 1802/1861 [1:42:27<03:21,  3.41s/it][A
 97%|█████████▋| 1803/1861 [1:42:27<03:17,  3.41s/it][A
 97%|█████████▋| 1804/1861 [1:42:28<03:14,  3.41s/it][A
 97%|█████████▋| 1805/1861 [1:42:29<03:10,  3.41s/it][A
 97%|█████████▋| 1806/1861 [1:42:30<03:07,  3.41s/it][A
 97%|█████████▋| 1807/1861 [1:42:31<03:03,  3.40s/it][A
 97%|█████████▋| 1808/1861 [1:42:32<03:00,  3.40s/it][A
 97%|█████████▋| 1809/1861 [1:42:33<02:56,  3.40s/it][A
 97%|█████████▋| 1810/1861 [1:42:33<02:53,  3.40s/it][A
 97%|█████████▋| 1811/1861 [1:42:34<02:49,  3.40s/it][A
 97%|█████████▋| 1812/1861 [1:42:35<02:46,  3.40s/it][A
 97%|█████████▋| 1813/1861 [1:42:36<02:42,  3.40s/it][A
 97%|█████████▋| 1814/1861 [1:42:37<02:39,  3.39s/it][A
 98%|█████████▊| 1815/1861 [1:42:38<02:36,  3.39s/it][A
 98%|█████████▊| 1816/1861 [1:42:39<02:32,  3.39s/it][A
 98%|█████████▊| 1817/1861 [1:42:40<02:29,  3.39s/it][A
 98%|█████████▊| 1818/1861 [1:

Epoch: 2 | Loss: 2.052548015120848 | Train PPL: 7.787719073300527 | Val PPL: 6.157977531948582



  0%|          | 1/1861 [00:00<26:13,  1.18it/s][A
  0%|          | 2/1861 [00:01<27:24,  1.13it/s][A
  0%|          | 3/1861 [00:02<29:10,  1.06it/s][A
  0%|          | 4/1861 [00:03<26:54,  1.15it/s][A
  0%|          | 5/1861 [00:04<25:23,  1.22it/s][A
  0%|          | 6/1861 [00:04<24:04,  1.28it/s][A
  0%|          | 7/1861 [00:05<24:05,  1.28it/s][A
  0%|          | 8/1861 [00:06<24:30,  1.26it/s][A
  0%|          | 9/1861 [00:07<24:42,  1.25it/s][A
  1%|          | 10/1861 [00:07<24:37,  1.25it/s][A
  1%|          | 11/1861 [00:08<24:04,  1.28it/s][A
  1%|          | 12/1861 [00:09<24:04,  1.28it/s][A
  1%|          | 13/1861 [00:10<24:02,  1.28it/s][A
  1%|          | 14/1861 [00:10<23:31,  1.31it/s][A
  1%|          | 15/1861 [00:11<23:21,  1.32it/s][A
  1%|          | 16/1861 [00:12<23:32,  1.31it/s][A
  1%|          | 17/1861 [00:13<23:45,  1.29it/s][A
  1%|          | 18/1861 [00:14<23:53,  1.29it/s][A
  1%|          | 19/1861 [00:14<23:49,  1.29it/s][A
 

Step: 100 | Loss: 1.8181201562285423 | Train PPL: 6.160267218483733
Wie würde eine solche Zukunft aussehen ? --> How would you do a <unk> ? </s>



  5%|▌         | 101/1861 [01:29<26:02,  1.13it/s][A
  5%|▌         | 102/1861 [01:30<26:04,  1.12it/s][A
  6%|▌         | 103/1861 [01:31<26:00,  1.13it/s][A
  6%|▌         | 104/1861 [01:32<26:03,  1.12it/s][A
  6%|▌         | 105/1861 [01:33<26:05,  1.12it/s][A
  6%|▌         | 106/1861 [01:34<26:07,  1.12it/s][A
  6%|▌         | 107/1861 [01:35<26:07,  1.12it/s][A
  6%|▌         | 108/1861 [01:36<26:05,  1.12it/s][A
  6%|▌         | 109/1861 [01:37<26:06,  1.12it/s][A
  6%|▌         | 110/1861 [01:38<26:00,  1.12it/s][A
  6%|▌         | 111/1861 [01:38<25:56,  1.12it/s][A
  6%|▌         | 112/1861 [01:39<25:57,  1.12it/s][A
  6%|▌         | 113/1861 [01:40<25:53,  1.13it/s][A
  6%|▌         | 114/1861 [01:40<25:47,  1.13it/s][A
  6%|▌         | 115/1861 [01:41<25:41,  1.13it/s][A
  6%|▌         | 116/1861 [01:42<25:39,  1.13it/s][A
  6%|▋         | 117/1861 [01:43<25:38,  1.13it/s][A
  6%|▋         | 118/1861 [01:44<25:39,  1.13it/s][A
  6%|▋         | 119/1861 [

Step: 200 | Loss: 1.812044403553009 | Train PPL: 6.122952430947965
Wie würde eine solche Zukunft aussehen ? --> How would a look like ? </s>



 11%|█         | 201/1861 [02:55<24:08,  1.15it/s][A
 11%|█         | 202/1861 [02:56<24:07,  1.15it/s][A
 11%|█         | 203/1861 [02:57<24:05,  1.15it/s][A
 11%|█         | 204/1861 [02:57<24:04,  1.15it/s][A
 11%|█         | 205/1861 [02:58<24:03,  1.15it/s][A
 11%|█         | 206/1861 [02:59<24:02,  1.15it/s][A
 11%|█         | 207/1861 [03:00<23:59,  1.15it/s][A
 11%|█         | 208/1861 [03:00<23:57,  1.15it/s][A
 11%|█         | 209/1861 [03:01<23:55,  1.15it/s][A
 11%|█▏        | 210/1861 [03:02<23:53,  1.15it/s][A
 11%|█▏        | 211/1861 [03:03<23:51,  1.15it/s][A
 11%|█▏        | 212/1861 [03:03<23:50,  1.15it/s][A
 11%|█▏        | 213/1861 [03:04<23:49,  1.15it/s][A
 11%|█▏        | 214/1861 [03:05<23:47,  1.15it/s][A
 12%|█▏        | 215/1861 [03:06<23:45,  1.15it/s][A
 12%|█▏        | 216/1861 [03:06<23:43,  1.16it/s][A
 12%|█▏        | 217/1861 [03:07<23:42,  1.16it/s][A
 12%|█▏        | 218/1861 [03:08<23:41,  1.16it/s][A
 12%|█▏        | 219/1861 [

Step: 300 | Loss: 1.8034512732426327 | Train PPL: 6.070562521862874
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 16%|█▌        | 301/1861 [04:13<21:52,  1.19it/s][A
 16%|█▌        | 302/1861 [04:14<21:52,  1.19it/s][A
 16%|█▋        | 303/1861 [04:14<21:51,  1.19it/s][A
 16%|█▋        | 304/1861 [04:15<21:49,  1.19it/s][A
 16%|█▋        | 305/1861 [04:16<21:49,  1.19it/s][A
 16%|█▋        | 306/1861 [04:17<21:48,  1.19it/s][A
 16%|█▋        | 307/1861 [04:18<21:46,  1.19it/s][A
 17%|█▋        | 308/1861 [04:19<21:46,  1.19it/s][A
 17%|█▋        | 309/1861 [04:19<21:44,  1.19it/s][A
 17%|█▋        | 310/1861 [04:20<21:43,  1.19it/s][A
 17%|█▋        | 311/1861 [04:21<21:42,  1.19it/s][A
 17%|█▋        | 312/1861 [04:22<21:41,  1.19it/s][A
 17%|█▋        | 313/1861 [04:22<21:39,  1.19it/s][A
 17%|█▋        | 314/1861 [04:23<21:38,  1.19it/s][A
 17%|█▋        | 315/1861 [04:24<21:37,  1.19it/s][A
 17%|█▋        | 316/1861 [04:25<21:35,  1.19it/s][A
 17%|█▋        | 317/1861 [04:25<21:34,  1.19it/s][A
 17%|█▋        | 318/1861 [04:26<21:32,  1.19it/s][A
 17%|█▋        | 319/1861 [

Step: 400 | Loss: 1.7901539672911166 | Train PPL: 5.990374717151038
Wie würde eine solche Zukunft aussehen ? --> How would it look like this ? </s>



 22%|██▏       | 401/1861 [05:28<19:56,  1.22it/s][A
 22%|██▏       | 402/1861 [05:29<19:56,  1.22it/s][A
 22%|██▏       | 403/1861 [05:30<19:55,  1.22it/s][A
 22%|██▏       | 404/1861 [05:31<19:54,  1.22it/s][A
 22%|██▏       | 405/1861 [05:32<19:53,  1.22it/s][A
 22%|██▏       | 406/1861 [05:32<19:51,  1.22it/s][A
 22%|██▏       | 407/1861 [05:33<19:51,  1.22it/s][A
 22%|██▏       | 408/1861 [05:34<19:50,  1.22it/s][A
 22%|██▏       | 409/1861 [05:34<19:49,  1.22it/s][A
 22%|██▏       | 410/1861 [05:36<19:49,  1.22it/s][A
 22%|██▏       | 411/1861 [05:37<19:50,  1.22it/s][A
 22%|██▏       | 412/1861 [05:38<19:50,  1.22it/s][A
 22%|██▏       | 413/1861 [05:39<19:49,  1.22it/s][A
 22%|██▏       | 414/1861 [05:39<19:48,  1.22it/s][A
 22%|██▏       | 415/1861 [05:40<19:47,  1.22it/s][A
 22%|██▏       | 416/1861 [05:41<19:46,  1.22it/s][A
 22%|██▏       | 417/1861 [05:42<19:45,  1.22it/s][A
 22%|██▏       | 418/1861 [05:43<19:44,  1.22it/s][A
 23%|██▎       | 419/1861 [

Step: 500 | Loss: 1.781668524682522 | Train PPL: 5.939758789266992 | Val PPL: 5.922292206720281
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 27%|██▋       | 501/1861 [06:48<18:29,  1.23it/s][A
 27%|██▋       | 502/1861 [06:49<18:27,  1.23it/s][A
 27%|██▋       | 503/1861 [06:49<18:26,  1.23it/s][A
 27%|██▋       | 504/1861 [06:50<18:25,  1.23it/s][A
 27%|██▋       | 505/1861 [06:51<18:23,  1.23it/s][A
 27%|██▋       | 506/1861 [06:51<18:22,  1.23it/s][A
 27%|██▋       | 507/1861 [06:52<18:21,  1.23it/s][A
 27%|██▋       | 508/1861 [06:53<18:20,  1.23it/s][A
 27%|██▋       | 509/1861 [06:53<18:19,  1.23it/s][A
 27%|██▋       | 510/1861 [06:54<18:18,  1.23it/s][A
 27%|██▋       | 511/1861 [06:55<18:17,  1.23it/s][A
 28%|██▊       | 512/1861 [06:56<18:16,  1.23it/s][A
 28%|██▊       | 513/1861 [06:56<18:15,  1.23it/s][A
 28%|██▊       | 514/1861 [06:57<18:14,  1.23it/s][A
 28%|██▊       | 515/1861 [06:58<18:13,  1.23it/s][A
 28%|██▊       | 516/1861 [06:59<18:12,  1.23it/s][A
 28%|██▊       | 517/1861 [07:00<18:11,  1.23it/s][A
 28%|██▊       | 518/1861 [07:00<18:11,  1.23it/s][A
 28%|██▊       | 519/1861 [

Step: 600 | Loss: 1.7794269363582134 | Train PPL: 5.926459206978836
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 32%|███▏      | 601/1861 [08:04<16:55,  1.24it/s][A
 32%|███▏      | 602/1861 [08:05<16:54,  1.24it/s][A
 32%|███▏      | 603/1861 [08:06<16:53,  1.24it/s][A
 32%|███▏      | 604/1861 [08:06<16:53,  1.24it/s][A
 33%|███▎      | 605/1861 [08:07<16:52,  1.24it/s][A
 33%|███▎      | 606/1861 [08:08<16:51,  1.24it/s][A
 33%|███▎      | 607/1861 [08:09<16:50,  1.24it/s][A
 33%|███▎      | 608/1861 [08:10<16:50,  1.24it/s][A
 33%|███▎      | 609/1861 [08:11<16:49,  1.24it/s][A
 33%|███▎      | 610/1861 [08:11<16:48,  1.24it/s][A
 33%|███▎      | 611/1861 [08:12<16:48,  1.24it/s][A
 33%|███▎      | 612/1861 [08:13<16:48,  1.24it/s][A
 33%|███▎      | 613/1861 [08:14<16:47,  1.24it/s][A
 33%|███▎      | 614/1861 [08:15<16:46,  1.24it/s][A
 33%|███▎      | 615/1861 [08:16<16:45,  1.24it/s][A
 33%|███▎      | 616/1861 [08:17<16:44,  1.24it/s][A
 33%|███▎      | 617/1861 [08:18<16:44,  1.24it/s][A
 33%|███▎      | 618/1861 [08:18<16:42,  1.24it/s][A
 33%|███▎      | 619/1861 [

Step: 700 | Loss: 1.7795905976210322 | Train PPL: 5.927429218151157
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 38%|███▊      | 701/1861 [09:17<15:22,  1.26it/s][A
 38%|███▊      | 702/1861 [09:18<15:22,  1.26it/s][A
 38%|███▊      | 703/1861 [09:19<15:21,  1.26it/s][A
 38%|███▊      | 704/1861 [09:19<15:20,  1.26it/s][A
 38%|███▊      | 705/1861 [09:20<15:19,  1.26it/s][A
 38%|███▊      | 706/1861 [09:21<15:18,  1.26it/s][A
 38%|███▊      | 707/1861 [09:21<15:16,  1.26it/s][A
 38%|███▊      | 708/1861 [09:22<15:15,  1.26it/s][A
 38%|███▊      | 709/1861 [09:23<15:15,  1.26it/s][A
 38%|███▊      | 710/1861 [09:23<15:14,  1.26it/s][A
 38%|███▊      | 711/1861 [09:24<15:12,  1.26it/s][A
 38%|███▊      | 712/1861 [09:25<15:12,  1.26it/s][A
 38%|███▊      | 713/1861 [09:25<15:11,  1.26it/s][A
 38%|███▊      | 714/1861 [09:26<15:10,  1.26it/s][A
 38%|███▊      | 715/1861 [09:27<15:09,  1.26it/s][A
 38%|███▊      | 716/1861 [09:28<15:08,  1.26it/s][A
 39%|███▊      | 717/1861 [09:28<15:07,  1.26it/s][A
 39%|███▊      | 718/1861 [09:29<15:06,  1.26it/s][A
 39%|███▊      | 719/1861 [

Step: 800 | Loss: 1.773966218493879 | Train PPL: 5.894184686540833
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 43%|████▎     | 801/1861 [10:24<13:46,  1.28it/s][A
 43%|████▎     | 802/1861 [10:25<13:45,  1.28it/s][A
 43%|████▎     | 803/1861 [10:25<13:44,  1.28it/s][A
 43%|████▎     | 804/1861 [10:26<13:43,  1.28it/s][A
 43%|████▎     | 805/1861 [10:27<13:42,  1.28it/s][A
 43%|████▎     | 806/1861 [10:28<13:42,  1.28it/s][A
 43%|████▎     | 807/1861 [10:28<13:40,  1.28it/s][A
 43%|████▎     | 808/1861 [10:29<13:39,  1.28it/s][A
 43%|████▎     | 809/1861 [10:29<13:39,  1.28it/s][A
 44%|████▎     | 810/1861 [10:30<13:38,  1.28it/s][A
 44%|████▎     | 811/1861 [10:31<13:37,  1.28it/s][A
 44%|████▎     | 812/1861 [10:32<13:36,  1.28it/s][A
 44%|████▎     | 813/1861 [10:32<13:35,  1.28it/s][A
 44%|████▎     | 814/1861 [10:33<13:34,  1.29it/s][A
 44%|████▍     | 815/1861 [10:33<13:33,  1.29it/s][A
 44%|████▍     | 816/1861 [10:34<13:32,  1.29it/s][A
 44%|████▍     | 817/1861 [10:34<13:31,  1.29it/s][A
 44%|████▍     | 818/1861 [10:35<13:30,  1.29it/s][A
 44%|████▍     | 819/1861 [

Step: 900 | Loss: 1.7716770467824405 | Train PPL: 5.880707317585751
Wie würde eine solche Zukunft aussehen ? --> How would a sense of us ? </s>



 48%|████▊     | 901/1861 [11:30<12:15,  1.30it/s][A
 48%|████▊     | 902/1861 [11:31<12:15,  1.30it/s][A
 49%|████▊     | 903/1861 [11:32<12:14,  1.30it/s][A
 49%|████▊     | 904/1861 [11:33<12:13,  1.30it/s][A
 49%|████▊     | 905/1861 [11:33<12:12,  1.30it/s][A
 49%|████▊     | 906/1861 [11:34<12:11,  1.31it/s][A
 49%|████▊     | 907/1861 [11:34<12:10,  1.31it/s][A
 49%|████▉     | 908/1861 [11:35<12:09,  1.31it/s][A
 49%|████▉     | 909/1861 [11:36<12:09,  1.31it/s][A
 49%|████▉     | 910/1861 [11:36<12:08,  1.31it/s][A
 49%|████▉     | 911/1861 [11:37<12:07,  1.31it/s][A
 49%|████▉     | 912/1861 [11:38<12:06,  1.31it/s][A
 49%|████▉     | 913/1861 [11:38<12:05,  1.31it/s][A
 49%|████▉     | 914/1861 [11:39<12:04,  1.31it/s][A
 49%|████▉     | 915/1861 [11:40<12:03,  1.31it/s][A
 49%|████▉     | 916/1861 [11:40<12:02,  1.31it/s][A
 49%|████▉     | 917/1861 [11:41<12:01,  1.31it/s][A
 49%|████▉     | 918/1861 [11:42<12:01,  1.31it/s][A
 49%|████▉     | 919/1861 [

Step: 1000 | Loss: 1.7715908190011977 | Train PPL: 5.880200259103195 | Val PPL: 5.637336980673194
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this ? </s>



 54%|█████▍    | 1001/1861 [12:39<10:52,  1.32it/s][A
 54%|█████▍    | 1002/1861 [12:40<10:51,  1.32it/s][A
 54%|█████▍    | 1003/1861 [12:40<10:50,  1.32it/s][A
 54%|█████▍    | 1004/1861 [12:41<10:49,  1.32it/s][A
 54%|█████▍    | 1005/1861 [12:42<10:49,  1.32it/s][A
 54%|█████▍    | 1006/1861 [12:42<10:48,  1.32it/s][A
 54%|█████▍    | 1007/1861 [12:43<10:47,  1.32it/s][A
 54%|█████▍    | 1008/1861 [12:44<10:46,  1.32it/s][A
 54%|█████▍    | 1009/1861 [12:44<10:45,  1.32it/s][A
 54%|█████▍    | 1010/1861 [12:45<10:44,  1.32it/s][A
 54%|█████▍    | 1011/1861 [12:45<10:43,  1.32it/s][A
 54%|█████▍    | 1012/1861 [12:46<10:42,  1.32it/s][A
 54%|█████▍    | 1013/1861 [12:47<10:42,  1.32it/s][A
 54%|█████▍    | 1014/1861 [12:47<10:41,  1.32it/s][A
 55%|█████▍    | 1015/1861 [12:48<10:40,  1.32it/s][A
 55%|█████▍    | 1016/1861 [12:49<10:39,  1.32it/s][A
 55%|█████▍    | 1017/1861 [12:49<10:38,  1.32it/s][A
 55%|█████▍    | 1018/1861 [12:50<10:38,  1.32it/s][A
 55%|████

Step: 1100 | Loss: 1.7696061879938298 | Train PPL: 5.868541804031818
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 59%|█████▉    | 1101/1861 [13:45<09:30,  1.33it/s][A
 59%|█████▉    | 1102/1861 [13:46<09:29,  1.33it/s][A
 59%|█████▉    | 1103/1861 [13:47<09:28,  1.33it/s][A
 59%|█████▉    | 1104/1861 [13:47<09:27,  1.33it/s][A
 59%|█████▉    | 1105/1861 [13:48<09:26,  1.33it/s][A
 59%|█████▉    | 1106/1861 [13:49<09:26,  1.33it/s][A
 59%|█████▉    | 1107/1861 [13:50<09:25,  1.33it/s][A
 60%|█████▉    | 1108/1861 [13:50<09:24,  1.33it/s][A
 60%|█████▉    | 1109/1861 [13:51<09:23,  1.33it/s][A
 60%|█████▉    | 1110/1861 [13:51<09:22,  1.33it/s][A
 60%|█████▉    | 1111/1861 [13:52<09:22,  1.33it/s][A
 60%|█████▉    | 1112/1861 [13:53<09:21,  1.33it/s][A
 60%|█████▉    | 1113/1861 [13:53<09:20,  1.33it/s][A
 60%|█████▉    | 1114/1861 [13:54<09:19,  1.33it/s][A
 60%|█████▉    | 1115/1861 [13:55<09:18,  1.34it/s][A
 60%|█████▉    | 1116/1861 [13:55<09:18,  1.34it/s][A
 60%|██████    | 1117/1861 [13:56<09:17,  1.34it/s][A
 60%|██████    | 1118/1861 [13:57<09:16,  1.34it/s][A
 60%|████

Step: 1200 | Loss: 1.7667844516038895 | Train PPL: 5.852005667348805
Wie würde eine solche Zukunft aussehen ? --> How would it look like ? </s>



 65%|██████▍   | 1201/1861 [14:52<08:10,  1.35it/s][A
 65%|██████▍   | 1202/1861 [14:52<08:09,  1.35it/s][A
 65%|██████▍   | 1203/1861 [14:53<08:08,  1.35it/s][A
 65%|██████▍   | 1204/1861 [14:54<08:07,  1.35it/s][A
 65%|██████▍   | 1205/1861 [14:54<08:07,  1.35it/s][A
 65%|██████▍   | 1206/1861 [14:55<08:06,  1.35it/s][A
 65%|██████▍   | 1207/1861 [14:56<08:05,  1.35it/s][A
 65%|██████▍   | 1208/1861 [14:56<08:04,  1.35it/s][A
 65%|██████▍   | 1209/1861 [14:57<08:03,  1.35it/s][A
 65%|██████▌   | 1210/1861 [14:57<08:03,  1.35it/s][A
 65%|██████▌   | 1211/1861 [14:58<08:02,  1.35it/s][A
 65%|██████▌   | 1212/1861 [14:59<08:01,  1.35it/s][A
 65%|██████▌   | 1213/1861 [15:00<08:00,  1.35it/s][A
 65%|██████▌   | 1214/1861 [15:00<07:59,  1.35it/s][A
 65%|██████▌   | 1215/1861 [15:01<07:59,  1.35it/s][A
 65%|██████▌   | 1216/1861 [15:01<07:58,  1.35it/s][A
 65%|██████▌   | 1217/1861 [15:02<07:57,  1.35it/s][A
 65%|██████▌   | 1218/1861 [15:03<07:56,  1.35it/s][A
 66%|████

Step: 1300 | Loss: 1.766633806343262 | Train PPL: 5.851124156829211
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 70%|██████▉   | 1301/1861 [15:57<06:52,  1.36it/s][A
 70%|██████▉   | 1302/1861 [15:58<06:51,  1.36it/s][A
 70%|███████   | 1303/1861 [15:59<06:50,  1.36it/s][A
 70%|███████   | 1304/1861 [15:59<06:49,  1.36it/s][A
 70%|███████   | 1305/1861 [16:00<06:49,  1.36it/s][A
 70%|███████   | 1306/1861 [16:01<06:48,  1.36it/s][A
 70%|███████   | 1307/1861 [16:01<06:47,  1.36it/s][A
 70%|███████   | 1308/1861 [16:02<06:46,  1.36it/s][A
 70%|███████   | 1309/1861 [16:03<06:46,  1.36it/s][A
 70%|███████   | 1310/1861 [16:03<06:45,  1.36it/s][A
 70%|███████   | 1311/1861 [16:04<06:44,  1.36it/s][A
 70%|███████   | 1312/1861 [16:05<06:43,  1.36it/s][A
 71%|███████   | 1313/1861 [16:05<06:43,  1.36it/s][A
 71%|███████   | 1314/1861 [16:06<06:42,  1.36it/s][A
 71%|███████   | 1315/1861 [16:07<06:41,  1.36it/s][A
 71%|███████   | 1316/1861 [16:07<06:40,  1.36it/s][A
 71%|███████   | 1317/1861 [16:08<06:39,  1.36it/s][A
 71%|███████   | 1318/1861 [16:08<06:39,  1.36it/s][A
 71%|████

Step: 1400 | Loss: 1.7643505705467293 | Train PPL: 5.837779900543348
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this ? </s>



 75%|███████▌  | 1401/1861 [17:03<05:36,  1.37it/s][A
 75%|███████▌  | 1402/1861 [17:04<05:35,  1.37it/s][A
 75%|███████▌  | 1403/1861 [17:05<05:34,  1.37it/s][A
 75%|███████▌  | 1404/1861 [17:05<05:33,  1.37it/s][A
 75%|███████▌  | 1405/1861 [17:06<05:33,  1.37it/s][A
 76%|███████▌  | 1406/1861 [17:07<05:32,  1.37it/s][A
 76%|███████▌  | 1407/1861 [17:07<05:31,  1.37it/s][A
 76%|███████▌  | 1408/1861 [17:08<05:30,  1.37it/s][A
 76%|███████▌  | 1409/1861 [17:09<05:30,  1.37it/s][A
 76%|███████▌  | 1410/1861 [17:09<05:29,  1.37it/s][A
 76%|███████▌  | 1411/1861 [17:10<05:28,  1.37it/s][A
 76%|███████▌  | 1412/1861 [17:11<05:27,  1.37it/s][A
 76%|███████▌  | 1413/1861 [17:12<05:27,  1.37it/s][A
 76%|███████▌  | 1414/1861 [17:12<05:26,  1.37it/s][A
 76%|███████▌  | 1415/1861 [17:13<05:25,  1.37it/s][A
 76%|███████▌  | 1416/1861 [17:13<05:24,  1.37it/s][A
 76%|███████▌  | 1417/1861 [17:14<05:24,  1.37it/s][A
 76%|███████▌  | 1418/1861 [17:15<05:23,  1.37it/s][A
 76%|████

Step: 1500 | Loss: 1.762458048760891 | Train PPL: 5.826742222717929 | Val PPL: 5.4553543872892725
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 81%|████████  | 1501/1861 [18:11<04:21,  1.38it/s][A
 81%|████████  | 1502/1861 [18:11<04:20,  1.38it/s][A
 81%|████████  | 1503/1861 [18:12<04:20,  1.38it/s][A
 81%|████████  | 1504/1861 [18:13<04:19,  1.38it/s][A
 81%|████████  | 1505/1861 [18:14<04:18,  1.38it/s][A
 81%|████████  | 1506/1861 [18:14<04:18,  1.38it/s][A
 81%|████████  | 1507/1861 [18:15<04:17,  1.38it/s][A
 81%|████████  | 1508/1861 [18:16<04:16,  1.38it/s][A
 81%|████████  | 1509/1861 [18:17<04:15,  1.38it/s][A
 81%|████████  | 1510/1861 [18:17<04:15,  1.38it/s][A
 81%|████████  | 1511/1861 [18:18<04:14,  1.38it/s][A
 81%|████████  | 1512/1861 [18:19<04:13,  1.38it/s][A
 81%|████████▏ | 1513/1861 [18:19<04:12,  1.38it/s][A
 81%|████████▏ | 1514/1861 [18:20<04:12,  1.38it/s][A
 81%|████████▏ | 1515/1861 [18:21<04:11,  1.38it/s][A
 81%|████████▏ | 1516/1861 [18:21<04:10,  1.38it/s][A
 82%|████████▏ | 1517/1861 [18:22<04:10,  1.38it/s][A
 82%|████████▏ | 1518/1861 [18:23<04:09,  1.38it/s][A
 82%|████

Step: 1600 | Loss: 1.7595400016382337 | Train PPL: 5.809764297580509
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 86%|████████▌ | 1601/1861 [19:18<03:08,  1.38it/s][A
 86%|████████▌ | 1602/1861 [19:19<03:07,  1.38it/s][A
 86%|████████▌ | 1603/1861 [19:19<03:06,  1.38it/s][A
 86%|████████▌ | 1604/1861 [19:20<03:05,  1.38it/s][A
 86%|████████▌ | 1605/1861 [19:20<03:05,  1.38it/s][A
 86%|████████▋ | 1606/1861 [19:21<03:04,  1.38it/s][A
 86%|████████▋ | 1607/1861 [19:22<03:03,  1.38it/s][A
 86%|████████▋ | 1608/1861 [19:23<03:03,  1.38it/s][A
 86%|████████▋ | 1609/1861 [19:23<03:02,  1.38it/s][A
 87%|████████▋ | 1610/1861 [19:24<03:01,  1.38it/s][A
 87%|████████▋ | 1611/1861 [19:25<03:00,  1.38it/s][A
 87%|████████▋ | 1612/1861 [19:26<03:00,  1.38it/s][A
 87%|████████▋ | 1613/1861 [19:26<02:59,  1.38it/s][A
 87%|████████▋ | 1614/1861 [19:27<02:58,  1.38it/s][A
 87%|████████▋ | 1615/1861 [19:27<02:57,  1.38it/s][A
 87%|████████▋ | 1616/1861 [19:28<02:57,  1.38it/s][A
 87%|████████▋ | 1617/1861 [19:29<02:56,  1.38it/s][A
 87%|████████▋ | 1618/1861 [19:29<02:55,  1.38it/s][A
 87%|████

Step: 1700 | Loss: 1.7567832290775636 | Train PPL: 5.793770155017002
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 91%|█████████▏| 1701/1861 [20:25<01:55,  1.39it/s][A
 91%|█████████▏| 1702/1861 [20:25<01:54,  1.39it/s][A
 92%|█████████▏| 1703/1861 [20:26<01:53,  1.39it/s][A
 92%|█████████▏| 1704/1861 [20:27<01:53,  1.39it/s][A
 92%|█████████▏| 1705/1861 [20:27<01:52,  1.39it/s][A
 92%|█████████▏| 1706/1861 [20:28<01:51,  1.39it/s][A
 92%|█████████▏| 1707/1861 [20:29<01:50,  1.39it/s][A
 92%|█████████▏| 1708/1861 [20:29<01:50,  1.39it/s][A
 92%|█████████▏| 1709/1861 [20:30<01:49,  1.39it/s][A
 92%|█████████▏| 1710/1861 [20:31<01:48,  1.39it/s][A
 92%|█████████▏| 1711/1861 [20:31<01:48,  1.39it/s][A
 92%|█████████▏| 1712/1861 [20:32<01:47,  1.39it/s][A
 92%|█████████▏| 1713/1861 [20:33<01:46,  1.39it/s][A
 92%|█████████▏| 1714/1861 [20:33<01:45,  1.39it/s][A
 92%|█████████▏| 1715/1861 [20:34<01:45,  1.39it/s][A
 92%|█████████▏| 1716/1861 [20:35<01:44,  1.39it/s][A
 92%|█████████▏| 1717/1861 [20:36<01:43,  1.39it/s][A
 92%|█████████▏| 1718/1861 [20:36<01:42,  1.39it/s][A
 92%|████

Step: 1800 | Loss: 1.7548240130146344 | Train PPL: 5.782430019979186
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 97%|█████████▋| 1801/1861 [21:31<00:43,  1.39it/s][A
 97%|█████████▋| 1802/1861 [21:32<00:42,  1.39it/s][A
 97%|█████████▋| 1803/1861 [21:32<00:41,  1.39it/s][A
 97%|█████████▋| 1804/1861 [21:33<00:40,  1.39it/s][A
 97%|█████████▋| 1805/1861 [21:34<00:40,  1.39it/s][A
 97%|█████████▋| 1806/1861 [21:34<00:39,  1.39it/s][A
 97%|█████████▋| 1807/1861 [21:35<00:38,  1.39it/s][A
 97%|█████████▋| 1808/1861 [21:35<00:37,  1.40it/s][A
 97%|█████████▋| 1809/1861 [21:36<00:37,  1.40it/s][A
 97%|█████████▋| 1810/1861 [21:37<00:36,  1.40it/s][A
 97%|█████████▋| 1811/1861 [21:37<00:35,  1.40it/s][A
 97%|█████████▋| 1812/1861 [21:38<00:35,  1.40it/s][A
 97%|█████████▋| 1813/1861 [21:39<00:34,  1.40it/s][A
 97%|█████████▋| 1814/1861 [21:39<00:33,  1.40it/s][A
 98%|█████████▊| 1815/1861 [21:40<00:32,  1.40it/s][A
 98%|█████████▊| 1816/1861 [21:40<00:32,  1.40it/s][A
 98%|█████████▊| 1817/1861 [21:41<00:31,  1.40it/s][A
 98%|█████████▊| 1818/1861 [21:42<00:30,  1.40it/s][A
 98%|████

Epoch: 3 | Loss: 1.7542045343825408 | Train PPL: 5.778849037425837 | Val PPL: 5.36728433896466



  0%|          | 1/1861 [00:00<26:35,  1.17it/s][A
  0%|          | 2/1861 [00:01<24:37,  1.26it/s][A
  0%|          | 3/1861 [00:02<25:14,  1.23it/s][A
  0%|          | 4/1861 [00:02<22:44,  1.36it/s][A
  0%|          | 5/1861 [00:03<22:01,  1.40it/s][A
  0%|          | 6/1861 [00:04<22:16,  1.39it/s][A
  0%|          | 7/1861 [00:04<21:52,  1.41it/s][A
  0%|          | 8/1861 [00:05<22:01,  1.40it/s][A
  0%|          | 9/1861 [00:06<22:07,  1.40it/s][A
  1%|          | 10/1861 [00:07<22:11,  1.39it/s][A
  1%|          | 11/1861 [00:07<21:47,  1.42it/s][A
  1%|          | 12/1861 [00:08<21:55,  1.41it/s][A
  1%|          | 13/1861 [00:09<21:58,  1.40it/s][A
  1%|          | 14/1861 [00:10<22:02,  1.40it/s][A
  1%|          | 15/1861 [00:10<22:01,  1.40it/s][A
  1%|          | 16/1861 [00:11<22:05,  1.39it/s][A
  1%|          | 17/1861 [00:12<22:09,  1.39it/s][A
  1%|          | 18/1861 [00:12<21:49,  1.41it/s][A
  1%|          | 19/1861 [00:13<21:31,  1.43it/s][A
 

Step: 100 | Loss: 1.5787490928173065 | Train PPL: 4.848886508999799
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



  5%|▌         | 101/1861 [01:07<19:30,  1.50it/s][A
  5%|▌         | 102/1861 [01:08<19:32,  1.50it/s][A
  6%|▌         | 103/1861 [01:08<19:33,  1.50it/s][A
  6%|▌         | 104/1861 [01:09<19:29,  1.50it/s][A
  6%|▌         | 105/1861 [01:10<19:31,  1.50it/s][A
  6%|▌         | 106/1861 [01:10<19:29,  1.50it/s][A
  6%|▌         | 107/1861 [01:11<19:27,  1.50it/s][A
  6%|▌         | 108/1861 [01:12<19:28,  1.50it/s][A
  6%|▌         | 109/1861 [01:12<19:29,  1.50it/s][A
  6%|▌         | 110/1861 [01:13<19:30,  1.50it/s][A
  6%|▌         | 111/1861 [01:14<19:30,  1.49it/s][A
  6%|▌         | 112/1861 [01:14<19:29,  1.50it/s][A
  6%|▌         | 113/1861 [01:15<19:26,  1.50it/s][A
  6%|▌         | 114/1861 [01:16<19:27,  1.50it/s][A
  6%|▌         | 115/1861 [01:16<19:27,  1.50it/s][A
  6%|▌         | 116/1861 [01:17<19:28,  1.49it/s][A
  6%|▋         | 117/1861 [01:18<19:27,  1.49it/s][A
  6%|▋         | 118/1861 [01:19<19:27,  1.49it/s][A
  6%|▋         | 119/1861 [

Step: 200 | Loss: 1.5788751505315304 | Train PPL: 4.849497787077004
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 11%|█         | 201/1861 [02:13<18:24,  1.50it/s][A
 11%|█         | 202/1861 [02:14<18:24,  1.50it/s][A
 11%|█         | 203/1861 [02:14<18:22,  1.50it/s][A
 11%|█         | 204/1861 [02:15<18:22,  1.50it/s][A
 11%|█         | 205/1861 [02:16<18:22,  1.50it/s][A
 11%|█         | 206/1861 [02:17<18:22,  1.50it/s][A
 11%|█         | 207/1861 [02:17<18:20,  1.50it/s][A
 11%|█         | 208/1861 [02:18<18:19,  1.50it/s][A
 11%|█         | 209/1861 [02:18<18:17,  1.51it/s][A
 11%|█▏        | 210/1861 [02:19<18:17,  1.50it/s][A
 11%|█▏        | 211/1861 [02:20<18:16,  1.51it/s][A
 11%|█▏        | 212/1861 [02:20<18:15,  1.50it/s][A
 11%|█▏        | 213/1861 [02:21<18:15,  1.50it/s][A
 11%|█▏        | 214/1861 [02:22<18:14,  1.50it/s][A
 12%|█▏        | 215/1861 [02:23<18:14,  1.50it/s][A
 12%|█▏        | 216/1861 [02:23<18:13,  1.50it/s][A
 12%|█▏        | 217/1861 [02:24<18:12,  1.50it/s][A
 12%|█▏        | 218/1861 [02:24<18:12,  1.50it/s][A
 12%|█▏        | 219/1861 [

Step: 300 | Loss: 1.581906679471334 | Train PPL: 4.864221486346155
Wie würde eine solche Zukunft aussehen ? --> How would a look like this ? </s>



 16%|█▌        | 301/1861 [03:20<17:20,  1.50it/s][A
 16%|█▌        | 302/1861 [03:21<17:20,  1.50it/s][A
 16%|█▋        | 303/1861 [03:21<17:18,  1.50it/s][A
 16%|█▋        | 304/1861 [03:22<17:18,  1.50it/s][A
 16%|█▋        | 305/1861 [03:23<17:17,  1.50it/s][A
 16%|█▋        | 306/1861 [03:24<17:17,  1.50it/s][A
 16%|█▋        | 307/1861 [03:24<17:16,  1.50it/s][A
 17%|█▋        | 308/1861 [03:25<17:15,  1.50it/s][A
 17%|█▋        | 309/1861 [03:25<17:14,  1.50it/s][A
 17%|█▋        | 310/1861 [03:26<17:14,  1.50it/s][A
 17%|█▋        | 311/1861 [03:27<17:14,  1.50it/s][A
 17%|█▋        | 312/1861 [03:28<17:12,  1.50it/s][A
 17%|█▋        | 313/1861 [03:28<17:12,  1.50it/s][A
 17%|█▋        | 314/1861 [03:29<17:11,  1.50it/s][A
 17%|█▋        | 315/1861 [03:30<17:11,  1.50it/s][A
 17%|█▋        | 316/1861 [03:30<17:10,  1.50it/s][A
 17%|█▋        | 317/1861 [03:31<17:09,  1.50it/s][A
 17%|█▋        | 318/1861 [03:31<17:08,  1.50it/s][A
 17%|█▋        | 319/1861 [

Step: 400 | Loss: 1.5801271480321883 | Train PPL: 4.855573148560975
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this is what ? </s>



 22%|██▏       | 401/1861 [04:26<16:10,  1.50it/s][A
 22%|██▏       | 402/1861 [04:27<16:09,  1.50it/s][A
 22%|██▏       | 403/1861 [04:27<16:09,  1.50it/s][A
 22%|██▏       | 404/1861 [04:28<16:08,  1.50it/s][A
 22%|██▏       | 405/1861 [04:29<16:07,  1.50it/s][A
 22%|██▏       | 406/1861 [04:29<16:07,  1.50it/s][A
 22%|██▏       | 407/1861 [04:30<16:05,  1.51it/s][A
 22%|██▏       | 408/1861 [04:30<16:04,  1.51it/s][A
 22%|██▏       | 409/1861 [04:31<16:03,  1.51it/s][A
 22%|██▏       | 410/1861 [04:32<16:03,  1.51it/s][A
 22%|██▏       | 411/1861 [04:32<16:02,  1.51it/s][A
 22%|██▏       | 412/1861 [04:33<16:01,  1.51it/s][A
 22%|██▏       | 413/1861 [04:33<16:00,  1.51it/s][A
 22%|██▏       | 414/1861 [04:34<16:00,  1.51it/s][A
 22%|██▏       | 415/1861 [04:35<15:59,  1.51it/s][A
 22%|██▏       | 416/1861 [04:36<15:58,  1.51it/s][A
 22%|██▏       | 417/1861 [04:36<15:57,  1.51it/s][A
 22%|██▏       | 418/1861 [04:37<15:57,  1.51it/s][A
 23%|██▎       | 419/1861 [

Step: 500 | Loss: 1.5791440076828003 | Train PPL: 4.850801784523792 | Val PPL: 5.265254354006683
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 27%|██▋       | 501/1861 [05:34<15:08,  1.50it/s][A
 27%|██▋       | 502/1861 [05:35<15:07,  1.50it/s][A
 27%|██▋       | 503/1861 [05:36<15:07,  1.50it/s][A
 27%|██▋       | 504/1861 [05:36<15:06,  1.50it/s][A
 27%|██▋       | 505/1861 [05:37<15:05,  1.50it/s][A
 27%|██▋       | 506/1861 [05:38<15:05,  1.50it/s][A
 27%|██▋       | 507/1861 [05:38<15:04,  1.50it/s][A
 27%|██▋       | 508/1861 [05:39<15:03,  1.50it/s][A
 27%|██▋       | 509/1861 [05:40<15:03,  1.50it/s][A
 27%|██▋       | 510/1861 [05:40<15:01,  1.50it/s][A
 27%|██▋       | 511/1861 [05:41<15:01,  1.50it/s][A
 28%|██▊       | 512/1861 [05:41<15:01,  1.50it/s][A
 28%|██▊       | 513/1861 [05:42<15:00,  1.50it/s][A
 28%|██▊       | 514/1861 [05:43<14:59,  1.50it/s][A
 28%|██▊       | 515/1861 [05:44<14:59,  1.50it/s][A
 28%|██▊       | 516/1861 [05:44<14:58,  1.50it/s][A
 28%|██▊       | 517/1861 [05:45<14:57,  1.50it/s][A
 28%|██▊       | 518/1861 [05:45<14:56,  1.50it/s][A
 28%|██▊       | 519/1861 [

Step: 600 | Loss: 1.5780456409355004 | Train PPL: 4.8454767501016445
Wie würde eine solche Zukunft aussehen ? --> How would a future look like ? </s>



 32%|███▏      | 601/1861 [06:40<14:00,  1.50it/s][A
 32%|███▏      | 602/1861 [06:41<13:59,  1.50it/s][A
 32%|███▏      | 603/1861 [06:42<13:59,  1.50it/s][A
 32%|███▏      | 604/1861 [06:42<13:58,  1.50it/s][A
 33%|███▎      | 605/1861 [06:43<13:57,  1.50it/s][A
 33%|███▎      | 606/1861 [06:44<13:56,  1.50it/s][A
 33%|███▎      | 607/1861 [06:44<13:56,  1.50it/s][A
 33%|███▎      | 608/1861 [06:45<13:55,  1.50it/s][A
 33%|███▎      | 609/1861 [06:46<13:55,  1.50it/s][A
 33%|███▎      | 610/1861 [06:46<13:54,  1.50it/s][A
 33%|███▎      | 611/1861 [06:47<13:53,  1.50it/s][A
 33%|███▎      | 612/1861 [06:48<13:53,  1.50it/s][A
 33%|███▎      | 613/1861 [06:48<13:52,  1.50it/s][A
 33%|███▎      | 614/1861 [06:49<13:52,  1.50it/s][A
 33%|███▎      | 615/1861 [06:50<13:51,  1.50it/s][A
 33%|███▎      | 616/1861 [06:51<13:50,  1.50it/s][A
 33%|███▎      | 617/1861 [06:51<13:50,  1.50it/s][A
 33%|███▎      | 618/1861 [06:52<13:49,  1.50it/s][A
 33%|███▎      | 619/1861 [

Step: 700 | Loss: 1.5771201420681817 | Train PPL: 4.84099434140984
Wie würde eine solche Zukunft aussehen ? --> How would a look like this ? </s>



 38%|███▊      | 701/1861 [07:45<12:50,  1.51it/s][A
 38%|███▊      | 702/1861 [07:46<12:49,  1.51it/s][A
 38%|███▊      | 703/1861 [07:46<12:48,  1.51it/s][A
 38%|███▊      | 704/1861 [07:47<12:47,  1.51it/s][A
 38%|███▊      | 705/1861 [07:47<12:46,  1.51it/s][A
 38%|███▊      | 706/1861 [07:48<12:46,  1.51it/s][A
 38%|███▊      | 707/1861 [07:48<12:45,  1.51it/s][A
 38%|███▊      | 708/1861 [07:49<12:44,  1.51it/s][A
 38%|███▊      | 709/1861 [07:50<12:44,  1.51it/s][A
 38%|███▊      | 710/1861 [07:50<12:43,  1.51it/s][A
 38%|███▊      | 711/1861 [07:51<12:42,  1.51it/s][A
 38%|███▊      | 712/1861 [07:52<12:42,  1.51it/s][A
 38%|███▊      | 713/1861 [07:53<12:41,  1.51it/s][A
 38%|███▊      | 714/1861 [07:53<12:40,  1.51it/s][A
 38%|███▊      | 715/1861 [07:54<12:40,  1.51it/s][A
 38%|███▊      | 716/1861 [07:55<12:39,  1.51it/s][A
 39%|███▊      | 717/1861 [07:55<12:39,  1.51it/s][A
 39%|███▊      | 718/1861 [07:56<12:38,  1.51it/s][A
 39%|███▊      | 719/1861 [

Step: 800 | Loss: 1.5833430540934206 | Train PPL: 4.871213350911645
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 43%|████▎     | 801/1861 [08:50<11:41,  1.51it/s][A
 43%|████▎     | 802/1861 [08:50<11:40,  1.51it/s][A
 43%|████▎     | 803/1861 [08:51<11:40,  1.51it/s][A
 43%|████▎     | 804/1861 [08:52<11:39,  1.51it/s][A
 43%|████▎     | 805/1861 [08:52<11:38,  1.51it/s][A
 43%|████▎     | 806/1861 [08:53<11:37,  1.51it/s][A
 43%|████▎     | 807/1861 [08:53<11:37,  1.51it/s][A
 43%|████▎     | 808/1861 [08:54<11:36,  1.51it/s][A
 43%|████▎     | 809/1861 [08:54<11:35,  1.51it/s][A
 44%|████▎     | 810/1861 [08:55<11:34,  1.51it/s][A
 44%|████▎     | 811/1861 [08:56<11:34,  1.51it/s][A
 44%|████▎     | 812/1861 [08:56<11:33,  1.51it/s][A
 44%|████▎     | 813/1861 [08:57<11:32,  1.51it/s][A
 44%|████▎     | 814/1861 [08:58<11:32,  1.51it/s][A
 44%|████▍     | 815/1861 [08:58<11:31,  1.51it/s][A
 44%|████▍     | 816/1861 [08:59<11:30,  1.51it/s][A
 44%|████▍     | 817/1861 [08:59<11:29,  1.51it/s][A
 44%|████▍     | 818/1861 [09:00<11:29,  1.51it/s][A
 44%|████▍     | 819/1861 [

Step: 900 | Loss: 1.5848689222335814 | Train PPL: 4.878651853811781
Wie würde eine solche Zukunft aussehen ? --> How would a call look like this ? </s>



 48%|████▊     | 901/1861 [09:54<10:33,  1.51it/s][A
 48%|████▊     | 902/1861 [09:55<10:32,  1.52it/s][A
 49%|████▊     | 903/1861 [09:56<10:32,  1.51it/s][A
 49%|████▊     | 904/1861 [09:56<10:31,  1.51it/s][A
 49%|████▊     | 905/1861 [09:57<10:30,  1.52it/s][A
 49%|████▊     | 906/1861 [09:57<10:30,  1.52it/s][A
 49%|████▊     | 907/1861 [09:58<10:29,  1.52it/s][A
 49%|████▉     | 908/1861 [09:59<10:29,  1.52it/s][A
 49%|████▉     | 909/1861 [10:00<10:28,  1.51it/s][A
 49%|████▉     | 910/1861 [10:00<10:27,  1.52it/s][A
 49%|████▉     | 911/1861 [10:01<10:27,  1.52it/s][A
 49%|████▉     | 912/1861 [10:01<10:26,  1.52it/s][A
 49%|████▉     | 913/1861 [10:02<10:25,  1.52it/s][A
 49%|████▉     | 914/1861 [10:02<10:24,  1.52it/s][A
 49%|████▉     | 915/1861 [10:03<10:23,  1.52it/s][A
 49%|████▉     | 916/1861 [10:03<10:22,  1.52it/s][A
 49%|████▉     | 917/1861 [10:04<10:22,  1.52it/s][A
 49%|████▉     | 918/1861 [10:05<10:21,  1.52it/s][A
 49%|████▉     | 919/1861 [

Step: 1000 | Loss: 1.585605664998293 | Train PPL: 4.882247489633322 | Val PPL: 5.195302101161378
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 54%|█████▍    | 1001/1861 [11:01<09:28,  1.51it/s][A
 54%|█████▍    | 1002/1861 [11:02<09:27,  1.51it/s][A
 54%|█████▍    | 1003/1861 [11:02<09:26,  1.51it/s][A
 54%|█████▍    | 1004/1861 [11:03<09:26,  1.51it/s][A
 54%|█████▍    | 1005/1861 [11:03<09:25,  1.51it/s][A
 54%|█████▍    | 1006/1861 [11:04<09:24,  1.51it/s][A
 54%|█████▍    | 1007/1861 [11:04<09:23,  1.51it/s][A
 54%|█████▍    | 1008/1861 [11:05<09:23,  1.51it/s][A
 54%|█████▍    | 1009/1861 [11:06<09:22,  1.51it/s][A
 54%|█████▍    | 1010/1861 [11:06<09:21,  1.51it/s][A
 54%|█████▍    | 1011/1861 [11:07<09:21,  1.51it/s][A
 54%|█████▍    | 1012/1861 [11:08<09:20,  1.51it/s][A
 54%|█████▍    | 1013/1861 [11:08<09:19,  1.51it/s][A
 54%|█████▍    | 1014/1861 [11:09<09:19,  1.51it/s][A
 55%|█████▍    | 1015/1861 [11:10<09:18,  1.51it/s][A
 55%|█████▍    | 1016/1861 [11:11<09:18,  1.51it/s][A
 55%|█████▍    | 1017/1861 [11:11<09:17,  1.51it/s][A
 55%|█████▍    | 1018/1861 [11:12<09:16,  1.51it/s][A
 55%|████

Step: 1100 | Loss: 1.5823113359646364 | Train PPL: 4.86619022345973
Wie würde eine solche Zukunft aussehen ? --> How would a look like this ? </s>



 59%|█████▉    | 1101/1861 [12:07<08:22,  1.51it/s][A
 59%|█████▉    | 1102/1861 [12:08<08:21,  1.51it/s][A
 59%|█████▉    | 1103/1861 [12:08<08:20,  1.51it/s][A
 59%|█████▉    | 1104/1861 [12:09<08:20,  1.51it/s][A
 59%|█████▉    | 1105/1861 [12:09<08:19,  1.51it/s][A
 59%|█████▉    | 1106/1861 [12:10<08:18,  1.51it/s][A
 59%|█████▉    | 1107/1861 [12:11<08:18,  1.51it/s][A
 60%|█████▉    | 1108/1861 [12:11<08:17,  1.51it/s][A
 60%|█████▉    | 1109/1861 [12:12<08:16,  1.51it/s][A
 60%|█████▉    | 1110/1861 [12:13<08:16,  1.51it/s][A
 60%|█████▉    | 1111/1861 [12:13<08:15,  1.51it/s][A
 60%|█████▉    | 1112/1861 [12:14<08:14,  1.51it/s][A
 60%|█████▉    | 1113/1861 [12:15<08:14,  1.51it/s][A
 60%|█████▉    | 1114/1861 [12:16<08:13,  1.51it/s][A
 60%|█████▉    | 1115/1861 [12:16<08:12,  1.51it/s][A
 60%|█████▉    | 1116/1861 [12:17<08:12,  1.51it/s][A
 60%|██████    | 1117/1861 [12:17<08:11,  1.51it/s][A
 60%|██████    | 1118/1861 [12:18<08:10,  1.51it/s][A
 60%|████

Step: 1200 | Loss: 1.583020194992423 | Train PPL: 4.869640889203848
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 65%|██████▍   | 1201/1861 [13:12<07:15,  1.52it/s][A
 65%|██████▍   | 1202/1861 [13:12<07:14,  1.52it/s][A
 65%|██████▍   | 1203/1861 [13:13<07:14,  1.52it/s][A
 65%|██████▍   | 1204/1861 [13:14<07:13,  1.52it/s][A
 65%|██████▍   | 1205/1861 [13:14<07:12,  1.52it/s][A
 65%|██████▍   | 1206/1861 [13:15<07:11,  1.52it/s][A
 65%|██████▍   | 1207/1861 [13:15<07:11,  1.52it/s][A
 65%|██████▍   | 1208/1861 [13:16<07:10,  1.52it/s][A
 65%|██████▍   | 1209/1861 [13:17<07:09,  1.52it/s][A
 65%|██████▌   | 1210/1861 [13:17<07:09,  1.52it/s][A
 65%|██████▌   | 1211/1861 [13:18<07:08,  1.52it/s][A
 65%|██████▌   | 1212/1861 [13:19<07:08,  1.52it/s][A
 65%|██████▌   | 1213/1861 [13:19<07:07,  1.52it/s][A
 65%|██████▌   | 1214/1861 [13:20<07:06,  1.52it/s][A
 65%|██████▌   | 1215/1861 [13:21<07:05,  1.52it/s][A
 65%|██████▌   | 1216/1861 [13:21<07:05,  1.52it/s][A
 65%|██████▌   | 1217/1861 [13:22<07:04,  1.52it/s][A
 65%|██████▌   | 1218/1861 [13:22<07:03,  1.52it/s][A
 66%|████

Step: 1300 | Loss: 1.5843968495268088 | Train PPL: 4.876349318950443
Wie würde eine solche Zukunft aussehen ? --> How would a look like this ? </s>



 70%|██████▉   | 1301/1861 [14:17<06:08,  1.52it/s][A
 70%|██████▉   | 1302/1861 [14:17<06:08,  1.52it/s][A
 70%|███████   | 1303/1861 [14:18<06:07,  1.52it/s][A
 70%|███████   | 1304/1861 [14:19<06:06,  1.52it/s][A
 70%|███████   | 1305/1861 [14:19<06:06,  1.52it/s][A
 70%|███████   | 1306/1861 [14:20<06:05,  1.52it/s][A
 70%|███████   | 1307/1861 [14:20<06:04,  1.52it/s][A
 70%|███████   | 1308/1861 [14:21<06:04,  1.52it/s][A
 70%|███████   | 1309/1861 [14:22<06:03,  1.52it/s][A
 70%|███████   | 1310/1861 [14:22<06:02,  1.52it/s][A
 70%|███████   | 1311/1861 [14:23<06:02,  1.52it/s][A
 70%|███████   | 1312/1861 [14:24<06:01,  1.52it/s][A
 71%|███████   | 1313/1861 [14:24<06:00,  1.52it/s][A
 71%|███████   | 1314/1861 [14:25<06:00,  1.52it/s][A
 71%|███████   | 1315/1861 [14:26<05:59,  1.52it/s][A
 71%|███████   | 1316/1861 [14:26<05:58,  1.52it/s][A
 71%|███████   | 1317/1861 [14:27<05:58,  1.52it/s][A
 71%|███████   | 1318/1861 [14:28<05:57,  1.52it/s][A
 71%|████

Step: 1400 | Loss: 1.5830916795134544 | Train PPL: 4.869989005592726
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this ? </s>



 75%|███████▌  | 1401/1861 [54:21<17:50,  2.33s/it][A
 75%|███████▌  | 1402/1861 [54:21<17:47,  2.33s/it][A
 75%|███████▌  | 1403/1861 [54:22<17:44,  2.33s/it][A
 75%|███████▌  | 1404/1861 [54:22<17:42,  2.32s/it][A
 75%|███████▌  | 1405/1861 [54:23<17:39,  2.32s/it][A
 76%|███████▌  | 1406/1861 [54:24<17:36,  2.32s/it][A
 76%|███████▌  | 1407/1861 [54:24<17:33,  2.32s/it][A
 76%|███████▌  | 1408/1861 [54:25<17:30,  2.32s/it][A
 76%|███████▌  | 1409/1861 [54:26<17:27,  2.32s/it][A
 76%|███████▌  | 1410/1861 [54:27<17:24,  2.32s/it][A
 76%|███████▌  | 1411/1861 [54:27<17:22,  2.32s/it][A
 76%|███████▌  | 1412/1861 [54:28<17:19,  2.31s/it][A
 76%|███████▌  | 1413/1861 [54:28<17:16,  2.31s/it][A
 76%|███████▌  | 1414/1861 [54:29<17:13,  2.31s/it][A
 76%|███████▌  | 1415/1861 [54:33<17:11,  2.31s/it][A
 76%|███████▌  | 1416/1861 [54:40<17:10,  2.32s/it][A
 76%|███████▌  | 1417/1861 [54:41<17:08,  2.32s/it][A
 76%|███████▌  | 1418/1861 [54:42<17:05,  2.32s/it][A
 76%|████

Step: 1500 | Loss: 1.5822297787865003 | Train PPL: 4.865793366900302 | Val PPL: 5.025158411421023
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 81%|████████  | 1501/1861 [7:15:44<1:44:30, 17.42s/it][A
 81%|████████  | 1502/1861 [7:15:44<1:44:09, 17.41s/it][A
 81%|████████  | 1503/1861 [7:15:45<1:43:47, 17.40s/it][A
 81%|████████  | 1504/1861 [7:15:46<1:43:26, 17.38s/it][A
 81%|████████  | 1505/1861 [7:15:46<1:43:04, 17.37s/it][A
 81%|████████  | 1506/1861 [7:15:47<1:42:43, 17.36s/it][A
 81%|████████  | 1507/1861 [7:15:48<1:42:22, 17.35s/it][A
 81%|████████  | 1508/1861 [9:15:36<2:10:03, 22.11s/it][A
 81%|████████  | 1509/1861 [9:15:37<2:09:36, 22.09s/it][A
 81%|████████  | 1510/1861 [9:15:38<2:09:09, 22.08s/it][A
 81%|████████  | 1511/1861 [9:15:39<2:08:42, 22.06s/it][A
 81%|████████  | 1512/1861 [9:15:40<2:08:15, 22.05s/it][A
 81%|████████▏ | 1513/1861 [9:15:40<2:07:48, 22.04s/it][A
 81%|████████▏ | 1514/1861 [9:15:41<2:07:21, 22.02s/it][A
 81%|████████▏ | 1515/1861 [9:15:44<2:06:55, 22.01s/it][A
 81%|████████▏ | 1516/1861 [9:15:52<2:06:30, 22.00s/it][A
 82%|████████▏ | 1517/1861 [9:15:54<2:06:03, 21.99s/it]

Step: 1600 | Loss: 1.5823508846759795 | Train PPL: 4.866382678817875
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 86%|████████▌ | 1601/1861 [11:43:15<1:54:12, 26.36s/it][A
 86%|████████▌ | 1602/1861 [11:43:16<1:53:41, 26.34s/it][A
 86%|████████▌ | 1603/1861 [11:43:16<1:53:11, 26.32s/it][A
 86%|████████▌ | 1604/1861 [11:43:17<1:52:41, 26.31s/it][A
 86%|████████▌ | 1605/1861 [11:43:17<1:52:10, 26.29s/it][A
 86%|████████▋ | 1606/1861 [11:43:18<1:51:40, 26.28s/it][A
 86%|████████▋ | 1607/1861 [11:43:19<1:51:09, 26.26s/it][A
 86%|████████▋ | 1608/1861 [11:43:19<1:50:39, 26.24s/it][A
 86%|████████▋ | 1609/1861 [11:43:20<1:50:09, 26.23s/it][A
 87%|████████▋ | 1610/1861 [11:43:20<1:49:39, 26.21s/it][A
 87%|████████▋ | 1611/1861 [11:43:21<1:49:08, 26.20s/it][A
 87%|████████▋ | 1612/1861 [11:43:22<1:48:38, 26.18s/it][A
 87%|████████▋ | 1613/1861 [11:43:22<1:48:08, 26.16s/it][A
 87%|████████▋ | 1614/1861 [11:43:23<1:47:38, 26.15s/it][A
 87%|████████▋ | 1615/1861 [11:43:24<1:47:08, 26.13s/it][A
 87%|████████▋ | 1616/1861 [11:43:25<1:46:38, 26.12s/it][A
 87%|████████▋ | 1617/1861 [11:43:25<1:

Step: 1700 | Loss: 1.5823479300211458 | Train PPL: 4.866368300358012
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this future ? </s>



 91%|█████████▏| 1701/1861 [11:44:25<1:06:15, 24.85s/it][A
 91%|█████████▏| 1702/1861 [11:44:26<1:05:48, 24.83s/it][A
 92%|█████████▏| 1703/1861 [11:44:27<1:05:21, 24.82s/it][A
 92%|█████████▏| 1704/1861 [11:44:27<1:04:54, 24.81s/it][A
 92%|█████████▏| 1705/1861 [11:44:28<1:04:27, 24.79s/it][A
 92%|█████████▏| 1706/1861 [11:44:29<1:04:00, 24.78s/it][A
 92%|█████████▏| 1707/1861 [11:44:30<1:03:33, 24.76s/it][A
 92%|█████████▏| 1708/1861 [11:44:30<1:03:06, 24.75s/it][A
 92%|█████████▏| 1709/1861 [11:44:31<1:02:39, 24.73s/it][A
 92%|█████████▏| 1710/1861 [11:44:32<1:02:12, 24.72s/it][A
 92%|█████████▏| 1711/1861 [11:44:33<1:01:46, 24.71s/it][A
 92%|█████████▏| 1712/1861 [11:44:33<1:01:19, 24.69s/it][A
 92%|█████████▏| 1713/1861 [11:44:34<1:00:52, 24.68s/it][A
 92%|█████████▏| 1714/1861 [11:44:35<1:00:25, 24.66s/it][A
 92%|█████████▏| 1715/1861 [11:44:35<59:58, 24.65s/it]  [A
 92%|█████████▏| 1716/1861 [11:44:36<59:32, 24.64s/it][A
 92%|█████████▏| 1717/1861 [11:44:37<59:0

Step: 1800 | Loss: 1.5818330551683903 | Train PPL: 4.863863374612885
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 97%|█████████▋| 1801/1861 [11:45:35<23:30, 23.51s/it][A
 97%|█████████▋| 1802/1861 [11:45:36<23:06, 23.49s/it][A
 97%|█████████▋| 1803/1861 [11:45:36<22:41, 23.48s/it][A
 97%|█████████▋| 1804/1861 [11:45:37<22:17, 23.47s/it][A
 97%|█████████▋| 1805/1861 [11:45:38<21:53, 23.46s/it][A
 97%|█████████▋| 1806/1861 [11:45:38<21:29, 23.44s/it][A
 97%|█████████▋| 1807/1861 [11:45:39<21:05, 23.43s/it][A
 97%|█████████▋| 1808/1861 [11:45:39<20:41, 23.42s/it][A
 97%|█████████▋| 1809/1861 [11:45:40<20:17, 23.41s/it][A
 97%|█████████▋| 1810/1861 [11:45:41<19:53, 23.39s/it][A
 97%|█████████▋| 1811/1861 [11:45:41<19:29, 23.38s/it][A
 97%|█████████▋| 1812/1861 [11:45:42<19:05, 23.37s/it][A
 97%|█████████▋| 1813/1861 [11:45:43<18:41, 23.36s/it][A
 97%|█████████▋| 1814/1861 [11:45:43<18:17, 23.34s/it][A
 98%|█████████▊| 1815/1861 [11:45:44<17:53, 23.33s/it][A
 98%|█████████▊| 1816/1861 [11:45:45<17:29, 23.32s/it][A
 98%|█████████▊| 1817/1861 [11:45:46<17:05, 23.31s/it][A
 98%|████████

Epoch: 4 | Loss: 1.5815610448273838 | Train PPL: 4.862540533399149 | Val PPL: 5.013246154780949



  0%|          | 1/1861 [00:00<28:36,  1.08it/s][A
  0%|          | 2/1861 [00:01<26:03,  1.19it/s][A
  0%|          | 3/1861 [00:02<25:07,  1.23it/s][A
  0%|          | 4/1861 [00:03<24:41,  1.25it/s][A
  0%|          | 5/1861 [00:03<24:01,  1.29it/s][A
  0%|          | 6/1861 [00:04<23:27,  1.32it/s][A
  0%|          | 7/1861 [00:05<22:51,  1.35it/s][A
  0%|          | 8/1861 [00:05<22:52,  1.35it/s][A
  0%|          | 9/1861 [00:06<22:54,  1.35it/s][A
  1%|          | 10/1861 [00:07<22:56,  1.35it/s][A
  1%|          | 11/1861 [00:08<22:57,  1.34it/s][A
  1%|          | 12/1861 [00:08<22:53,  1.35it/s][A
  1%|          | 13/1861 [00:09<22:55,  1.34it/s][A
  1%|          | 14/1861 [00:10<22:27,  1.37it/s][A
  1%|          | 15/1861 [00:10<22:20,  1.38it/s][A
  1%|          | 16/1861 [00:11<22:09,  1.39it/s][A
  1%|          | 17/1861 [00:12<22:12,  1.38it/s][A
  1%|          | 18/1861 [00:12<21:38,  1.42it/s][A
  1%|          | 19/1861 [00:13<21:40,  1.42it/s][A
 

Step: 100 | Loss: 1.4371426811814307 | Train PPL: 4.208653156464767
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this stuff ? </s>



  5%|▌         | 101/1861 [01:07<19:33,  1.50it/s][A
  5%|▌         | 102/1861 [01:08<19:33,  1.50it/s][A
  6%|▌         | 103/1861 [01:08<19:34,  1.50it/s][A
  6%|▌         | 104/1861 [01:09<19:34,  1.50it/s][A
  6%|▌         | 105/1861 [01:10<19:36,  1.49it/s][A
  6%|▌         | 106/1861 [01:10<19:33,  1.50it/s][A
  6%|▌         | 107/1861 [01:11<19:32,  1.50it/s][A
  6%|▌         | 108/1861 [01:12<19:32,  1.49it/s][A
  6%|▌         | 109/1861 [01:12<19:32,  1.49it/s][A
  6%|▌         | 110/1861 [01:13<19:33,  1.49it/s][A
  6%|▌         | 111/1861 [01:14<19:34,  1.49it/s][A
  6%|▌         | 112/1861 [01:15<19:34,  1.49it/s][A
  6%|▌         | 113/1861 [01:15<19:35,  1.49it/s][A
  6%|▌         | 114/1861 [01:16<19:35,  1.49it/s][A
  6%|▌         | 115/1861 [01:17<19:35,  1.48it/s][A
  6%|▌         | 116/1861 [01:17<19:32,  1.49it/s][A
  6%|▋         | 117/1861 [01:18<19:30,  1.49it/s][A
  6%|▋         | 118/1861 [01:19<19:29,  1.49it/s][A
  6%|▋         | 119/1861 [

Step: 200 | Loss: 1.4441560155153275 | Train PPL: 4.238273595757138
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 11%|█         | 201/1861 [02:13<18:25,  1.50it/s][A
 11%|█         | 202/1861 [02:14<18:25,  1.50it/s][A
 11%|█         | 203/1861 [02:15<18:25,  1.50it/s][A
 11%|█         | 204/1861 [02:16<18:25,  1.50it/s][A
 11%|█         | 205/1861 [02:16<18:25,  1.50it/s][A
 11%|█         | 206/1861 [02:17<18:25,  1.50it/s][A
 11%|█         | 207/1861 [02:18<18:25,  1.50it/s][A
 11%|█         | 208/1861 [02:19<18:24,  1.50it/s][A
 11%|█         | 209/1861 [02:19<18:24,  1.50it/s][A
 11%|█▏        | 210/1861 [02:20<18:24,  1.49it/s][A
 11%|█▏        | 211/1861 [02:21<18:24,  1.49it/s][A
 11%|█▏        | 212/1861 [02:21<18:23,  1.49it/s][A
 11%|█▏        | 213/1861 [02:22<18:21,  1.50it/s][A
 11%|█▏        | 214/1861 [02:23<18:21,  1.50it/s][A
 12%|█▏        | 215/1861 [02:23<18:21,  1.49it/s][A
 12%|█▏        | 216/1861 [02:24<18:20,  1.49it/s][A
 12%|█▏        | 217/1861 [02:25<18:19,  1.50it/s][A
 12%|█▏        | 218/1861 [02:25<18:18,  1.50it/s][A
 12%|█▏        | 219/1861 [

Step: 300 | Loss: 1.447447857360045 | Train PPL: 4.252248310785438
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 16%|█▌        | 301/1861 [03:19<17:13,  1.51it/s][A
 16%|█▌        | 302/1861 [03:19<17:11,  1.51it/s][A
 16%|█▋        | 303/1861 [03:20<17:11,  1.51it/s][A
 16%|█▋        | 304/1861 [03:21<17:10,  1.51it/s][A
 16%|█▋        | 305/1861 [03:21<17:09,  1.51it/s][A
 16%|█▋        | 306/1861 [03:22<17:09,  1.51it/s][A
 16%|█▋        | 307/1861 [03:23<17:08,  1.51it/s][A
 17%|█▋        | 308/1861 [03:23<17:07,  1.51it/s][A
 17%|█▋        | 309/1861 [03:24<17:07,  1.51it/s][A
 17%|█▋        | 310/1861 [03:25<17:07,  1.51it/s][A
 17%|█▋        | 311/1861 [03:25<17:05,  1.51it/s][A
 17%|█▋        | 312/1861 [03:26<17:05,  1.51it/s][A
 17%|█▋        | 313/1861 [03:27<17:04,  1.51it/s][A
 17%|█▋        | 314/1861 [03:27<17:03,  1.51it/s][A
 17%|█▋        | 315/1861 [03:28<17:02,  1.51it/s][A
 17%|█▋        | 316/1861 [03:29<17:02,  1.51it/s][A
 17%|█▋        | 317/1861 [03:29<17:02,  1.51it/s][A
 17%|█▋        | 318/1861 [03:30<17:01,  1.51it/s][A
 17%|█▋        | 319/1861 [

Step: 400 | Loss: 1.451035370528698 | Train PPL: 4.267530704099712
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this is what ? </s>



 22%|██▏       | 401/1861 [04:24<16:02,  1.52it/s][A
 22%|██▏       | 402/1861 [04:25<16:02,  1.52it/s][A
 22%|██▏       | 403/1861 [04:25<16:02,  1.52it/s][A
 22%|██▏       | 404/1861 [04:26<16:01,  1.52it/s][A
 22%|██▏       | 405/1861 [04:27<16:00,  1.52it/s][A
 22%|██▏       | 406/1861 [04:27<16:00,  1.52it/s][A
 22%|██▏       | 407/1861 [04:28<15:59,  1.51it/s][A
 22%|██▏       | 408/1861 [04:29<15:59,  1.51it/s][A
 22%|██▏       | 409/1861 [04:29<15:57,  1.52it/s][A
 22%|██▏       | 410/1861 [04:30<15:57,  1.52it/s][A
 22%|██▏       | 411/1861 [04:31<15:57,  1.52it/s][A
 22%|██▏       | 412/1861 [04:31<15:56,  1.52it/s][A
 22%|██▏       | 413/1861 [04:32<15:55,  1.52it/s][A
 22%|██▏       | 414/1861 [04:33<15:54,  1.52it/s][A
 22%|██▏       | 415/1861 [04:33<15:54,  1.52it/s][A
 22%|██▏       | 416/1861 [04:34<15:52,  1.52it/s][A
 22%|██▏       | 417/1861 [04:34<15:52,  1.52it/s][A
 22%|██▏       | 418/1861 [04:35<15:51,  1.52it/s][A
 23%|██▎       | 419/1861 [

Step: 500 | Loss: 1.449014308810234 | Train PPL: 4.258914471063547 | Val PPL: 4.990160111639556
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 27%|██▋       | 501/1861 [05:32<15:02,  1.51it/s][A
 27%|██▋       | 502/1861 [05:33<15:01,  1.51it/s][A
 27%|██▋       | 503/1861 [05:33<15:01,  1.51it/s][A
 27%|██▋       | 504/1861 [05:34<14:59,  1.51it/s][A
 27%|██▋       | 505/1861 [05:34<14:59,  1.51it/s][A
 27%|██▋       | 506/1861 [05:35<14:58,  1.51it/s][A
 27%|██▋       | 507/1861 [05:36<14:58,  1.51it/s][A
 27%|██▋       | 508/1861 [05:37<14:57,  1.51it/s][A
 27%|██▋       | 509/1861 [05:37<14:57,  1.51it/s][A
 27%|██▋       | 510/1861 [05:38<14:56,  1.51it/s][A
 27%|██▋       | 511/1861 [05:39<14:56,  1.51it/s][A
 28%|██▊       | 512/1861 [05:39<14:55,  1.51it/s][A
 28%|██▊       | 513/1861 [05:40<14:54,  1.51it/s][A
 28%|██▊       | 514/1861 [05:41<14:53,  1.51it/s][A
 28%|██▊       | 515/1861 [05:41<14:52,  1.51it/s][A
 28%|██▊       | 516/1861 [05:42<14:52,  1.51it/s][A
 28%|██▊       | 517/1861 [05:42<14:51,  1.51it/s][A
 28%|██▊       | 518/1861 [05:43<14:50,  1.51it/s][A
 28%|██▊       | 519/1861 [

Step: 600 | Loss: 1.449830927103758 | Train PPL: 4.26239379897923
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 32%|███▏      | 601/1861 [06:36<13:51,  1.51it/s][A
 32%|███▏      | 602/1861 [06:37<13:51,  1.51it/s][A
 32%|███▏      | 603/1861 [06:38<13:50,  1.51it/s][A
 32%|███▏      | 604/1861 [06:38<13:49,  1.51it/s][A
 33%|███▎      | 605/1861 [06:39<13:49,  1.51it/s][A
 33%|███▎      | 606/1861 [06:39<13:48,  1.52it/s][A
 33%|███▎      | 607/1861 [06:40<13:47,  1.51it/s][A
 33%|███▎      | 608/1861 [06:41<13:47,  1.51it/s][A
 33%|███▎      | 609/1861 [06:41<13:46,  1.52it/s][A
 33%|███▎      | 610/1861 [06:42<13:45,  1.52it/s][A
 33%|███▎      | 611/1861 [06:43<13:44,  1.52it/s][A
 33%|███▎      | 612/1861 [06:43<13:44,  1.52it/s][A
 33%|███▎      | 613/1861 [06:44<13:43,  1.52it/s][A
 33%|███▎      | 614/1861 [06:44<13:42,  1.52it/s][A
 33%|███▎      | 615/1861 [06:45<13:41,  1.52it/s][A
 33%|███▎      | 616/1861 [06:46<13:40,  1.52it/s][A
 33%|███▎      | 617/1861 [06:46<13:40,  1.52it/s][A
 33%|███▎      | 618/1861 [06:47<13:39,  1.52it/s][A
 33%|███▎      | 619/1861 [

Step: 700 | Loss: 1.452415253179414 | Train PPL: 4.2734234604007675
Wie würde eine solche Zukunft aussehen ? --> How would a sense look like this ? </s>



 38%|███▊      | 701/1861 [07:41<12:43,  1.52it/s][A
 38%|███▊      | 702/1861 [07:42<12:43,  1.52it/s][A
 38%|███▊      | 703/1861 [07:43<12:42,  1.52it/s][A
 38%|███▊      | 704/1861 [07:43<12:42,  1.52it/s][A
 38%|███▊      | 705/1861 [07:44<12:41,  1.52it/s][A
 38%|███▊      | 706/1861 [07:44<12:40,  1.52it/s][A
 38%|███▊      | 707/1861 [07:45<12:39,  1.52it/s][A
 38%|███▊      | 708/1861 [07:46<12:39,  1.52it/s][A
 38%|███▊      | 709/1861 [07:46<12:38,  1.52it/s][A
 38%|███▊      | 710/1861 [07:47<12:37,  1.52it/s][A
 38%|███▊      | 711/1861 [07:47<12:36,  1.52it/s][A
 38%|███▊      | 712/1861 [07:48<12:35,  1.52it/s][A
 38%|███▊      | 713/1861 [07:49<12:35,  1.52it/s][A
 38%|███▊      | 714/1861 [07:49<12:34,  1.52it/s][A
 38%|███▊      | 715/1861 [07:50<12:34,  1.52it/s][A
 38%|███▊      | 716/1861 [07:51<12:33,  1.52it/s][A
 39%|███▊      | 717/1861 [07:51<12:32,  1.52it/s][A
 39%|███▊      | 718/1861 [07:52<12:32,  1.52it/s][A
 39%|███▊      | 719/1861 [

Step: 800 | Loss: 1.455419533662498 | Train PPL: 4.286281327747828
Wie würde eine solche Zukunft aussehen ? --> How would a point like this ? </s>



 43%|████▎     | 801/1861 [08:46<11:36,  1.52it/s][A
 43%|████▎     | 802/1861 [08:47<11:36,  1.52it/s][A
 43%|████▎     | 803/1861 [08:47<11:35,  1.52it/s][A
 43%|████▎     | 804/1861 [08:48<11:34,  1.52it/s][A
 43%|████▎     | 805/1861 [08:48<11:33,  1.52it/s][A
 43%|████▎     | 806/1861 [08:49<11:33,  1.52it/s][A
 43%|████▎     | 807/1861 [08:50<11:32,  1.52it/s][A
 43%|████▎     | 808/1861 [08:50<11:31,  1.52it/s][A
 43%|████▎     | 809/1861 [08:51<11:31,  1.52it/s][A
 44%|████▎     | 810/1861 [08:52<11:30,  1.52it/s][A
 44%|████▎     | 811/1861 [08:52<11:29,  1.52it/s][A
 44%|████▎     | 812/1861 [08:53<11:29,  1.52it/s][A
 44%|████▎     | 813/1861 [08:54<11:28,  1.52it/s][A
 44%|████▎     | 814/1861 [08:54<11:27,  1.52it/s][A
 44%|████▍     | 815/1861 [08:55<11:27,  1.52it/s][A
 44%|████▍     | 816/1861 [08:55<11:26,  1.52it/s][A
 44%|████▍     | 817/1861 [08:56<11:25,  1.52it/s][A
 44%|████▍     | 818/1861 [08:57<11:25,  1.52it/s][A
 44%|████▍     | 819/1861 [

Step: 900 | Loss: 1.4558778024050925 | Train PPL: 4.288246046652548
Wie würde eine solche Zukunft aussehen ? --> How would a view look like ? </s>



 48%|████▊     | 901/1861 [09:51<10:30,  1.52it/s][A
 48%|████▊     | 902/1861 [09:52<10:29,  1.52it/s][A
 49%|████▊     | 903/1861 [09:52<10:28,  1.52it/s][A
 49%|████▊     | 904/1861 [09:53<10:28,  1.52it/s][A
 49%|████▊     | 905/1861 [09:54<10:27,  1.52it/s][A
 49%|████▊     | 906/1861 [09:54<10:26,  1.52it/s][A
 49%|████▊     | 907/1861 [09:55<10:25,  1.52it/s][A
 49%|████▉     | 908/1861 [09:55<10:25,  1.52it/s][A
 49%|████▉     | 909/1861 [09:56<10:24,  1.52it/s][A
 49%|████▉     | 910/1861 [09:56<10:23,  1.53it/s][A
 49%|████▉     | 911/1861 [09:57<10:22,  1.53it/s][A
 49%|████▉     | 912/1861 [09:57<10:22,  1.53it/s][A
 49%|████▉     | 913/1861 [09:58<10:21,  1.53it/s][A
 49%|████▉     | 914/1861 [09:58<10:20,  1.53it/s][A
 49%|████▉     | 915/1861 [09:59<10:20,  1.53it/s][A
 49%|████▉     | 916/1861 [10:00<10:19,  1.53it/s][A
 49%|████▉     | 917/1861 [10:00<10:18,  1.53it/s][A
 49%|████▉     | 918/1861 [10:01<10:17,  1.53it/s][A
 49%|████▉     | 919/1861 [

Step: 1000 | Loss: 1.4575994711518288 | Train PPL: 4.295635344986141 | Val PPL: 4.934625253906635
Wie würde eine solche Zukunft aussehen ? --> How would a such look like this ? </s>



 54%|█████▍    | 1001/1861 [10:58<09:25,  1.52it/s][A
 54%|█████▍    | 1002/1861 [10:59<09:25,  1.52it/s][A
 54%|█████▍    | 1003/1861 [11:00<09:24,  1.52it/s][A
 54%|█████▍    | 1004/1861 [11:00<09:23,  1.52it/s][A
 54%|█████▍    | 1005/1861 [11:01<09:23,  1.52it/s][A
 54%|█████▍    | 1006/1861 [11:01<09:22,  1.52it/s][A
 54%|█████▍    | 1007/1861 [11:02<09:21,  1.52it/s][A
 54%|█████▍    | 1008/1861 [11:02<09:20,  1.52it/s][A
 54%|█████▍    | 1009/1861 [11:03<09:20,  1.52it/s][A
 54%|█████▍    | 1010/1861 [11:04<09:19,  1.52it/s][A
 54%|█████▍    | 1011/1861 [11:04<09:19,  1.52it/s][A
 54%|█████▍    | 1012/1861 [11:05<09:18,  1.52it/s][A
 54%|█████▍    | 1013/1861 [11:06<09:17,  1.52it/s][A
 54%|█████▍    | 1014/1861 [11:07<09:17,  1.52it/s][A
 55%|█████▍    | 1015/1861 [11:07<09:16,  1.52it/s][A
 55%|█████▍    | 1016/1861 [11:08<09:15,  1.52it/s][A
 55%|█████▍    | 1017/1861 [11:08<09:15,  1.52it/s][A
 55%|█████▍    | 1018/1861 [11:09<09:14,  1.52it/s][A
 55%|████

Step: 1100 | Loss: 1.4592586041038687 | Train PPL: 4.302768290753387
Wie würde eine solche Zukunft aussehen ? --> How would a sense of these are going to look ? </s>



 59%|█████▉    | 1101/1861 [12:03<08:19,  1.52it/s][A
 59%|█████▉    | 1102/1861 [12:04<08:19,  1.52it/s][A
 59%|█████▉    | 1103/1861 [12:05<08:18,  1.52it/s][A
 59%|█████▉    | 1104/1861 [12:05<08:17,  1.52it/s][A
 59%|█████▉    | 1105/1861 [12:06<08:16,  1.52it/s][A
 59%|█████▉    | 1106/1861 [12:06<08:16,  1.52it/s][A
 59%|█████▉    | 1107/1861 [12:07<08:15,  1.52it/s][A
 60%|█████▉    | 1108/1861 [12:08<08:14,  1.52it/s][A
 60%|█████▉    | 1109/1861 [12:09<08:14,  1.52it/s][A
 60%|█████▉    | 1110/1861 [12:09<08:13,  1.52it/s][A
 60%|█████▉    | 1111/1861 [12:10<08:13,  1.52it/s][A
 60%|█████▉    | 1112/1861 [12:11<08:12,  1.52it/s][A
 60%|█████▉    | 1113/1861 [12:11<08:11,  1.52it/s][A
 60%|█████▉    | 1114/1861 [12:12<08:11,  1.52it/s][A
 60%|█████▉    | 1115/1861 [12:13<08:10,  1.52it/s][A
 60%|█████▉    | 1116/1861 [12:13<08:09,  1.52it/s][A
 60%|██████    | 1117/1861 [12:14<08:09,  1.52it/s][A
 60%|██████    | 1118/1861 [12:15<08:08,  1.52it/s][A
 60%|████

Step: 1200 | Loss: 1.4610831443965435 | Train PPL: 4.310626031071988
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 65%|██████▍   | 1201/1861 [13:07<07:13,  1.52it/s][A
 65%|██████▍   | 1202/1861 [13:08<07:12,  1.52it/s][A
 65%|██████▍   | 1203/1861 [13:09<07:11,  1.52it/s][A
 65%|██████▍   | 1204/1861 [13:09<07:11,  1.52it/s][A
 65%|██████▍   | 1205/1861 [13:10<07:10,  1.52it/s][A
 65%|██████▍   | 1206/1861 [13:11<07:09,  1.52it/s][A
 65%|██████▍   | 1207/1861 [13:11<07:09,  1.52it/s][A
 65%|██████▍   | 1208/1861 [13:12<07:08,  1.52it/s][A
 65%|██████▍   | 1209/1861 [13:13<07:07,  1.52it/s][A
 65%|██████▌   | 1210/1861 [13:13<07:07,  1.52it/s][A
 65%|██████▌   | 1211/1861 [13:14<07:06,  1.52it/s][A
 65%|██████▌   | 1212/1861 [13:15<07:05,  1.52it/s][A
 65%|██████▌   | 1213/1861 [13:15<07:05,  1.52it/s][A
 65%|██████▌   | 1214/1861 [13:16<07:04,  1.52it/s][A
 65%|██████▌   | 1215/1861 [13:16<07:03,  1.52it/s][A
 65%|██████▌   | 1216/1861 [13:17<07:03,  1.52it/s][A
 65%|██████▌   | 1217/1861 [13:18<07:02,  1.52it/s][A
 65%|██████▌   | 1218/1861 [13:18<07:01,  1.52it/s][A
 66%|████

Step: 1300 | Loss: 1.4603121679792037 | Train PPL: 4.307303920856932
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 70%|██████▉   | 1301/1861 [14:12<06:07,  1.53it/s][A
 70%|██████▉   | 1302/1861 [14:13<06:06,  1.53it/s][A
 70%|███████   | 1303/1861 [14:14<06:05,  1.53it/s][A
 70%|███████   | 1304/1861 [14:14<06:05,  1.53it/s][A
 70%|███████   | 1305/1861 [14:15<06:04,  1.53it/s][A
 70%|███████   | 1306/1861 [14:16<06:03,  1.53it/s][A
 70%|███████   | 1307/1861 [14:16<06:03,  1.53it/s][A
 70%|███████   | 1308/1861 [14:17<06:02,  1.53it/s][A
 70%|███████   | 1309/1861 [14:18<06:01,  1.53it/s][A
 70%|███████   | 1310/1861 [14:18<06:01,  1.53it/s][A
 70%|███████   | 1311/1861 [14:19<06:00,  1.53it/s][A
 70%|███████   | 1312/1861 [14:20<05:59,  1.53it/s][A
 71%|███████   | 1313/1861 [14:20<05:59,  1.53it/s][A
 71%|███████   | 1314/1861 [14:21<05:58,  1.53it/s][A
 71%|███████   | 1315/1861 [14:22<05:58,  1.53it/s][A
 71%|███████   | 1316/1861 [14:22<05:57,  1.53it/s][A
 71%|███████   | 1317/1861 [14:23<05:56,  1.53it/s][A
 71%|███████   | 1318/1861 [14:24<05:55,  1.53it/s][A
 71%|████

Step: 1400 | Loss: 1.4612625108659267 | Train PPL: 4.3113992821896
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 75%|███████▌  | 1401/1861 [15:17<05:01,  1.53it/s][A
 75%|███████▌  | 1402/1861 [15:18<05:00,  1.53it/s][A
 75%|███████▌  | 1403/1861 [15:18<04:59,  1.53it/s][A
 75%|███████▌  | 1404/1861 [15:19<04:59,  1.53it/s][A
 75%|███████▌  | 1405/1861 [15:19<04:58,  1.53it/s][A
 76%|███████▌  | 1406/1861 [15:20<04:57,  1.53it/s][A
 76%|███████▌  | 1407/1861 [15:21<04:57,  1.53it/s][A
 76%|███████▌  | 1408/1861 [15:21<04:56,  1.53it/s][A
 76%|███████▌  | 1409/1861 [15:22<04:56,  1.53it/s][A
 76%|███████▌  | 1410/1861 [15:23<04:55,  1.53it/s][A
 76%|███████▌  | 1411/1861 [15:24<04:54,  1.53it/s][A
 76%|███████▌  | 1412/1861 [15:24<04:54,  1.53it/s][A
 76%|███████▌  | 1413/1861 [15:25<04:53,  1.53it/s][A
 76%|███████▌  | 1414/1861 [15:25<04:52,  1.53it/s][A
 76%|███████▌  | 1415/1861 [15:26<04:52,  1.53it/s][A
 76%|███████▌  | 1416/1861 [15:27<04:51,  1.53it/s][A
 76%|███████▌  | 1417/1861 [15:27<04:50,  1.53it/s][A
 76%|███████▌  | 1418/1861 [15:28<04:49,  1.53it/s][A
 76%|████

Step: 1500 | Loss: 1.4613875538110732 | Train PPL: 4.311938425960908 | Val PPL: 4.873496116281091
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 81%|████████  | 1501/1861 [16:25<03:56,  1.52it/s][A
 81%|████████  | 1502/1861 [16:25<03:55,  1.52it/s][A
 81%|████████  | 1503/1861 [16:26<03:54,  1.52it/s][A
 81%|████████  | 1504/1861 [16:27<03:54,  1.52it/s][A
 81%|████████  | 1505/1861 [16:27<03:53,  1.52it/s][A
 81%|████████  | 1506/1861 [16:28<03:52,  1.52it/s][A
 81%|████████  | 1507/1861 [16:29<03:52,  1.52it/s][A
 81%|████████  | 1508/1861 [16:29<03:51,  1.52it/s][A
 81%|████████  | 1509/1861 [16:30<03:51,  1.52it/s][A
 81%|████████  | 1510/1861 [16:31<03:50,  1.52it/s][A
 81%|████████  | 1511/1861 [16:31<03:49,  1.52it/s][A
 81%|████████  | 1512/1861 [16:32<03:49,  1.52it/s][A
 81%|████████▏ | 1513/1861 [16:33<03:48,  1.52it/s][A
 81%|████████▏ | 1514/1861 [16:33<03:47,  1.52it/s][A
 81%|████████▏ | 1515/1861 [16:34<03:47,  1.52it/s][A
 81%|████████▏ | 1516/1861 [16:34<03:46,  1.52it/s][A
 82%|████████▏ | 1517/1861 [16:35<03:45,  1.52it/s][A
 82%|████████▏ | 1518/1861 [16:36<03:45,  1.52it/s][A
 82%|████

Step: 1600 | Loss: 1.4632577131502331 | Train PPL: 4.32000998307454
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 86%|████████▌ | 1601/1861 [17:29<02:50,  1.53it/s][A
 86%|████████▌ | 1602/1861 [17:30<02:49,  1.53it/s][A
 86%|████████▌ | 1603/1861 [17:30<02:49,  1.53it/s][A
 86%|████████▌ | 1604/1861 [17:31<02:48,  1.53it/s][A
 86%|████████▌ | 1605/1861 [17:32<02:47,  1.53it/s][A
 86%|████████▋ | 1606/1861 [17:33<02:47,  1.52it/s][A
 86%|████████▋ | 1607/1861 [17:33<02:46,  1.52it/s][A
 86%|████████▋ | 1608/1861 [17:34<02:45,  1.52it/s][A
 86%|████████▋ | 1609/1861 [17:35<02:45,  1.52it/s][A
 87%|████████▋ | 1610/1861 [17:36<02:44,  1.52it/s][A
 87%|████████▋ | 1611/1861 [17:36<02:43,  1.52it/s][A
 87%|████████▋ | 1612/1861 [17:37<02:43,  1.52it/s][A
 87%|████████▋ | 1613/1861 [17:38<02:42,  1.52it/s][A
 87%|████████▋ | 1614/1861 [17:38<02:42,  1.52it/s][A
 87%|████████▋ | 1615/1861 [17:39<02:41,  1.52it/s][A
 87%|████████▋ | 1616/1861 [17:40<02:40,  1.52it/s][A
 87%|████████▋ | 1617/1861 [17:40<02:40,  1.52it/s][A
 87%|████████▋ | 1618/1861 [17:41<02:39,  1.52it/s][A
 87%|████

Step: 1700 | Loss: 1.46457953498644 | Train PPL: 4.325724042254925
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 91%|█████████▏| 1701/1861 [18:34<01:44,  1.53it/s][A
 91%|█████████▏| 1702/1861 [18:35<01:44,  1.53it/s][A
 92%|█████████▏| 1703/1861 [18:35<01:43,  1.53it/s][A
 92%|█████████▏| 1704/1861 [18:36<01:42,  1.53it/s][A
 92%|█████████▏| 1705/1861 [18:37<01:42,  1.53it/s][A
 92%|█████████▏| 1706/1861 [18:38<01:41,  1.53it/s][A
 92%|█████████▏| 1707/1861 [18:38<01:40,  1.53it/s][A
 92%|█████████▏| 1708/1861 [18:39<01:40,  1.53it/s][A
 92%|█████████▏| 1709/1861 [18:39<01:39,  1.53it/s][A
 92%|█████████▏| 1710/1861 [18:40<01:38,  1.53it/s][A
 92%|█████████▏| 1711/1861 [18:41<01:38,  1.53it/s][A
 92%|█████████▏| 1712/1861 [18:42<01:37,  1.53it/s][A
 92%|█████████▏| 1713/1861 [18:42<01:36,  1.53it/s][A
 92%|█████████▏| 1714/1861 [18:43<01:36,  1.53it/s][A
 92%|█████████▏| 1715/1861 [18:43<01:35,  1.53it/s][A
 92%|█████████▏| 1716/1861 [18:44<01:34,  1.53it/s][A
 92%|█████████▏| 1717/1861 [18:44<01:34,  1.53it/s][A
 92%|█████████▏| 1718/1861 [18:45<01:33,  1.53it/s][A
 92%|████

Step: 1800 | Loss: 1.4651626552972528 | Train PPL: 4.328247195382358
Wie würde eine solche Zukunft aussehen ? --> How would a sense of the future look like ? </s>



 97%|█████████▋| 1801/1861 [19:39<00:39,  1.53it/s][A
 97%|█████████▋| 1802/1861 [19:40<00:38,  1.53it/s][A
 97%|█████████▋| 1803/1861 [19:41<00:37,  1.53it/s][A
 97%|█████████▋| 1804/1861 [19:41<00:37,  1.53it/s][A
 97%|█████████▋| 1805/1861 [19:42<00:36,  1.53it/s][A
 97%|█████████▋| 1806/1861 [19:43<00:36,  1.53it/s][A
 97%|█████████▋| 1807/1861 [19:43<00:35,  1.53it/s][A
 97%|█████████▋| 1808/1861 [19:44<00:34,  1.53it/s][A
 97%|█████████▋| 1809/1861 [19:44<00:34,  1.53it/s][A
 97%|█████████▋| 1810/1861 [19:45<00:33,  1.53it/s][A
 97%|█████████▋| 1811/1861 [19:46<00:32,  1.53it/s][A
 97%|█████████▋| 1812/1861 [19:47<00:32,  1.53it/s][A
 97%|█████████▋| 1813/1861 [19:47<00:31,  1.53it/s][A
 97%|█████████▋| 1814/1861 [19:48<00:30,  1.53it/s][A
 98%|█████████▊| 1815/1861 [19:49<00:30,  1.53it/s][A
 98%|█████████▊| 1816/1861 [19:49<00:29,  1.53it/s][A
 98%|█████████▊| 1817/1861 [19:49<00:28,  1.53it/s][A
 98%|█████████▊| 1818/1861 [19:50<00:28,  1.53it/s][A
 98%|████

Epoch: 5 | Loss: 1.4648957258747957 | Train PPL: 4.32709201304118 | Val PPL: 4.808466559438187



  0%|          | 1/1861 [00:00<21:28,  1.44it/s][A
  0%|          | 2/1861 [00:01<22:19,  1.39it/s][A
  0%|          | 3/1861 [00:02<21:36,  1.43it/s][A
  0%|          | 4/1861 [00:02<20:17,  1.53it/s][A
  0%|          | 5/1861 [00:03<20:48,  1.49it/s][A
  0%|          | 6/1861 [00:04<21:05,  1.47it/s][A
  0%|          | 7/1861 [00:04<21:27,  1.44it/s][A
  0%|          | 8/1861 [00:05<21:38,  1.43it/s][A
  0%|          | 9/1861 [00:06<20:50,  1.48it/s][A
  1%|          | 10/1861 [00:06<20:36,  1.50it/s][A
  1%|          | 11/1861 [00:07<20:43,  1.49it/s][A
  1%|          | 12/1861 [00:08<20:51,  1.48it/s][A
  1%|          | 13/1861 [00:08<20:49,  1.48it/s][A
  1%|          | 14/1861 [00:09<20:51,  1.48it/s][A
  1%|          | 15/1861 [00:09<20:30,  1.50it/s][A
  1%|          | 16/1861 [00:10<20:32,  1.50it/s][A
  1%|          | 17/1861 [00:11<20:41,  1.48it/s][A
  1%|          | 18/1861 [00:12<20:47,  1.48it/s][A
  1%|          | 19/1861 [00:12<20:41,  1.48it/s][A
 

Step: 100 | Loss: 1.3368436607718468 | Train PPL: 3.8070083121043234
Wie würde eine solche Zukunft aussehen ? --> How would a look like this ? </s>



  5%|▌         | 101/1861 [01:05<19:05,  1.54it/s][A
  5%|▌         | 102/1861 [01:06<19:06,  1.53it/s][A
  6%|▌         | 103/1861 [01:07<19:07,  1.53it/s][A
  6%|▌         | 104/1861 [01:07<19:08,  1.53it/s][A
  6%|▌         | 105/1861 [01:08<19:09,  1.53it/s][A
  6%|▌         | 106/1861 [01:09<19:06,  1.53it/s][A
  6%|▌         | 107/1861 [01:09<19:05,  1.53it/s][A
  6%|▌         | 108/1861 [01:10<19:03,  1.53it/s][A
  6%|▌         | 109/1861 [01:11<19:02,  1.53it/s][A
  6%|▌         | 110/1861 [01:11<19:02,  1.53it/s][A
  6%|▌         | 111/1861 [01:12<19:03,  1.53it/s][A
  6%|▌         | 112/1861 [01:13<19:00,  1.53it/s][A
  6%|▌         | 113/1861 [01:13<18:58,  1.54it/s][A
  6%|▌         | 114/1861 [01:14<18:58,  1.53it/s][A
  6%|▌         | 115/1861 [01:14<18:57,  1.53it/s][A
  6%|▌         | 116/1861 [01:15<18:56,  1.54it/s][A
  6%|▋         | 117/1861 [01:16<18:56,  1.53it/s][A
  6%|▋         | 118/1861 [01:16<18:52,  1.54it/s][A
  6%|▋         | 119/1861 [

Step: 200 | Loss: 1.341435405611992 | Train PPL: 3.824529318087654
Wie würde eine solche Zukunft aussehen ? --> How would a picture look like ? </s>



 11%|█         | 201/1861 [02:12<18:15,  1.51it/s][A
 11%|█         | 202/1861 [02:13<18:14,  1.52it/s][A
 11%|█         | 203/1861 [02:13<18:12,  1.52it/s][A
 11%|█         | 204/1861 [02:14<18:12,  1.52it/s][A
 11%|█         | 205/1861 [02:15<18:13,  1.51it/s][A
 11%|█         | 206/1861 [02:16<18:15,  1.51it/s][A
 11%|█         | 207/1861 [02:16<18:14,  1.51it/s][A
 11%|█         | 208/1861 [02:17<18:15,  1.51it/s][A
 11%|█         | 209/1861 [02:18<18:16,  1.51it/s][A
 11%|█▏        | 210/1861 [02:19<18:16,  1.51it/s][A
 11%|█▏        | 211/1861 [02:20<18:16,  1.50it/s][A
 11%|█▏        | 212/1861 [02:21<18:17,  1.50it/s][A
 11%|█▏        | 213/1861 [02:21<18:17,  1.50it/s][A
 11%|█▏        | 214/1861 [02:22<18:16,  1.50it/s][A
 12%|█▏        | 215/1861 [02:23<18:17,  1.50it/s][A
 12%|█▏        | 216/1861 [02:24<18:17,  1.50it/s][A
 12%|█▏        | 217/1861 [02:24<18:18,  1.50it/s][A
 12%|█▏        | 218/1861 [02:25<18:18,  1.50it/s][A
 12%|█▏        | 219/1861 [

Step: 300 | Loss: 1.3487159226338068 | Train PPL: 3.852475476687548
Wie würde eine solche Zukunft aussehen ? --> How would a sense of this . </s>



 16%|█▌        | 301/1861 [03:24<17:42,  1.47it/s][A
 16%|█▌        | 302/1861 [03:25<17:41,  1.47it/s][A
 16%|█▋        | 303/1861 [03:26<17:40,  1.47it/s][A
 16%|█▋        | 304/1861 [03:26<17:38,  1.47it/s][A
 16%|█▋        | 305/1861 [03:27<17:38,  1.47it/s][A
 16%|█▋        | 306/1861 [03:28<17:38,  1.47it/s][A
 16%|█▋        | 307/1861 [03:29<17:38,  1.47it/s][A
 17%|█▋        | 308/1861 [03:29<17:38,  1.47it/s][A
 17%|█▋        | 309/1861 [03:30<17:38,  1.47it/s][A
 17%|█▋        | 310/1861 [03:31<17:37,  1.47it/s][A
 17%|█▋        | 311/1861 [03:32<17:37,  1.47it/s][A
 17%|█▋        | 312/1861 [03:32<17:36,  1.47it/s][A
 17%|█▋        | 313/1861 [03:33<17:36,  1.47it/s][A
 17%|█▋        | 314/1861 [03:34<17:36,  1.46it/s][A
 17%|█▋        | 315/1861 [03:35<17:35,  1.46it/s][A
 17%|█▋        | 316/1861 [03:35<17:35,  1.46it/s][A
 17%|█▋        | 317/1861 [03:36<17:34,  1.46it/s][A
 17%|█▋        | 318/1861 [03:37<17:34,  1.46it/s][A
 17%|█▋        | 319/1861 [

Step: 400 | Loss: 1.349630792066455 | Train PPL: 3.8560016014671414
Wie würde eine solche Zukunft aussehen ? --> How would a view look like this ? </s>



 22%|██▏       | 401/1861 [04:34<16:39,  1.46it/s][A
 22%|██▏       | 402/1861 [04:35<16:38,  1.46it/s][A
 22%|██▏       | 403/1861 [04:35<16:38,  1.46it/s][A
 22%|██▏       | 404/1861 [04:36<16:37,  1.46it/s][A
 22%|██▏       | 405/1861 [04:37<16:36,  1.46it/s][A
 22%|██▏       | 406/1861 [04:37<16:35,  1.46it/s][A
 22%|██▏       | 407/1861 [04:38<16:34,  1.46it/s][A
 22%|██▏       | 408/1861 [04:39<16:33,  1.46it/s][A
 22%|██▏       | 409/1861 [04:39<16:33,  1.46it/s][A
 22%|██▏       | 410/1861 [04:40<16:32,  1.46it/s][A
 22%|██▏       | 411/1861 [04:41<16:32,  1.46it/s][A
 22%|██▏       | 412/1861 [04:41<16:31,  1.46it/s][A
 22%|██▏       | 413/1861 [04:42<16:30,  1.46it/s][A
 22%|██▏       | 414/1861 [04:43<16:30,  1.46it/s][A
 22%|██▏       | 415/1861 [04:43<16:29,  1.46it/s][A
 22%|██▏       | 416/1861 [04:44<16:28,  1.46it/s][A
 22%|██▏       | 417/1861 [04:45<16:27,  1.46it/s][A
 22%|██▏       | 418/1861 [04:45<16:26,  1.46it/s][A
 23%|██▎       | 419/1861 [

Step: 500 | Loss: 1.3511881521940232 | Train PPL: 3.862011463157912 | Val PPL: 4.8451336466436405
Wie würde eine solche Zukunft aussehen ? --> How would a future look like ? </s>



 27%|██▋       | 501/1861 [1:16:31<3:27:42,  9.16s/it][A
 27%|██▋       | 502/1861 [1:16:31<3:27:11,  9.15s/it][A
 27%|██▋       | 503/1861 [1:16:32<3:26:39,  9.13s/it][A
 27%|██▋       | 504/1861 [1:16:57<3:27:11,  9.16s/it][A
 27%|██▋       | 505/1861 [1:16:58<3:26:42,  9.15s/it][A
 27%|██▋       | 506/1861 [1:17:06<3:26:29,  9.14s/it][A
 27%|██▋       | 507/1861 [1:17:12<3:26:11,  9.14s/it][A
 27%|██▋       | 508/1861 [1:17:17<3:25:52,  9.13s/it][A
 27%|██▋       | 509/1861 [1:17:18<3:25:21,  9.11s/it][A
 27%|██▋       | 510/1861 [1:17:19<3:24:50,  9.10s/it][A
 27%|██▋       | 511/1861 [1:17:20<3:24:18,  9.08s/it][A
 28%|██▊       | 512/1861 [1:17:21<3:23:48,  9.06s/it][A
 28%|██▊       | 513/1861 [1:17:21<3:23:16,  9.05s/it][A
 28%|██▊       | 514/1861 [1:17:22<3:22:45,  9.03s/it][A
 28%|██▊       | 515/1861 [1:17:23<3:22:15,  9.02s/it][A
 28%|██▊       | 516/1861 [1:17:23<3:21:44,  9.00s/it][A
 28%|██▊       | 517/1861 [1:17:24<3:21:14,  8.98s/it][A
 28%|██▊     

Step: 600 | Loss: 1.3536283744871616 | Train PPL: 3.87144713751592
Wie würde eine solche Zukunft aussehen ? --> How would a future look like ? </s>



 32%|███▏      | 601/1861 [1:22:48<2:53:36,  8.27s/it][A
 32%|███▏      | 602/1861 [1:22:49<2:53:12,  8.25s/it][A
 32%|███▏      | 603/1861 [1:22:49<2:52:47,  8.24s/it][A
 32%|███▏      | 604/1861 [1:22:50<2:52:23,  8.23s/it][A
 33%|███▎      | 605/1861 [1:22:51<2:52:00,  8.22s/it][A
 33%|███▎      | 606/1861 [1:22:51<2:51:35,  8.20s/it][A
 33%|███▎      | 607/1861 [1:22:52<2:51:11,  8.19s/it][A
 33%|███▎      | 608/1861 [1:22:52<2:50:48,  8.18s/it][A
 33%|███▎      | 609/1861 [1:22:53<2:50:24,  8.17s/it][A
 33%|███▎      | 610/1861 [1:22:53<2:50:00,  8.15s/it][A
 33%|███▎      | 611/1861 [1:22:54<2:49:37,  8.14s/it][A
 33%|███▎      | 612/1861 [1:22:55<2:49:13,  8.13s/it][A
 33%|███▎      | 613/1861 [1:22:56<2:48:50,  8.12s/it][A
 33%|███▎      | 614/1861 [1:22:56<2:48:27,  8.11s/it][A
 33%|███▎      | 615/1861 [1:22:57<2:48:03,  8.09s/it][A
 33%|███▎      | 616/1861 [1:22:57<2:47:40,  8.08s/it][A
 33%|███▎      | 617/1861 [1:22:58<2:47:18,  8.07s/it][A
 33%|███▎    