In [5]:
# !pip install transformers torch pillow
# !pip install opencv-python-headless
# !pip install matplotlib

In [28]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
from torch.utils.data import Dataset, DataLoader
from typing import Union, List, Tuple
from PIL import Image
import torch
import urllib
import os
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import json
from datasets import Dataset
from tqdm import tqdm
import gc

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [29]:
root = '/home/sagemaker-user/img2text'
config = {
    "sketches": f"{root}/sketches",
    "ground_truth": f"{root}/ground_truth.json",
    "epochs": 50
}

# Load Sketches

In [30]:
metadata = json.load(open(f'{config["sketches"]}/metadata.json'))

sketches = []

for key in metadata:
    for i in tqdm(range(metadata[key]),desc= f'Loading {key}'):
        file_name = key+ f"_{i+1}"
        sketch = Image.open(os.path.join( config['sketches'], file_name+".png"))
        sketches.append((file_name, sketch))

Loading bike: 100%|██████████| 9/9 [00:00<00:00, 5534.19it/s]
Loading car: 100%|██████████| 10/10 [00:00<00:00, 8184.01it/s]
Loading cat: 100%|██████████| 7/7 [00:00<00:00, 7580.72it/s]
Loading cycle: 100%|██████████| 6/6 [00:00<00:00, 4531.12it/s]
Loading plane: 100%|██████████| 10/10 [00:00<00:00, 6151.81it/s]
Loading signal: 100%|██████████| 6/6 [00:00<00:00, 3076.51it/s]


# Load Model

In [31]:
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Baseline

In [32]:
result = {}

for category, sketch in tqdm(sketches, desc=f"Generating Caption..."):
    inputs = processor(sketch, return_tensors="pt")
    outputs = model.generate(**inputs)
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    result[category] = caption

print(result)

with open('img2text_baseline.json', 'w+') as f:
    json.dump(result, f)

Generating Caption...: 100%|██████████| 48/48 [01:02<00:00,  1.31s/it]

{'bike_1': 'a motorcycle with a rider on it', 'bike_2': 'a motorcycle is shown in the shape of a motorcycle', 'bike_3': 'a drawing of a bicycle', 'bike_4': 'a drawing of a man with a gun', 'bike_5': 'a motorcycle with a side view', 'bike_6': 'a drawing of a motorcycle', 'bike_7': 'a drawing of a person riding a bike', 'bike_8': 'a motorcycle with a helmet and helmet on it', 'bike_9': 'a motorcycle is shown in the shape of a motorcycle', 'car_1': 'a drawing of a car', 'car_2': 'a map of the state of new york', 'car_3': 'a car with a white background', 'car_4': 'a car with the number plate removed', 'car_5': 'a car is shown in the shape of a car', 'car_6': 'a drawing of a truck', 'car_7': 'a car is shown in the shape of a car', 'car_8': 'a drawing of a truck', 'car_9': 'a car with wheels and wheels', 'car_10': 'a drawing of a car', 'cat_1': 'a black and white drawing of a cat', 'cat_2': 'a black and white drawing of a cat', 'cat_3': "a drawing of a cat ' s face", 'cat_4': 'a black and wh




# Fine Tuning

## Loading Ground Truth

In [36]:
ground_truth_captions = json.load(open(config["ground_truth"]))
print(ground_truth_captions)

{'bike_1': 'Drawing of the side view of a motorcycle', 'bike_2': 'Isometric view drawing of a police motorcycle', 'bike_3': 'Isometric view drawing of a police motorcycle', 'bike_4': 'Isometric view drawing of a bike', 'bike_5': 'Isometric view drawing of a police motorcycle', 'bike_6': 'Isometric view drawing of a bullet motorcycle', 'bike_7': 'Front view drawing of a motorcycle', 'bike_8': 'Isometric view drawing of a motorcycle', 'bike_9': 'Drawing of a motorcycle', 'car_1': 'Sketch of a car', 'car_2': 'Isometric view drawing of a car', 'car_3': 'Drawing of a sedan car', 'car_4': 'Sketch of a race car with spoilers', 'car_5': 'Front view drawing of a car', 'car_6': 'Side view drawing of a car', 'car_7': 'Drawing of a car', 'car_8': 'Front view of a limousine car', 'car_9': 'Side view sketch of a car', 'car_10': 'Isometric view drawing of a classic car', 'cat_1': 'Sketch of a cat', 'cat_2': 'Outline Drawing of a cat', 'cat_3': "Drawing of a cat's face", 'cat_4': 'Side view sketch of 

## Prepare Dataset

In [37]:
dataset = []

for name, image in sketches:
    dataset.append({
        "image": image,
        "text": ground_truth_captions[name]
    })


dataset = Dataset.from_list(dataset)

In [38]:
class ImageCaptioningDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        encoding = self.processor(images=item["image"], text=item["text"], padding="max_length", return_tensors="pt")
        # remove batch dimension
        encoding = {k:v.squeeze() for k,v in encoding.items()}
        return encoding

In [39]:
train_dataset = ImageCaptioningDataset(dataset, processor)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=2)

## Training

In [40]:
torch.cuda.empty_cache()
gc.collect()

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

scaler = torch.amp.GradScaler("cuda")

# model.to(DEVICE)
model.train()

BlipForConditionalGeneration(
  (vision_model): BlipVisionModel(
    (embeddings): BlipVisionEmbeddings(
      (patch_embedding): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (encoder): BlipEncoder(
      (layers): ModuleList(
        (0-11): 12 x BlipEncoderLayer(
          (self_attn): BlipAttention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (projection): Linear(in_features=768, out_features=768, bias=True)
          )
          (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): BlipMLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
          )
          (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
    (post_layernorm): LayerNorm((768,), eps=1e-0

In [None]:
for epoch in tqdm(range(config['epochs'])):
    optimizer.zero_grad()
    print("Epoch:", epoch)
    for idx, batch in enumerate(train_dataloader):
        # input_ids = batch.pop("input_ids").to(DEVICE)
        # pixel_values = batch.pop("pixel_values").to(DEVICE)
        input_ids = batch.pop("input_ids")
        pixel_values = batch.pop("pixel_values")
        print(pixel_values.shape)

        outputs = model(input_ids=input_ids,
                        pixel_values=pixel_values,
                        labels=input_ids)
        loss = outputs.loss

        print("Loss:", loss.item())

        loss.backward()
    
        optimizer.step()
        optimizer.zero_grad()

In [45]:
import torch
from torch.nn.functional import cross_entropy
from tqdm import tqdm

def compute_perplexity(model, inputs):
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
        perplexity = torch.exp(loss)
    return perplexity.item()

def lexical_diversity(caption):
    tokens = caption.split()
    if len(tokens) == 0:
        return 0
    return len(set(tokens)) / len(tokens)

def n_gram_diversity(caption, n=2):
    tokens = caption.split()
    if len(tokens) < n:
        return 0
    n_grams = list(zip(*[tokens[i:] for i in range(n)]))
    return len(set(n_grams)) / len(n_grams)

for epoch in tqdm(range(config['epochs'])):
    optimizer.zero_grad()
    print(f"Epoch {epoch + 1}/{config['epochs']}")
    total_loss = 0
    all_captions = []
    
    for idx, batch in tqdm(enumerate(train_dataloader)):
        input_ids = batch.pop("input_ids")
        pixel_values = batch.pop("pixel_values")
        
        outputs = model(input_ids=input_ids, pixel_values=pixel_values, labels=input_ids)
        loss = outputs.loss
        
        total_loss += loss.item()
        
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        # Generate captions for diversity metrics
        with torch.no_grad():
            generated_captions = model.generate(pixel_values=pixel_values, max_length=20)
            decoded_captions = processor.batch_decode(generated_captions, skip_special_tokens=True)
            all_captions.extend(decoded_captions)
        
        if idx % 10 == 0:
            print(f"Batch {idx}, Loss: {loss.item():.4f}")
    
    # Perplexity computation
    inputs = {"input_ids": input_ids, "pixel_values": pixel_values}
    perplexity = compute_perplexity(model, inputs)
    print(f"Epoch {epoch + 1} Perplexity: {perplexity:.4f}")
    
    # Caption Diversity Metrics
    lexical_div = sum(lexical_diversity(caption) for caption in all_captions) / len(all_captions)
    bigram_div = sum(n_gram_diversity(caption, n=2) for caption in all_captions) / len(all_captions)
    print(f"Epoch {epoch + 1} Lexical Diversity: {lexical_div:.4f}, Bigram Diversity: {bigram_div:.4f}")



  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 1/50



0it [00:00, ?it/s][A
1it [00:09,  9.53s/it][A

Batch 0, Loss: 10.0811



2it [00:19,  9.60s/it][A
3it [00:28,  9.65s/it][A
4it [00:38,  9.65s/it][A
5it [00:48,  9.65s/it][A
6it [00:57,  9.64s/it][A
7it [01:07,  9.71s/it][A
8it [01:17,  9.76s/it][A
9it [01:27,  9.80s/it][A
10it [01:37,  9.75s/it][A
11it [01:46,  9.73s/it][A

Batch 10, Loss: 7.4522



12it [01:56,  9.69s/it][A
13it [02:06,  9.68s/it][A
14it [02:15,  9.65s/it][A
15it [02:25,  9.59s/it][A
16it [02:34,  9.57s/it][A
17it [02:43,  9.52s/it][A
18it [02:53,  9.59s/it][A
19it [03:03,  9.55s/it][A
20it [03:12,  9.56s/it][A
21it [03:22,  9.56s/it][A

Batch 20, Loss: 5.8801



22it [03:32,  9.68s/it][A
23it [03:42,  9.82s/it][A
24it [03:52,  9.67s/it][A
  2%|▏         | 1/50 [03:54<3:11:30, 234.50s/it]

Epoch 1 Perplexity: 197.4137
Epoch 1 Lexical Diversity: 1.0000, Bigram Diversity: 1.0000
Epoch 2/50



0it [00:00, ?it/s][A
1it [00:10, 10.09s/it][A

Batch 0, Loss: 5.3004



2it [00:19,  9.86s/it][A
3it [00:29,  9.77s/it][A
4it [00:39,  9.70s/it][A
5it [00:48,  9.69s/it][A
6it [00:58,  9.84s/it][A
7it [01:08,  9.83s/it][A
8it [01:18,  9.81s/it][A
9it [01:28,  9.80s/it][A
10it [01:38,  9.82s/it][A
11it [01:47,  9.82s/it][A

Batch 10, Loss: 3.7965



12it [01:57,  9.81s/it][A
13it [02:07,  9.90s/it][A
14it [02:17,  9.95s/it][A
15it [02:27,  9.89s/it][A
16it [02:37,  9.94s/it][A
17it [02:47,  9.83s/it][A
18it [02:57,  9.94s/it][A
19it [03:07,  9.87s/it][A
20it [03:17,  9.88s/it][A
21it [03:26,  9.78s/it][A

Batch 20, Loss: 2.3506



22it [03:36,  9.76s/it][A
23it [03:46,  9.75s/it][A
24it [03:55,  9.81s/it][A
  4%|▍         | 2/50 [07:52<3:09:15, 236.57s/it]

Epoch 2 Perplexity: 6.1897
Epoch 2 Lexical Diversity: 1.0000, Bigram Diversity: 1.0000
Epoch 3/50



0it [00:00, ?it/s][A
1it [00:09,  9.55s/it][A

Batch 0, Loss: 1.8273



2it [00:19,  9.88s/it][A
3it [00:29,  9.65s/it][A
4it [00:38,  9.70s/it][A
5it [00:48,  9.71s/it][A
6it [00:58,  9.70s/it][A
7it [01:08,  9.86s/it][A
8it [01:18,  9.96s/it][A
9it [01:28,  9.86s/it][A
10it [01:38,  9.94s/it][A
11it [01:48, 10.14s/it][A

Batch 10, Loss: 0.8222



12it [01:58, 10.00s/it][A
13it [02:08, 10.07s/it][A
14it [02:18,  9.94s/it][A
15it [02:28,  9.99s/it][A
16it [02:38, 10.04s/it][A
17it [02:48,  9.94s/it][A
18it [02:58,  9.83s/it][A
19it [03:08,  9.93s/it][A
20it [03:18,  9.93s/it][A
21it [03:28,  9.96s/it][A

Batch 20, Loss: 0.3537



22it [03:37,  9.91s/it][A
23it [03:47,  9.91s/it][A
24it [03:57,  9.91s/it][A
  6%|▌         | 3/50 [11:52<3:06:39, 238.29s/it]

Epoch 3 Perplexity: 1.2928
Epoch 3 Lexical Diversity: 0.9958, Bigram Diversity: 0.9977
Epoch 4/50



0it [00:00, ?it/s][A
1it [00:09,  9.95s/it][A

Batch 0, Loss: 0.2563



2it [00:19, 10.00s/it][A
3it [00:29,  9.94s/it][A
4it [00:39,  9.93s/it][A
5it [00:49,  9.84s/it][A
6it [00:59,  9.87s/it][A
7it [01:09,  9.87s/it][A
8it [01:19,  9.96s/it][A
9it [01:29,  9.94s/it][A
10it [01:38,  9.85s/it][A
11it [01:48,  9.91s/it][A

Batch 10, Loss: 0.1412



12it [01:58,  9.79s/it][A
13it [02:08,  9.86s/it][A
14it [02:18,  9.79s/it][A
15it [02:27,  9.75s/it][A
16it [02:37,  9.72s/it][A
17it [02:47,  9.78s/it][A
18it [02:57,  9.84s/it][A
19it [03:06,  9.76s/it][A
20it [03:17,  9.87s/it][A
21it [03:26,  9.87s/it][A

Batch 20, Loss: 0.0988



22it [03:36,  9.84s/it][A
23it [03:46,  9.97s/it][A
24it [03:57,  9.88s/it][A
  8%|▊         | 4/50 [15:52<3:03:03, 238.76s/it]

Epoch 4 Perplexity: 1.0927
Epoch 4 Lexical Diversity: 0.9940, Bigram Diversity: 1.0000
Epoch 5/50



0it [00:00, ?it/s][A
1it [00:09,  9.56s/it][A

Batch 0, Loss: 0.0864



2it [00:19,  9.79s/it][A
3it [00:29,  9.66s/it][A
4it [00:38,  9.67s/it][A
5it [00:48,  9.70s/it][A
6it [00:57,  9.62s/it][A
7it [01:07,  9.75s/it][A
8it [01:17,  9.66s/it][A
9it [01:27,  9.65s/it][A
10it [01:37,  9.79s/it][A
11it [01:47,  9.85s/it][A

Batch 10, Loss: 0.0678



12it [01:57,  9.94s/it][A
13it [02:06,  9.84s/it][A
14it [02:16,  9.71s/it][A
15it [02:25,  9.66s/it][A
16it [02:35,  9.59s/it][A
17it [02:44,  9.62s/it][A
18it [02:54,  9.65s/it][A
19it [03:04,  9.75s/it][A
20it [03:14,  9.82s/it][A
21it [03:24,  9.84s/it][A

Batch 20, Loss: 0.0596



22it [03:34,  9.89s/it][A
23it [03:45, 10.11s/it][A
24it [03:55,  9.79s/it][A
 10%|█         | 5/50 [19:49<2:58:43, 238.29s/it]

Epoch 5 Perplexity: 1.0555
Epoch 5 Lexical Diversity: 0.9949, Bigram Diversity: 1.0000
Epoch 6/50



0it [00:00, ?it/s][A
1it [00:10, 10.20s/it][A

Batch 0, Loss: 0.0539



2it [00:20, 10.10s/it][A
3it [00:30, 10.02s/it][A
4it [00:40, 10.01s/it][A
5it [00:49,  9.90s/it][A
6it [00:59,  9.96s/it][A
7it [01:09,  9.83s/it][A
8it [01:19,  9.80s/it][A
9it [01:29,  9.80s/it][A
10it [01:38,  9.74s/it][A
11it [01:48,  9.75s/it][A

Batch 10, Loss: 0.0485



12it [01:58,  9.78s/it][A
13it [02:08,  9.81s/it][A
14it [02:18,  9.85s/it][A
15it [02:28, 10.15s/it][A
16it [02:38,  9.96s/it][A
17it [02:47,  9.81s/it][A
18it [02:57,  9.78s/it][A
19it [03:07,  9.77s/it][A
20it [03:17,  9.74s/it][A
21it [03:27, 10.06s/it][A

Batch 20, Loss: 0.0440



22it [03:37, 10.00s/it][A
23it [03:47, 10.03s/it][A
24it [03:57,  9.90s/it][A
 12%|█▏        | 6/50 [23:50<2:55:13, 238.94s/it]

Epoch 6 Perplexity: 1.0398
Epoch 6 Lexical Diversity: 0.9962, Bigram Diversity: 1.0000
Epoch 7/50



0it [00:00, ?it/s][A
1it [00:10, 10.09s/it][A

Batch 0, Loss: 0.0389



2it [00:20, 10.13s/it][A
3it [00:30, 10.12s/it][A
4it [00:40, 10.12s/it][A
5it [00:49,  9.87s/it][A
6it [00:59,  9.69s/it][A
7it [01:08,  9.58s/it][A
8it [01:18,  9.71s/it][A
9it [01:28,  9.71s/it][A
10it [01:37,  9.70s/it][A
11it [01:47,  9.65s/it][A

Batch 10, Loss: 0.0411



12it [01:57,  9.62s/it][A
13it [02:07, 10.01s/it][A
14it [02:17, 10.01s/it][A
15it [02:28, 10.03s/it][A
16it [02:37,  9.95s/it][A
17it [02:47,  9.99s/it][A
18it [02:57, 10.00s/it][A
19it [03:08, 10.19s/it][A
20it [03:18, 10.04s/it][A
21it [03:29, 10.26s/it][A

Batch 20, Loss: 0.0321



22it [03:38, 10.09s/it][A
23it [03:48, 10.07s/it][A
24it [03:58,  9.94s/it][A
 14%|█▍        | 7/50 [27:51<2:51:44, 239.63s/it]

Epoch 7 Perplexity: 1.0335
Epoch 7 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 8/50



0it [00:00, ?it/s][A
1it [00:10, 10.79s/it][A

Batch 0, Loss: 0.0308



2it [00:20, 10.15s/it][A
3it [00:30,  9.98s/it][A
4it [00:39,  9.77s/it][A
5it [00:49,  9.81s/it][A
6it [00:59,  9.85s/it][A
7it [01:09,  9.95s/it][A
8it [01:19, 10.06s/it][A
9it [01:30, 10.10s/it][A
10it [01:40, 10.08s/it][A
11it [01:50, 10.14s/it][A

Batch 10, Loss: 0.0350



12it [02:00, 10.09s/it][A
13it [02:10, 10.01s/it][A
14it [02:20,  9.99s/it][A
15it [02:30,  9.93s/it][A
16it [02:39,  9.81s/it][A
17it [02:49,  9.81s/it][A
18it [02:59,  9.81s/it][A
19it [03:09,  9.92s/it][A
20it [03:19,  9.88s/it][A
21it [03:28,  9.86s/it][A

Batch 20, Loss: 0.0274



22it [03:38,  9.73s/it][A
23it [03:48,  9.88s/it][A
24it [03:59,  9.98s/it][A
 16%|█▌        | 8/50 [31:53<2:48:15, 240.38s/it]

Epoch 8 Perplexity: 1.0259
Epoch 8 Lexical Diversity: 0.9962, Bigram Diversity: 1.0000
Epoch 9/50



0it [00:00, ?it/s][A
1it [00:10, 10.73s/it][A

Batch 0, Loss: 0.0248



2it [00:20, 10.44s/it][A
3it [00:31, 10.32s/it][A
4it [00:40, 10.12s/it][A
5it [00:51, 10.18s/it][A
6it [01:01, 10.12s/it][A
7it [01:11, 10.32s/it][A
8it [01:21, 10.21s/it][A
9it [01:32, 10.38s/it][A
10it [01:42, 10.24s/it][A
11it [01:52, 10.15s/it][A

Batch 10, Loss: 0.0236



12it [02:02, 10.08s/it][A
13it [02:12, 10.06s/it][A
14it [02:22, 10.01s/it][A
15it [02:32, 10.00s/it][A
16it [02:42,  9.99s/it][A
17it [02:52, 10.06s/it][A
18it [03:02,  9.91s/it][A
19it [03:11,  9.84s/it][A
20it [03:21,  9.81s/it][A
21it [03:31,  9.72s/it][A

Batch 20, Loss: 0.0220



22it [03:40,  9.76s/it][A
23it [03:50,  9.77s/it][A
24it [04:00, 10.01s/it][A
 18%|█▊        | 9/50 [35:55<2:44:45, 241.11s/it]

Epoch 9 Perplexity: 1.0240
Epoch 9 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 10/50



0it [00:00, ?it/s][A
1it [00:10, 10.15s/it][A

Batch 0, Loss: 0.0235



2it [00:19,  9.85s/it][A
3it [00:29,  9.89s/it][A
4it [00:39,  9.97s/it][A
5it [00:49,  9.96s/it][A
6it [00:59,  9.95s/it][A
7it [01:09, 10.00s/it][A
8it [01:19, 10.05s/it][A
9it [01:29, 10.01s/it][A
10it [01:39,  9.98s/it][A
11it [01:50, 10.25s/it][A

Batch 10, Loss: 0.0196



12it [02:00, 10.13s/it][A
13it [02:10, 10.07s/it][A
14it [02:20, 10.17s/it][A
15it [02:30, 10.09s/it][A
16it [02:40, 10.05s/it][A
17it [02:50, 10.03s/it][A
18it [03:00,  9.90s/it][A
19it [03:10,  9.87s/it][A
20it [03:20,  9.89s/it][A
21it [03:30, 10.13s/it][A

Batch 20, Loss: 0.0214



22it [03:40,  9.98s/it][A
23it [03:49,  9.82s/it][A
24it [03:59,  9.98s/it][A
 20%|██        | 10/50 [39:57<2:40:54, 241.37s/it]

Epoch 10 Perplexity: 1.0206
Epoch 10 Lexical Diversity: 0.9962, Bigram Diversity: 1.0000
Epoch 11/50



0it [00:00, ?it/s][A
1it [00:09,  9.63s/it][A

Batch 0, Loss: 0.0173



2it [00:19,  9.81s/it][A
3it [00:29,  9.71s/it][A
4it [00:38,  9.71s/it][A
5it [00:48,  9.79s/it][A
6it [00:59,  9.97s/it][A
7it [01:08,  9.89s/it][A
8it [01:19, 10.17s/it][A
9it [01:29, 10.09s/it][A
10it [01:38,  9.89s/it][A
11it [01:49,  9.95s/it][A

Batch 10, Loss: 0.0171



12it [01:59, 10.16s/it][A
13it [02:09, 10.19s/it][A
14it [02:20, 10.19s/it][A
15it [02:30, 10.09s/it][A
16it [02:39, 10.03s/it][A
17it [02:49, 10.04s/it][A
18it [02:59,  9.95s/it][A
19it [03:09,  9.85s/it][A
20it [03:19,  9.87s/it][A
21it [03:28,  9.84s/it][A

Batch 20, Loss: 0.0188



22it [03:39, 10.04s/it][A
23it [03:49, 10.02s/it][A
24it [03:58,  9.96s/it][A
 22%|██▏       | 11/50 [43:59<2:36:54, 241.40s/it]

Epoch 11 Perplexity: 1.0182
Epoch 11 Lexical Diversity: 0.9941, Bigram Diversity: 1.0000
Epoch 12/50



0it [00:00, ?it/s][A
1it [00:09,  9.91s/it][A

Batch 0, Loss: 0.0203



2it [00:19,  9.79s/it][A
3it [00:29,  9.83s/it][A
4it [00:38,  9.70s/it][A
5it [00:48,  9.67s/it][A
6it [00:59, 10.01s/it][A
7it [01:08,  9.87s/it][A
8it [01:19,  9.98s/it][A
9it [01:28,  9.96s/it][A
10it [01:38,  9.95s/it][A
11it [01:48,  9.94s/it][A

Batch 10, Loss: 0.0162



12it [01:58,  9.97s/it][A
13it [02:09, 10.07s/it][A
14it [02:20, 10.32s/it][A
15it [02:30, 10.23s/it][A
16it [02:39, 10.12s/it][A
17it [02:49, 10.01s/it][A
18it [02:59,  9.99s/it][A
19it [03:09,  9.92s/it][A
20it [03:19, 10.00s/it][A
21it [03:29,  9.90s/it][A

Batch 20, Loss: 0.0131



22it [03:39,  9.97s/it][A
23it [03:49,  9.91s/it][A
24it [03:59,  9.97s/it][A
 24%|██▍       | 12/50 [48:00<2:32:57, 241.51s/it]

Epoch 12 Perplexity: 1.0137
Epoch 12 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 13/50



0it [00:00, ?it/s][A
1it [00:10, 10.12s/it][A

Batch 0, Loss: 0.0150



2it [00:20, 10.52s/it][A
3it [00:30, 10.25s/it][A
4it [00:40,  9.96s/it][A
5it [00:50,  9.99s/it][A
6it [01:00,  9.98s/it][A
7it [01:09,  9.81s/it][A
8it [01:19,  9.70s/it][A
9it [01:29,  9.83s/it][A
10it [01:40, 10.13s/it][A
11it [01:50, 10.16s/it][A

Batch 10, Loss: 0.0152



12it [02:00, 10.19s/it][A
13it [02:10, 10.04s/it][A
14it [02:20,  9.93s/it][A
15it [02:30, 10.04s/it][A
16it [02:40, 10.09s/it][A
17it [02:50,  9.99s/it][A
18it [03:00, 10.03s/it][A
19it [03:10,  9.97s/it][A
20it [03:20, 10.05s/it][A
21it [03:30,  9.99s/it][A

Batch 20, Loss: 0.0137



22it [03:40,  9.93s/it][A
23it [03:49,  9.90s/it][A
24it [04:00, 10.03s/it][A
 26%|██▌       | 13/50 [52:04<2:29:14, 242.00s/it]

Epoch 13 Perplexity: 1.0122
Epoch 13 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 14/50



0it [00:00, ?it/s][A
1it [00:09,  9.94s/it][A

Batch 0, Loss: 0.0125



2it [00:19,  9.93s/it][A
3it [00:29, 10.02s/it][A
4it [00:40, 10.34s/it][A
5it [00:50, 10.16s/it][A
6it [01:00, 10.13s/it][A
7it [01:11, 10.19s/it][A
8it [01:21, 10.21s/it][A
9it [01:31, 10.08s/it][A
10it [01:41, 10.08s/it][A
11it [01:50,  9.98s/it][A

Batch 10, Loss: 0.0121



12it [02:00,  9.84s/it][A
13it [02:10,  9.93s/it][A
14it [02:21, 10.19s/it][A
15it [02:31, 10.04s/it][A
16it [02:40,  9.92s/it][A
17it [02:50,  9.89s/it][A
18it [03:00,  9.99s/it][A
19it [03:10,  9.87s/it][A
20it [03:21, 10.15s/it][A
21it [03:30,  9.96s/it][A

Batch 20, Loss: 0.0135



22it [03:40,  9.88s/it][A
23it [03:50, 10.00s/it][A
24it [04:00, 10.03s/it][A
 28%|██▊       | 14/50 [56:07<2:25:24, 242.35s/it]

Epoch 14 Perplexity: 1.0105
Epoch 14 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 15/50



0it [00:00, ?it/s][A
1it [00:10, 10.83s/it][A

Batch 0, Loss: 0.0098



2it [00:21, 10.49s/it][A
3it [00:30, 10.11s/it][A
4it [00:40, 10.16s/it][A
5it [00:50, 10.09s/it][A
6it [01:00,  9.99s/it][A
7it [01:10,  9.91s/it][A
8it [01:20, 10.02s/it][A
9it [01:30, 10.06s/it][A
10it [01:40,  9.96s/it][A
11it [01:50,  9.95s/it][A

Batch 10, Loss: 0.0108



12it [02:00, 10.00s/it][A
13it [02:10, 10.05s/it][A
14it [02:20,  9.97s/it][A
15it [02:30,  9.94s/it][A
16it [02:39,  9.80s/it][A
17it [02:50,  9.95s/it][A
18it [03:00,  9.97s/it][A
19it [03:10,  9.98s/it][A
20it [03:20, 10.03s/it][A
21it [03:31, 10.29s/it][A

Batch 20, Loss: 0.0096



22it [03:41, 10.28s/it][A
23it [03:51, 10.11s/it][A
24it [04:01, 10.04s/it][A
 30%|███       | 15/50 [1:00:10<2:21:35, 242.72s/it]

Epoch 15 Perplexity: 1.0089
Epoch 15 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 16/50



0it [00:00, ?it/s][A
1it [00:10, 10.16s/it][A

Batch 0, Loss: 0.0092



2it [00:20, 10.52s/it][A
3it [00:31, 10.33s/it][A
4it [00:41, 10.22s/it][A
5it [00:50, 10.03s/it][A
6it [01:00,  9.84s/it][A
7it [01:10,  9.87s/it][A
8it [01:19,  9.83s/it][A
9it [01:29,  9.90s/it][A
10it [01:40, 10.06s/it][A
11it [01:50,  9.93s/it][A

Batch 10, Loss: 0.0084



12it [01:59,  9.92s/it][A
13it [02:09,  9.91s/it][A
14it [02:20, 10.03s/it][A
15it [02:30, 10.13s/it][A
16it [02:40, 10.08s/it][A
17it [02:50, 10.08s/it][A
18it [03:00,  9.95s/it][A
19it [03:11, 10.25s/it][A
20it [03:21, 10.16s/it][A
21it [03:31, 10.21s/it][A

Batch 20, Loss: 0.0081



22it [03:41, 10.23s/it][A
23it [03:52, 10.43s/it][A
24it [04:02, 10.12s/it][A
 32%|███▏      | 16/50 [1:04:16<2:17:58, 243.49s/it]

Epoch 16 Perplexity: 1.0120
Epoch 16 Lexical Diversity: 0.9943, Bigram Diversity: 1.0000
Epoch 17/50



0it [00:00, ?it/s][A
1it [00:10, 10.21s/it][A

Batch 0, Loss: 0.0079



2it [00:20, 10.02s/it][A
3it [00:29,  9.92s/it][A
4it [00:40, 10.10s/it][A
5it [00:50, 10.17s/it][A
6it [01:00, 10.01s/it][A
7it [01:10,  9.96s/it][A
8it [01:20, 10.00s/it][A
9it [01:29,  9.93s/it][A
10it [01:40, 10.21s/it][A
11it [01:51, 10.42s/it][A

Batch 10, Loss: 0.0075



12it [02:02, 10.39s/it][A
13it [02:12, 10.48s/it][A
14it [02:23, 10.57s/it][A
15it [02:33, 10.26s/it][A
16it [02:42, 10.09s/it][A
17it [02:52, 10.05s/it][A
18it [03:02, 10.11s/it][A
19it [03:12, 10.04s/it][A
20it [03:22,  9.93s/it][A
21it [03:32,  9.99s/it][A

Batch 20, Loss: 0.0075



22it [03:42,  9.97s/it][A
23it [03:52, 10.07s/it][A
24it [04:02, 10.12s/it][A
 34%|███▍      | 17/50 [1:08:21<2:14:13, 244.03s/it]

Epoch 17 Perplexity: 1.0076
Epoch 17 Lexical Diversity: 0.9920, Bigram Diversity: 1.0000
Epoch 18/50



0it [00:00, ?it/s][A
1it [00:09,  9.89s/it][A

Batch 0, Loss: 0.0072



2it [00:19,  9.71s/it][A
3it [00:29,  9.95s/it][A
4it [00:39, 10.03s/it][A
5it [00:50, 10.10s/it][A
6it [01:00, 10.10s/it][A
7it [01:10, 10.08s/it][A
8it [01:19,  9.96s/it][A
9it [01:30, 10.05s/it][A
10it [01:41, 10.29s/it][A
11it [01:50, 10.13s/it][A

Batch 10, Loss: 0.0069



12it [02:00, 10.15s/it][A
13it [02:11, 10.13s/it][A
14it [02:20, 10.06s/it][A
15it [02:30, 10.06s/it][A
16it [02:41, 10.20s/it][A
17it [02:51, 10.06s/it][A
18it [03:01, 10.09s/it][A
19it [03:12, 10.29s/it][A
20it [03:21, 10.14s/it][A
21it [03:32, 10.32s/it][A

Batch 20, Loss: 0.0066



22it [03:42, 10.26s/it][A
23it [03:52, 10.05s/it][A
24it [04:02, 10.10s/it][A
 36%|███▌      | 18/50 [1:12:26<2:10:18, 244.32s/it]

Epoch 18 Perplexity: 1.0065
Epoch 18 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 19/50



0it [00:00, ?it/s][A
1it [00:09,  9.78s/it][A

Batch 0, Loss: 0.0067



2it [00:19,  9.61s/it][A
3it [00:28,  9.63s/it][A
4it [00:39, 10.08s/it][A
5it [00:50, 10.26s/it][A
6it [01:00, 10.24s/it][A
7it [01:10, 10.23s/it][A
8it [01:20, 10.18s/it][A
9it [01:30, 10.04s/it][A
10it [01:40, 10.04s/it][A
11it [01:50, 10.10s/it][A

Batch 10, Loss: 0.0064



12it [02:00, 10.01s/it][A
13it [02:10,  9.88s/it][A
14it [02:19,  9.86s/it][A
15it [02:29,  9.91s/it][A
16it [02:40,  9.98s/it][A
17it [02:50,  9.99s/it][A
18it [02:59,  9.92s/it][A
19it [03:10, 10.12s/it][A
20it [03:20,  9.95s/it][A
21it [03:29,  9.89s/it][A

Batch 20, Loss: 0.0062



22it [03:39,  9.95s/it][A
23it [03:49,  9.98s/it][A
24it [03:59,  9.99s/it][A
 38%|███▊      | 19/50 [1:16:28<2:05:53, 243.67s/it]

Epoch 19 Perplexity: 1.0059
Epoch 19 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 20/50



0it [00:00, ?it/s][A
1it [00:10, 10.00s/it][A

Batch 0, Loss: 0.0060



2it [00:20, 10.07s/it][A
3it [00:30, 10.29s/it][A
4it [00:41, 10.43s/it][A
5it [00:51, 10.31s/it][A
6it [01:01, 10.13s/it][A
7it [01:11, 10.16s/it][A
8it [01:21, 10.03s/it][A
9it [01:31, 10.25s/it][A
10it [01:41, 10.13s/it][A
11it [01:51, 10.11s/it][A

Batch 10, Loss: 0.0060



12it [02:02, 10.30s/it][A
13it [02:12, 10.12s/it][A
14it [02:22, 10.06s/it][A
15it [02:32, 10.01s/it][A
16it [02:42, 10.09s/it][A
17it [02:52, 10.14s/it][A
18it [03:02, 10.19s/it][A
19it [03:12, 10.02s/it][A
20it [03:22,  9.98s/it][A
21it [03:32,  9.88s/it][A

Batch 20, Loss: 0.0059



22it [03:42,  9.93s/it][A
23it [03:52,  9.99s/it][A
24it [04:02, 10.10s/it][A
 40%|████      | 20/50 [1:20:33<2:02:00, 244.01s/it]

Epoch 20 Perplexity: 1.0073
Epoch 20 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 21/50



0it [00:00, ?it/s][A
1it [00:09,  9.96s/it][A

Batch 0, Loss: 0.0073



2it [00:20, 10.06s/it][A
3it [00:29,  9.97s/it][A
4it [00:40, 10.12s/it][A
5it [00:50, 10.05s/it][A
6it [00:59,  9.91s/it][A
7it [01:10, 10.14s/it][A
8it [01:20, 10.18s/it][A
9it [01:30, 10.10s/it][A
10it [01:40, 10.07s/it][A
11it [01:51, 10.27s/it][A

Batch 10, Loss: 0.0053



12it [02:02, 10.45s/it][A
13it [02:12, 10.24s/it][A
14it [02:22, 10.17s/it][A
15it [02:31, 10.10s/it][A
16it [02:41, 10.03s/it][A
17it [02:51,  9.98s/it][A
18it [03:01,  9.84s/it][A
19it [03:11,  9.96s/it][A
20it [03:21, 10.00s/it][A
21it [03:32, 10.21s/it][A

Batch 20, Loss: 0.0051



22it [03:42, 10.20s/it][A
23it [03:52, 10.24s/it][A
24it [04:02, 10.11s/it][A
 42%|████▏     | 21/50 [1:24:38<1:58:05, 244.33s/it]

Epoch 21 Perplexity: 1.0052
Epoch 21 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 22/50



0it [00:00, ?it/s][A
1it [00:09,  9.93s/it][A

Batch 0, Loss: 0.0051



2it [00:19,  9.74s/it][A
3it [00:29,  9.96s/it][A
4it [00:39, 10.02s/it][A
5it [00:49,  9.82s/it][A
6it [00:59,  9.97s/it][A
7it [01:09, 10.04s/it][A
8it [01:19, 10.06s/it][A
9it [01:29, 10.04s/it][A
10it [01:40, 10.24s/it][A
11it [01:50, 10.07s/it][A

Batch 10, Loss: 0.0049



12it [02:00, 10.15s/it][A
13it [02:10, 10.18s/it][A
14it [02:21, 10.41s/it][A
15it [02:31, 10.25s/it][A
16it [02:41, 10.14s/it][A
17it [02:51, 10.01s/it][A
18it [03:01,  9.97s/it][A
19it [03:12, 10.25s/it][A
20it [03:22, 10.19s/it][A
21it [03:32, 10.15s/it][A

Batch 20, Loss: 0.0047



22it [03:42, 10.14s/it][A
23it [03:52, 10.03s/it][A
24it [04:01, 10.08s/it][A
 44%|████▍     | 22/50 [1:28:42<1:54:01, 244.34s/it]

Epoch 22 Perplexity: 1.0081
Epoch 22 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 23/50



0it [00:00, ?it/s][A
1it [00:09,  9.90s/it][A

Batch 0, Loss: 0.0047



2it [00:19,  9.83s/it][A
3it [00:29,  9.92s/it][A
4it [00:39,  9.85s/it][A
5it [00:49,  9.93s/it][A
6it [00:59, 10.02s/it][A
7it [01:09,  9.90s/it][A
8it [01:19,  9.98s/it][A
9it [01:30, 10.22s/it][A
10it [01:41, 10.39s/it][A
11it [01:51, 10.49s/it][A

Batch 10, Loss: 0.0046



12it [02:01, 10.28s/it][A
13it [02:11, 10.03s/it][A
14it [02:20,  9.84s/it][A
15it [02:30,  9.84s/it][A
16it [02:40,  9.96s/it][A
17it [02:50, 10.04s/it][A
18it [03:00,  9.86s/it][A
19it [03:10,  9.93s/it][A
20it [03:20,  9.95s/it][A
21it [03:30,  9.95s/it][A

Batch 20, Loss: 0.0084



22it [03:40,  9.93s/it][A
23it [03:50, 10.03s/it][A
24it [04:01, 10.05s/it][A
 46%|████▌     | 23/50 [1:32:46<1:49:51, 244.12s/it]

Epoch 23 Perplexity: 1.0044
Epoch 23 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 24/50



0it [00:00, ?it/s][A
1it [00:09,  9.73s/it][A

Batch 0, Loss: 0.0055



2it [00:19,  9.93s/it][A
3it [00:29,  9.88s/it][A
4it [00:39,  9.98s/it][A
5it [00:49,  9.83s/it][A
6it [00:58,  9.77s/it][A
7it [01:09,  9.95s/it][A
8it [01:19,  9.98s/it][A
9it [01:29, 10.19s/it][A
10it [01:39,  9.98s/it][A
11it [01:49,  9.84s/it][A

Batch 10, Loss: 0.0043



12it [01:59,  9.97s/it][A
13it [02:10, 10.21s/it][A
14it [02:20, 10.41s/it][A
15it [02:30, 10.24s/it][A
16it [02:40, 10.13s/it][A
17it [02:50, 10.05s/it][A
18it [03:00, 10.02s/it][A
19it [03:11, 10.25s/it][A
20it [03:21, 10.26s/it][A
21it [03:31, 10.24s/it][A

Batch 20, Loss: 0.0053



22it [03:41, 10.15s/it][A
23it [03:51, 10.17s/it][A
24it [04:01, 10.07s/it][A
 48%|████▊     | 24/50 [1:36:50<1:45:47, 244.14s/it]

Epoch 24 Perplexity: 1.0041
Epoch 24 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 25/50



0it [00:00, ?it/s][A
1it [00:10, 10.15s/it][A

Batch 0, Loss: 0.0041



2it [00:21, 10.58s/it][A
3it [00:31, 10.45s/it][A
4it [00:41, 10.36s/it][A
5it [00:51, 10.08s/it][A
6it [01:00, 10.01s/it][A
7it [01:10,  9.92s/it][A
8it [01:20,  9.90s/it][A
9it [01:30,  9.92s/it][A
10it [01:40,  9.94s/it][A
11it [01:50, 10.01s/it][A

Batch 10, Loss: 0.0040



12it [02:01, 10.11s/it][A
13it [02:11, 10.14s/it][A
14it [02:21, 10.02s/it][A
15it [02:30,  9.91s/it][A
16it [02:40,  9.87s/it][A
17it [02:51, 10.15s/it][A
18it [03:01, 10.15s/it][A
19it [03:12, 10.38s/it][A
20it [03:22, 10.33s/it][A
21it [03:32, 10.18s/it][A

Batch 20, Loss: 0.0042



22it [03:42, 10.24s/it][A
23it [03:52, 10.17s/it][A
24it [04:03, 10.15s/it][A
 50%|█████     | 25/50 [1:40:56<1:41:57, 244.70s/it]

Epoch 25 Perplexity: 1.0053
Epoch 25 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 26/50



0it [00:00, ?it/s][A
1it [00:10, 10.23s/it][A

Batch 0, Loss: 0.0038



2it [00:19,  9.83s/it][A
3it [00:29,  9.82s/it][A
4it [00:39,  9.79s/it][A
5it [00:49,  9.99s/it][A
6it [00:59,  9.90s/it][A
7it [01:09,  9.97s/it][A
8it [01:19,  9.94s/it][A
9it [01:29,  9.88s/it][A
10it [01:39,  9.88s/it][A
11it [01:48,  9.78s/it][A

Batch 10, Loss: 0.0037



12it [01:58,  9.92s/it][A
13it [02:08,  9.88s/it][A
14it [02:18,  9.99s/it][A
15it [02:28,  9.98s/it][A
16it [02:39, 10.24s/it][A
17it [02:50, 10.39s/it][A
18it [03:00, 10.37s/it][A
19it [03:11, 10.53s/it][A
20it [03:21, 10.34s/it][A
21it [03:31, 10.20s/it][A

Batch 20, Loss: 0.0036



22it [03:41, 10.04s/it][A
23it [03:51, 10.10s/it][A
24it [04:01, 10.06s/it][A
 52%|█████▏    | 26/50 [1:45:00<1:37:48, 244.50s/it]

Epoch 26 Perplexity: 1.0036
Epoch 26 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 27/50



0it [00:00, ?it/s][A
1it [00:10, 10.01s/it][A

Batch 0, Loss: 0.0036



2it [00:20, 10.21s/it][A
3it [00:30, 10.11s/it][A
4it [00:40, 10.16s/it][A
5it [00:51, 10.30s/it][A
6it [01:01, 10.18s/it][A
7it [01:11, 10.23s/it][A
8it [01:21, 10.05s/it][A
9it [01:31, 10.13s/it][A
10it [01:41, 10.02s/it][A
11it [01:51, 10.02s/it][A

Batch 10, Loss: 0.0035



12it [02:01, 10.04s/it][A
13it [02:11, 10.05s/it][A
14it [02:21, 10.02s/it][A
15it [02:30,  9.84s/it][A
16it [02:40,  9.85s/it][A
17it [02:50,  9.93s/it][A
18it [03:01, 10.18s/it][A
19it [03:11, 10.22s/it][A
20it [03:21, 10.12s/it][A
21it [03:31, 10.01s/it][A

Batch 20, Loss: 0.0058



22it [03:42, 10.22s/it][A
23it [03:52, 10.39s/it][A
24it [04:03, 10.13s/it][A
 54%|█████▍    | 27/50 [1:49:06<1:33:50, 244.81s/it]

Epoch 27 Perplexity: 1.0034
Epoch 27 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 28/50



0it [00:00, ?it/s][A
1it [00:09,  9.62s/it][A

Batch 0, Loss: 0.0034



2it [00:19,  9.99s/it][A
3it [00:30, 10.23s/it][A
4it [00:40, 10.02s/it][A
5it [00:50, 10.30s/it][A
6it [01:00, 10.11s/it][A
7it [01:10, 10.03s/it][A
8it [01:20, 10.01s/it][A
9it [01:30,  9.95s/it][A
10it [01:40,  9.89s/it][A
11it [01:50, 10.00s/it][A

Batch 10, Loss: 0.0033



12it [02:00, 10.09s/it][A
13it [02:10,  9.96s/it][A
14it [02:20,  9.90s/it][A
15it [02:29,  9.82s/it][A
16it [02:39,  9.86s/it][A
17it [02:50, 10.14s/it][A
18it [03:00, 10.19s/it][A
19it [03:10, 10.05s/it][A
20it [03:20,  9.94s/it][A
21it [03:30, 10.19s/it][A

Batch 20, Loss: 0.0032



22it [03:41, 10.24s/it][A
23it [03:51, 10.15s/it][A
24it [04:01, 10.06s/it][A
 56%|█████▌    | 28/50 [1:53:10<1:29:40, 244.56s/it]

Epoch 28 Perplexity: 1.0032
Epoch 28 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 29/50



0it [00:00, ?it/s][A
1it [00:10, 10.03s/it][A

Batch 0, Loss: 0.0032



2it [00:20, 10.11s/it][A
3it [00:30, 10.36s/it][A
4it [00:40, 10.06s/it][A
5it [00:50, 10.15s/it][A
6it [01:00,  9.97s/it][A
7it [01:09,  9.84s/it][A
8it [01:20,  9.96s/it][A
9it [01:31, 10.25s/it][A
10it [01:40, 10.15s/it][A
11it [01:51, 10.15s/it][A

Batch 10, Loss: 0.0035



12it [02:01, 10.22s/it][A
13it [02:11, 10.07s/it][A
14it [02:21, 10.13s/it][A
15it [02:31,  9.95s/it][A
16it [02:40,  9.91s/it][A
17it [02:51, 10.00s/it][A
18it [03:00,  9.97s/it][A
19it [03:11, 10.04s/it][A
20it [03:21, 10.18s/it][A
21it [03:31, 10.18s/it][A

Batch 20, Loss: 0.0030



22it [03:41,  9.98s/it][A
23it [03:51,  9.93s/it][A
24it [04:02, 10.09s/it][A
 58%|█████▊    | 29/50 [1:57:14<1:25:35, 244.55s/it]

Epoch 29 Perplexity: 1.0030
Epoch 29 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 30/50



0it [00:00, ?it/s][A
1it [00:10, 10.22s/it][A

Batch 0, Loss: 0.0030



2it [00:20, 10.04s/it][A
3it [00:30, 10.01s/it][A
4it [00:39,  9.91s/it][A
5it [00:50, 10.21s/it][A
6it [01:01, 10.36s/it][A
7it [01:11, 10.48s/it][A
8it [01:22, 10.41s/it][A
9it [01:31, 10.19s/it][A
10it [01:42, 10.18s/it][A
11it [01:52, 10.21s/it][A

Batch 10, Loss: 0.0029



12it [02:02, 10.14s/it][A
13it [02:12,  9.99s/it][A
14it [02:22, 10.07s/it][A
15it [02:32, 10.02s/it][A
16it [02:42, 10.00s/it][A
17it [02:52, 10.10s/it][A
18it [03:02, 10.07s/it][A
19it [03:12, 10.08s/it][A
20it [03:22, 10.04s/it][A
21it [03:32, 10.00s/it][A

Batch 20, Loss: 0.0029



22it [03:42, 10.02s/it][A
23it [03:52,  9.91s/it][A
24it [04:02, 10.12s/it][A
 60%|██████    | 30/50 [2:01:19<1:21:35, 244.78s/it]

Epoch 30 Perplexity: 1.0029
Epoch 30 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 31/50



0it [00:00, ?it/s][A
1it [00:10, 10.25s/it][A

Batch 0, Loss: 0.0028



2it [00:20, 10.17s/it][A
3it [00:30, 10.06s/it][A
4it [00:40,  9.94s/it][A
5it [00:50, 10.18s/it][A
6it [01:00, 10.00s/it][A
7it [01:10, 10.07s/it][A
8it [01:20, 10.13s/it][A
9it [01:30, 10.08s/it][A
10it [01:40, 10.10s/it][A
11it [01:50, 10.00s/it][A

Batch 10, Loss: 0.0030



12it [02:00,  9.86s/it][A
13it [02:09,  9.80s/it][A
14it [02:19,  9.74s/it][A
15it [02:29,  9.91s/it][A
16it [02:39,  9.98s/it][A
17it [02:50, 10.08s/it][A
18it [03:00, 10.11s/it][A
19it [03:10, 10.18s/it][A
20it [03:20, 10.11s/it][A
21it [03:30, 10.07s/it][A

Batch 20, Loss: 0.0027



22it [03:41, 10.28s/it][A
23it [03:52, 10.47s/it][A
24it [04:03, 10.14s/it][A
 62%|██████▏   | 31/50 [2:05:25<1:17:36, 245.09s/it]

Epoch 31 Perplexity: 1.0027
Epoch 31 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 32/50



0it [00:00, ?it/s][A
1it [00:10, 10.89s/it][A

Batch 0, Loss: 0.0027



2it [00:21, 10.76s/it][A
3it [00:31, 10.39s/it][A
4it [00:41, 10.15s/it][A
5it [00:51, 10.17s/it][A
6it [01:01,  9.96s/it][A
7it [01:10,  9.82s/it][A
8it [01:20,  9.90s/it][A
9it [01:30, 10.03s/it][A
10it [01:41, 10.10s/it][A
11it [01:51, 10.07s/it][A

Batch 10, Loss: 0.0026



12it [02:01, 10.07s/it][A
13it [02:11, 10.08s/it][A
14it [02:21, 10.14s/it][A
15it [02:31, 10.04s/it][A
16it [02:41, 10.06s/it][A
17it [02:52, 10.28s/it][A
18it [03:02, 10.21s/it][A
19it [03:12, 10.13s/it][A
20it [03:22, 10.17s/it][A
21it [03:33, 10.39s/it][A

Batch 20, Loss: 0.0026



22it [03:43, 10.23s/it][A
23it [03:53, 10.08s/it][A
24it [04:03, 10.13s/it][A
 64%|██████▍   | 32/50 [2:09:31<1:13:33, 245.22s/it]

Epoch 32 Perplexity: 1.0026
Epoch 32 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 33/50



0it [00:00, ?it/s][A
1it [00:09,  9.89s/it][A

Batch 0, Loss: 0.0035



2it [00:20, 10.43s/it][A
3it [00:30, 10.20s/it][A
4it [00:40, 10.10s/it][A
5it [00:51, 10.37s/it][A
6it [01:01, 10.11s/it][A
7it [01:10, 10.05s/it][A
8it [01:20, 10.00s/it][A
9it [01:30, 10.02s/it][A
10it [01:40,  9.98s/it][A
11it [01:51, 10.09s/it][A

Batch 10, Loss: 0.0025



12it [02:01, 10.05s/it][A
13it [02:11, 10.02s/it][A
14it [02:21, 10.11s/it][A
15it [02:32, 10.28s/it][A
16it [02:42, 10.47s/it][A
17it [02:52, 10.34s/it][A
18it [03:03, 10.27s/it][A
19it [03:13, 10.26s/it][A
20it [03:23, 10.12s/it][A
21it [03:32,  9.98s/it][A

Batch 20, Loss: 0.0024



22it [03:42, 10.05s/it][A
23it [03:53, 10.11s/it][A
24it [04:03, 10.13s/it][A
 66%|██████▌   | 33/50 [2:13:36<1:09:30, 245.35s/it]

Epoch 33 Perplexity: 1.0024
Epoch 33 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 34/50



0it [00:00, ?it/s][A
1it [00:10, 10.92s/it][A

Batch 0, Loss: 0.0024



2it [00:20, 10.15s/it][A
3it [00:30, 10.24s/it][A
4it [00:41, 10.46s/it][A
5it [00:51, 10.27s/it][A
6it [01:01, 10.11s/it][A
7it [01:11,  9.99s/it][A
8it [01:21, 10.06s/it][A
9it [01:31, 10.13s/it][A
10it [01:42, 10.33s/it][A
11it [01:52, 10.24s/it][A

Batch 10, Loss: 0.0024



12it [02:02, 10.33s/it][A
13it [02:12, 10.20s/it][A
14it [02:23, 10.22s/it][A
15it [02:32, 10.04s/it][A
16it [02:43, 10.10s/it][A
17it [02:53, 10.09s/it][A
18it [03:02,  9.99s/it][A
19it [03:13, 10.11s/it][A
20it [03:23, 10.19s/it][A
21it [03:33, 10.23s/it][A

Batch 20, Loss: 0.0023



22it [03:43, 10.02s/it][A
23it [03:53,  9.95s/it][A
24it [04:03, 10.13s/it][A
 68%|██████▊   | 34/50 [2:17:42<1:05:27, 245.44s/it]

Epoch 34 Perplexity: 1.0023
Epoch 34 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 35/50



0it [00:00, ?it/s][A
1it [00:09,  9.69s/it][A

Batch 0, Loss: 0.0023



2it [00:19,  9.82s/it][A
3it [00:30, 10.25s/it][A
4it [00:40, 10.24s/it][A
5it [00:50, 10.08s/it][A
6it [01:00,  9.98s/it][A
7it [01:10, 10.24s/it][A
8it [01:21, 10.28s/it][A
9it [01:30, 10.07s/it][A
10it [01:41, 10.26s/it][A
11it [01:51, 10.13s/it][A

Batch 10, Loss: 0.0023



12it [02:01, 10.05s/it][A
13it [02:11, 10.08s/it][A
14it [02:21, 10.10s/it][A
15it [02:32, 10.28s/it][A
16it [02:41, 10.08s/it][A
17it [02:52, 10.11s/it][A
18it [03:02, 10.15s/it][A
19it [03:12, 10.19s/it][A
20it [03:22, 10.08s/it][A
21it [03:32, 10.14s/it][A

Batch 20, Loss: 0.0022



22it [03:42,  9.94s/it][A
23it [03:52, 10.05s/it][A
24it [04:02, 10.10s/it][A
 70%|███████   | 35/50 [2:21:47<1:01:18, 245.25s/it]

Epoch 35 Perplexity: 1.0031
Epoch 35 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 36/50



0it [00:00, ?it/s][A
1it [00:09,  9.90s/it][A

Batch 0, Loss: 0.0022



2it [00:20, 10.12s/it][A
3it [00:29,  9.97s/it][A
4it [00:39,  9.81s/it][A
5it [00:49,  9.83s/it][A
6it [00:59,  9.82s/it][A
7it [01:08,  9.71s/it][A
8it [01:18,  9.84s/it][A
9it [01:29, 10.05s/it][A
10it [01:38,  9.92s/it][A
11it [01:48,  9.75s/it][A

Batch 10, Loss: 0.0022



12it [01:58,  9.75s/it][A
13it [02:07,  9.79s/it][A
14it [02:18,  9.94s/it][A
15it [02:28, 10.17s/it][A
16it [02:39, 10.20s/it][A
17it [02:49, 10.16s/it][A
18it [02:59, 10.15s/it][A
19it [03:09, 10.17s/it][A
20it [03:19, 10.18s/it][A
21it [03:30, 10.36s/it][A

Batch 20, Loss: 0.0021



22it [03:40, 10.20s/it][A
23it [03:50, 10.04s/it][A
24it [04:00, 10.03s/it][A
 72%|███████▏  | 36/50 [2:25:50<57:04, 244.64s/it]  

Epoch 36 Perplexity: 1.0021
Epoch 36 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 37/50



0it [00:00, ?it/s][A
1it [00:09,  9.95s/it][A

Batch 0, Loss: 0.0021



2it [00:20, 10.05s/it][A
3it [00:29,  9.86s/it][A
4it [00:39, 10.02s/it][A
5it [00:50, 10.09s/it][A
6it [01:00, 10.15s/it][A
7it [01:10, 10.17s/it][A
8it [01:20,  9.94s/it][A
9it [01:29,  9.92s/it][A
10it [01:40, 10.18s/it][A
11it [01:50, 10.08s/it][A

Batch 10, Loss: 0.0021



12it [02:00,  9.92s/it][A
13it [02:10,  9.97s/it][A
14it [02:20,  9.91s/it][A
15it [02:30,  9.95s/it][A
16it [02:40, 10.15s/it][A
17it [02:50, 10.03s/it][A
18it [03:00, 10.07s/it][A
19it [03:10, 10.04s/it][A
20it [03:20, 10.11s/it][A
21it [03:31, 10.30s/it][A

Batch 20, Loss: 0.0020



22it [03:41, 10.07s/it][A
23it [03:51, 10.12s/it][A
24it [04:00, 10.04s/it][A
 74%|███████▍  | 37/50 [2:29:53<52:55, 244.26s/it]

Epoch 37 Perplexity: 1.0020
Epoch 37 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 38/50



0it [00:00, ?it/s][A
1it [00:10, 10.47s/it][A

Batch 0, Loss: 0.0020



2it [00:20, 10.20s/it][A
3it [00:30, 10.17s/it][A
4it [00:40, 10.16s/it][A
5it [00:51, 10.20s/it][A
6it [01:01, 10.12s/it][A
7it [01:11, 10.16s/it][A
8it [01:21, 10.08s/it][A
9it [01:31, 10.01s/it][A
10it [01:40,  9.92s/it][A
11it [01:50,  9.95s/it][A

Batch 10, Loss: 0.0020



12it [02:00,  9.89s/it][A
13it [02:10,  9.89s/it][A
14it [02:20,  9.81s/it][A
15it [02:30,  9.92s/it][A
16it [02:40, 10.16s/it][A
17it [02:51, 10.35s/it][A
18it [03:01, 10.24s/it][A
19it [03:11, 10.15s/it][A
20it [03:21, 10.11s/it][A
21it [03:31, 10.16s/it][A

Batch 20, Loss: 0.0019



22it [03:41, 10.02s/it][A
23it [03:51,  9.90s/it][A
24it [04:02, 10.09s/it][A
 76%|███████▌  | 38/50 [2:33:58<48:52, 244.34s/it]

Epoch 38 Perplexity: 1.0019
Epoch 38 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 39/50



0it [00:00, ?it/s][A
1it [00:10, 10.68s/it][A

Batch 0, Loss: 0.0019



2it [00:20, 10.42s/it][A
3it [00:31, 10.31s/it][A
4it [00:40, 10.09s/it][A
5it [00:51, 10.32s/it][A
6it [01:01, 10.10s/it][A
7it [01:10,  9.93s/it][A
8it [01:21, 10.03s/it][A
9it [01:31, 10.04s/it][A
10it [01:41, 10.17s/it][A
11it [01:51, 10.02s/it][A

Batch 10, Loss: 0.0019



12it [02:01,  9.94s/it][A
13it [02:11,  9.95s/it][A
14it [02:20,  9.96s/it][A
15it [02:30,  9.93s/it][A
16it [02:40,  9.89s/it][A
17it [02:51, 10.12s/it][A
18it [03:01, 10.04s/it][A
19it [03:11,  9.99s/it][A
20it [03:20,  9.92s/it][A
21it [03:30,  9.97s/it][A

Batch 20, Loss: 0.0018



22it [03:40,  9.99s/it][A
23it [03:51, 10.09s/it][A
24it [04:01, 10.06s/it][A
 78%|███████▊  | 39/50 [2:38:02<44:46, 244.20s/it]

Epoch 39 Perplexity: 1.0019
Epoch 39 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 40/50



0it [00:00, ?it/s][A
1it [00:10, 10.05s/it][A

Batch 0, Loss: 0.0034



2it [00:19,  9.92s/it][A
3it [00:29,  9.75s/it][A
4it [00:40, 10.17s/it][A
5it [00:49, 10.00s/it][A
6it [01:00, 10.10s/it][A
7it [01:10, 10.27s/it][A
8it [01:20, 10.20s/it][A
9it [01:30, 10.11s/it][A
10it [01:41, 10.14s/it][A
11it [01:50, 10.06s/it][A

Batch 10, Loss: 0.0018



12it [02:00,  9.92s/it][A
13it [02:11, 10.15s/it][A
14it [02:21, 10.08s/it][A
15it [02:30,  9.98s/it][A
16it [02:41, 10.06s/it][A
17it [02:51, 10.16s/it][A
18it [03:01, 10.04s/it][A
19it [03:11, 10.10s/it][A
20it [03:21, 10.12s/it][A
21it [03:31,  9.98s/it][A

Batch 20, Loss: 0.0036



22it [03:41,  9.95s/it][A
23it [03:51, 10.20s/it][A
24it [04:02, 10.09s/it][A
 80%|████████  | 40/50 [2:42:07<40:43, 244.37s/it]

Epoch 40 Perplexity: 1.0018
Epoch 40 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 41/50



0it [00:00, ?it/s][A
1it [00:10, 10.19s/it][A

Batch 0, Loss: 0.0018



2it [00:21, 10.57s/it][A
3it [00:31, 10.42s/it][A
4it [00:41, 10.53s/it][A
5it [00:51, 10.16s/it][A
6it [01:01, 10.16s/it][A
7it [01:11,  9.98s/it][A
8it [01:21, 10.01s/it][A
9it [01:31, 10.15s/it][A
10it [01:41, 10.02s/it][A
11it [01:51, 10.07s/it][A

Batch 10, Loss: 0.0017



12it [02:01,  9.90s/it][A
13it [02:11, 10.00s/it][A
14it [02:20,  9.86s/it][A
15it [02:31, 10.12s/it][A
16it [02:41,  9.95s/it][A
17it [02:50,  9.88s/it][A
18it [03:00,  9.93s/it][A
19it [03:10,  9.87s/it][A
20it [03:20,  9.89s/it][A
21it [03:30,  9.97s/it][A

Batch 20, Loss: 0.0017



22it [03:40,  9.90s/it][A
23it [03:50,  9.84s/it][A
24it [04:00, 10.01s/it][A
 82%|████████▏ | 41/50 [2:46:09<36:35, 243.89s/it]

Epoch 41 Perplexity: 1.0017
Epoch 41 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 42/50



0it [00:00, ?it/s][A
1it [00:09,  9.62s/it][A

Batch 0, Loss: 0.0017



2it [00:19, 10.01s/it][A
3it [00:30, 10.12s/it][A
4it [00:39,  9.91s/it][A
5it [00:49,  9.97s/it][A
6it [00:59,  9.89s/it][A
7it [01:09,  9.82s/it][A
8it [01:20, 10.14s/it][A
9it [01:30, 10.11s/it][A
10it [01:40, 10.15s/it][A
11it [01:50, 10.18s/it][A

Batch 10, Loss: 0.0016



12it [02:00, 10.23s/it][A
13it [02:10, 10.05s/it][A
14it [02:21, 10.29s/it][A
15it [02:31, 10.16s/it][A
16it [02:41, 10.18s/it][A
17it [02:51, 10.08s/it][A
18it [03:01, 10.21s/it][A
19it [03:12, 10.32s/it][A
20it [03:22, 10.20s/it][A
21it [03:32, 10.06s/it][A

Batch 20, Loss: 0.0046



22it [03:42, 10.14s/it][A
23it [03:52, 10.16s/it][A
24it [04:02, 10.11s/it][A
 84%|████████▍ | 42/50 [2:50:14<32:33, 244.23s/it]

Epoch 42 Perplexity: 1.0016
Epoch 42 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 43/50



0it [00:00, ?it/s][A
1it [00:09,  9.51s/it][A

Batch 0, Loss: 0.0016



2it [00:19,  9.80s/it][A
3it [00:29,  9.97s/it][A
4it [00:39,  9.97s/it][A
5it [00:49,  9.98s/it][A
6it [00:59, 10.02s/it][A
7it [01:09,  9.94s/it][A
8it [01:20, 10.16s/it][A
9it [01:30, 10.13s/it][A
10it [01:40, 10.13s/it][A
11it [01:50, 10.03s/it][A

Batch 10, Loss: 0.0016



12it [02:00, 10.27s/it][A
13it [02:10, 10.05s/it][A
14it [02:21, 10.26s/it][A
15it [02:31, 10.23s/it][A
16it [02:41, 10.23s/it][A
17it [02:51, 10.21s/it][A
18it [03:01, 10.05s/it][A
19it [03:11, 10.15s/it][A
20it [03:21,  9.98s/it][A
21it [03:31,  9.91s/it][A

Batch 20, Loss: 0.0016



22it [03:41,  9.95s/it][A
23it [03:51, 10.02s/it][A
24it [04:01, 10.05s/it][A
 86%|████████▌ | 43/50 [2:54:18<28:28, 244.03s/it]

Epoch 43 Perplexity: 1.0015
Epoch 43 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 44/50



0it [00:00, ?it/s][A
1it [00:10, 10.16s/it][A

Batch 0, Loss: 0.0016



2it [00:20, 10.19s/it][A
3it [00:30, 10.21s/it][A
4it [00:40,  9.91s/it][A
5it [00:50, 10.18s/it][A
6it [01:00, 10.13s/it][A
7it [01:10, 10.00s/it][A
8it [01:20,  9.91s/it][A
9it [01:30, 10.00s/it][A
10it [01:40,  9.97s/it][A
11it [01:50, 10.01s/it][A

Batch 10, Loss: 0.0015



12it [02:00, 10.03s/it][A
13it [02:10,  9.95s/it][A
14it [02:21, 10.22s/it][A
15it [02:31, 10.24s/it][A
16it [02:41, 10.13s/it][A
17it [02:51, 10.16s/it][A
18it [03:01, 10.13s/it][A
19it [03:12, 10.34s/it][A
20it [03:22, 10.29s/it][A
21it [03:32, 10.18s/it][A

Batch 20, Loss: 0.0028



22it [03:42, 10.10s/it][A
23it [03:53, 10.31s/it][A
24it [04:03, 10.14s/it][A
 88%|████████▊ | 44/50 [2:58:24<24:27, 244.57s/it]

Epoch 44 Perplexity: 1.0015
Epoch 44 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 45/50



0it [00:00, ?it/s][A
1it [00:09,  9.93s/it][A

Batch 0, Loss: 0.0015



2it [00:20, 10.13s/it][A
3it [00:29,  9.97s/it][A
4it [00:40, 10.08s/it][A
5it [00:50, 10.02s/it][A
6it [01:00, 10.05s/it][A
7it [01:10, 10.07s/it][A
8it [01:20, 10.03s/it][A
9it [01:30,  9.93s/it][A
10it [01:40, 10.01s/it][A
11it [01:50, 10.06s/it][A

Batch 10, Loss: 0.0022



12it [02:00, 10.06s/it][A
13it [02:10,  9.92s/it][A
14it [02:19,  9.85s/it][A
15it [02:29,  9.79s/it][A
16it [02:38,  9.74s/it][A
17it [02:49, 10.11s/it][A
18it [03:00, 10.11s/it][A
19it [03:10, 10.07s/it][A
20it [03:20, 10.28s/it][A
21it [03:31, 10.41s/it][A

Batch 20, Loss: 0.0014



22it [03:41, 10.20s/it][A
23it [03:51, 10.07s/it][A
24it [04:01, 10.06s/it][A
 90%|█████████ | 45/50 [3:02:28<20:22, 244.40s/it]

Epoch 45 Perplexity: 1.0015
Epoch 45 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 46/50



0it [00:00, ?it/s][A
1it [00:10, 10.25s/it][A

Batch 0, Loss: 0.0014



2it [00:20, 10.17s/it][A
3it [00:29,  9.87s/it][A
4it [00:39,  9.79s/it][A
5it [00:49,  9.85s/it][A
6it [00:59,  9.99s/it][A
7it [01:10, 10.21s/it][A
8it [01:20, 10.23s/it][A
9it [01:30, 10.21s/it][A
10it [01:41, 10.30s/it][A
11it [01:51, 10.40s/it][A

Batch 10, Loss: 0.0014



12it [02:02, 10.32s/it][A
13it [02:12, 10.22s/it][A
14it [02:21, 10.12s/it][A
15it [02:32, 10.09s/it][A
16it [02:41, 10.00s/it][A
17it [02:52, 10.09s/it][A
18it [03:02, 10.03s/it][A
19it [03:11,  9.99s/it][A
20it [03:22, 10.07s/it][A
21it [03:31,  9.88s/it][A

Batch 20, Loss: 0.0014



22it [03:41,  9.83s/it][A
23it [03:52, 10.11s/it][A
24it [04:02, 10.09s/it][A
 92%|█████████▏| 46/50 [3:06:33<16:17, 244.48s/it]

Epoch 46 Perplexity: 1.0014
Epoch 46 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 47/50



0it [00:00, ?it/s][A
1it [00:10, 10.09s/it][A

Batch 0, Loss: 0.0014



2it [00:20, 10.19s/it][A
3it [00:30, 10.16s/it][A
4it [00:40,  9.96s/it][A
5it [00:49,  9.81s/it][A
6it [00:59,  9.83s/it][A
7it [01:09,  9.86s/it][A
8it [01:19,  9.81s/it][A
9it [01:29, 10.10s/it][A
10it [01:40, 10.26s/it][A
11it [01:50, 10.24s/it][A

Batch 10, Loss: 0.0014



12it [02:00, 10.13s/it][A
13it [02:10, 10.17s/it][A
14it [02:20, 10.08s/it][A
15it [02:31, 10.17s/it][A
16it [02:41, 10.38s/it][A
17it [02:52, 10.28s/it][A
18it [03:02, 10.40s/it][A
19it [03:12, 10.14s/it][A
20it [03:21, 10.02s/it][A
21it [03:31,  9.95s/it][A

Batch 20, Loss: 0.0013



22it [03:41,  9.89s/it][A
23it [03:51,  9.84s/it][A
24it [04:01, 10.06s/it][A
 94%|█████████▍| 47/50 [3:10:36<12:12, 244.28s/it]

Epoch 47 Perplexity: 1.0013
Epoch 47 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 48/50



0it [00:00, ?it/s][A
1it [00:09,  9.62s/it][A

Batch 0, Loss: 0.0013



2it [00:19,  9.97s/it][A
3it [00:30, 10.06s/it][A
4it [00:39,  9.85s/it][A
5it [00:49,  9.91s/it][A
6it [01:00, 10.28s/it][A
7it [01:10, 10.13s/it][A
8it [01:21, 10.37s/it][A
9it [01:31, 10.25s/it][A
10it [01:41, 10.16s/it][A
11it [01:51, 10.14s/it][A

Batch 10, Loss: 0.0013



12it [02:01, 10.04s/it][A
13it [02:11, 10.08s/it][A
14it [02:20,  9.93s/it][A
15it [02:31, 10.02s/it][A
16it [02:41, 10.22s/it][A
17it [02:51, 10.15s/it][A
18it [03:01, 10.16s/it][A
19it [03:12, 10.18s/it][A
20it [03:23, 10.39s/it][A
21it [03:33, 10.36s/it][A

Batch 20, Loss: 0.0026



22it [03:43, 10.34s/it][A
23it [03:53, 10.25s/it][A
24it [04:03, 10.16s/it][A
 96%|█████████▌| 48/50 [3:14:43<08:09, 244.90s/it]

Epoch 48 Perplexity: 1.0013
Epoch 48 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 49/50



0it [00:00, ?it/s][A
1it [00:09,  9.68s/it][A

Batch 0, Loss: 0.0013



2it [00:19,  9.94s/it][A
3it [00:30, 10.32s/it][A
4it [00:40, 10.21s/it][A
5it [00:50, 10.20s/it][A
6it [01:00, 10.12s/it][A
7it [01:11, 10.30s/it][A
8it [01:21, 10.15s/it][A
9it [01:31, 10.09s/it][A
10it [01:41,  9.99s/it][A
11it [01:51, 10.21s/it][A

Batch 10, Loss: 0.0013



12it [02:01, 10.15s/it][A
13it [02:11, 10.00s/it][A
14it [02:21,  9.97s/it][A
15it [02:31,  9.90s/it][A
16it [02:41,  9.97s/it][A
17it [02:50,  9.91s/it][A
18it [03:00,  9.75s/it][A
19it [03:09,  9.70s/it][A
20it [03:19,  9.81s/it][A
21it [03:29,  9.69s/it][A

Batch 20, Loss: 0.0012



22it [03:39,  9.80s/it][A
23it [03:49,  9.78s/it][A
24it [03:59,  9.99s/it][A
 98%|█████████▊| 49/50 [3:18:45<04:04, 244.09s/it]

Epoch 49 Perplexity: 1.0013
Epoch 49 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000
Epoch 50/50



0it [00:00, ?it/s][A
1it [00:10, 10.03s/it][A

Batch 0, Loss: 0.0012



2it [00:19,  9.95s/it][A
3it [00:29,  9.99s/it][A
4it [00:39,  9.94s/it][A
5it [00:49,  9.75s/it][A
6it [00:59,  9.99s/it][A
7it [01:09, 10.02s/it][A
8it [01:19,  9.87s/it][A
9it [01:30, 10.13s/it][A
10it [01:39,  9.98s/it][A
11it [01:49,  9.98s/it][A

Batch 10, Loss: 0.0031



12it [01:59,  9.94s/it][A
13it [02:09,  9.96s/it][A
14it [02:19, 10.02s/it][A
15it [02:30, 10.20s/it][A
16it [02:40, 10.18s/it][A
17it [02:50, 10.04s/it][A
18it [03:00, 10.23s/it][A
19it [03:10, 10.05s/it][A
20it [03:19,  9.90s/it][A
21it [03:30,  9.97s/it][A

Batch 20, Loss: 0.0012



22it [03:39,  9.91s/it][A
23it [03:49,  9.86s/it][A
24it [03:59,  9.97s/it][A
100%|██████████| 50/50 [3:22:47<00:00, 243.34s/it]

Epoch 50 Perplexity: 1.0012
Epoch 50 Lexical Diversity: 0.9894, Bigram Diversity: 1.0000





## Inference

In [194]:
result = {}

for category, sketch in tqdm(sketches, desc=f"Generating Caption..."):
    inputs = processor(sketch, return_tensors="pt")
    outputs = model.generate(**inputs)
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    result[category] = caption

print(result)

with open('img2text_finetuned.json', 'w+') as f:
    json.dump(result, f)

Generating Caption...: 100%|██████████| 47/47 [01:36<00:00,  2.05s/it]

{'bike_1': 'isometric view drawing of a police motorcycle', 'bike_2': 'isometric view drawing of a police motorcycle', 'bike_3': 'isometric view drawing of a motorcycle', 'bike_4': 'isometric view drawing of a motorcycle', 'bike_5': 'isometric view drawing of a police motorcycle', 'bike_6': 'isometric view drawing of a motorcycle', 'bike_7': 'isometric view drawing of a police motorcycle', 'bike_8': 'isometric view drawing of a motorcycle', 'bike_9': 'isometric view drawing of a motorcycle', 'car_1': 'isometric view drawing of a car', 'car_2': 'isometric view drawing of a car', 'car_3': 'isometric view drawing of a car', 'car_4': 'sideview drawing of a car', 'car_5': 'sideview drawing of a car', 'car_6': 'sideview drawing of a car', 'car_7': 'isometric view drawing of a car', 'car_8': 'sideview drawing of a car', 'car_9': 'sideview drawing of a car', 'car_10': 'isometric view drawing of a classic car', 'cat_1': 'sketch of a cat', 'cat_2': 'drawing of a cat', 'cat_3': 'drawing of a cat'




## Download the model parameters for deployment

In [196]:
model.save_pretrained('./fine_tuned_blip')
processor.save_pretrained('./fine_tuned_blip_processor')


[]