In [1]:
import matplotlib.pyplot as plt
import string
import torch
import torchvision
from PIL import Image
from torch import nn, Tensor
from torchvision import transforms
import os
import math
import unicodedata
from copy import deepcopy
import re
import numpy as np
from torch.nn import functional as F
from tqdm import tqdm
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

# ----------------------------- Data Loader ------------------------------

In [2]:
NUM_WORKERS = os.cpu_count()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z!?]+", r" ", s)
    return s.strip()

In [4]:
SOS_token = "SOS"
EOS_token = "EOS"
PAD_token = "PAD"

class Lang:
    def __init__(self):
        self.word2index = {"SOS": 0,"EOS":1,"PAD":2}
        self.word2count = {"SOS": 1,"EOS":1,"PAD":1}
        self.index2word = {0: "SOS", 1: "EOS",2:"PAD"}
        self.n_words = 4
    
    def addSentence(self,sentence):
        for word in sentence.split(' '):
            self.addWord(word)
    
    def addWord(self,word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [5]:
IMG_SIZE = 224
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])

In [6]:
class Data(Dataset):
    def __init__(self,img_folder_path,caption_file):
        super().__init__()
        self.img_folder_path = img_folder_path
        self.image_dir = os.listdir(self.img_folder_path)
        self.caption_file = caption_file
        with open(self.caption_file) as f:
            self.lines = f.read().split('\n')
        self.lang = Lang()
        self.pairs = []
        self.seq_len = 0
        for line in self.lines:
            l = line.split('\t')
            if len(l) < 2:
                continue
            img_path = l[0][:-2]
            if img_path not in self.image_dir:
                continue
            caption = l[1]
            caption = normalizeString(caption)
            if len(caption.split(' ')) > 10:
                continue
            self.pairs.append([caption,img_path])
            self.lang.addSentence(caption)
            self.seq_len = max(self.seq_len,len(caption.split(' ')))

            
        self.sos_token = torch.tensor([self.lang.word2index[SOS_token]],dtype=torch.int64)
        self.eos_token = torch.tensor([self.lang.word2index[EOS_token]],dtype=torch.int64)
        self.pad_token = torch.tensor([self.lang.word2index[PAD_token]],dtype=torch.int64)
        self.seq_len += 2
            
    def __len__(self):
        return len(self.pairs)
    
    def __getitem__(self,idx):
        pair = self.pairs[idx]
        img_name = pair[1]
        caption = pair[0]
        tokens = [self.lang.word2index[word] for word in caption.split(' ')]
        num_pad = max(0,self.seq_len - len(tokens)- 2)
        dec_input = torch.cat(
            [
                self.sos_token,
                torch.tensor(tokens,dtype=torch.int64),
                torch.tensor([self.pad_token]*num_pad,dtype=torch.int64),
                self.eos_token
            ],
            dim = 0
        )
        
#         label = torch.cat(
#             [
#                 torch.tensor(tokens,dtype=torch.int64),
#                 self.eos_token,
#                 torch.tensor([self.pad_token]*num_pad,dtype=torch.int64),
#             ],
#             dim = 0
#         )
        
        img = Image.open(f'{self.img_folder_path}/{img_name}')
        img = transform(img)
        
        return {
            "dec_input":dec_input,
            "image":img,
        }
        

# ------------------------- PATCH EMBEDDINGS ------------------------

In [7]:
class PatchEmbedding(nn.Module):

    def __init__(self, encode_size:int, embed_dim:int):
        super().__init__()

        self.embed_dim = embed_dim
        resnet = torchvision.models.resnet101(pretrained=True)
        self.resnet = nn.Sequential(*list(resnet.children())[:-2])
        self.downsampling = nn.Conv2d(in_channels=2048,
                                      out_channels=embed_dim,
                                      kernel_size=1,
                                      stride=1,
                                      bias=False)
        self.bn = nn.BatchNorm2d(embed_dim)
        self.relu = nn.ReLU(inplace=True)
        self.adaptive_resize = nn.AdaptiveAvgPool2d(encode_size)

    def forward(self, images: Tensor):
        B = images.size()[0]
        out = self.resnet(images)
        out = self.relu(self.bn(self.downsampling(out)))
        out = self.adaptive_resize(out)
        out = out.view(B, self.embed_dim, -1).permute(0, 2, 1)
        return out

    def fine_tune(self, fine_tune=True):
        for p in self.resnet.parameters():
            p.requires_grad = False

        for c in list(self.resnet.children())[5:]:
            for p in c.parameters():
                p.requires_grad = fine_tune

# ------------------------ Positional Encoding -------------------------

In [8]:
class PositionalEncoding(nn.Module):
    def __init__(self, embed_dim: int, max_len: int):
        super().__init__()

        self.embed_dim = embed_dim
        pe = torch.zeros(max_len, embed_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, embed_dim, 2).float() *
            (-math.log(10000.0) / embed_dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x: Tensor) -> Tensor:
        x = x * math.sqrt(self.embed_dim)
        x = x + self.pe[:, :x.size(1)]
        return x

# ------------------------ Attention Mechanism -------------------------

In [9]:
class Attention(nn.Module):
    def __init__(self, embed_dim:int, heads:int, dropout:float):
        super().__init__()
        self.embed_dim = embed_dim
        self.heads = heads
        self.head_dim = embed_dim // heads
        self.dropout = nn.Dropout(dropout)
        assert (
            self.head_dim * heads == embed_dim
        ), "Embedding size needs to be divisible by heads"

        self.values = nn.Linear(self.embed_dim, self.embed_dim, bias=False)
        self.keys = nn.Linear(self.embed_dim, self.embed_dim, bias=False)
        self.queries = nn.Linear(self.embed_dim, self.embed_dim, bias=False)
        self.fc_out = nn.Linear(self.embed_dim, self.embed_dim,bias=False)
    
    def forward(self, query, keys, values, attn_mask = None,key_padding_mask = None):
        N = query.shape[0]

        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]
        
        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(query)
        
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = queries.reshape(N, query_len, self.heads, self.head_dim)

        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])
        
        if attn_mask is None:
            energy = energy / (self.embed_dim ** (1 / 2))
        else:
            energy = (energy + attn_mask) / (self.embed_dim ** (1 / 2))
        #print(f"Energy {energy.shape}")
        if key_padding_mask is None:
            attention = torch.softmax(energy,dim=3)
        else:
            attention = torch.softmax(energy + key_padding_mask,dim=3)
            
        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim
        )
        out = self.fc_out(out)
        avg_attn = attention.sum(dim=1)
        avg_attn /= self.heads
        
        return self.dropout(out),attention

# ----------------------- CNN FEED-FORWARD -------------------------

In [10]:
class CNNFF(nn.Module):
    def __init__(self,encode_size:int,embed_dim:int,feedforward_dim:int,dropout:float):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=encode_size,
                               out_channels=feedforward_dim,
                               kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=feedforward_dim,
                               out_channels=encode_size,
                               kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=dropout)
        self.layer_norm = nn.LayerNorm(embed_dim)
    
    def forward(self,inputs:Tensor):
        output = self.conv2(self.relu(self.conv1(inputs)))
        output = self.dropout(output)
        return self.layer_norm(output + inputs)

# ------------------------------- Encoder -------------------------------

In [11]:
class EncSelfAttention(nn.Module):
    def __init__(self,embed_dim:int,num_heads:int,dropout:float):
        super().__init__()
        
        self.attention = Attention(embed_dim,num_heads,dropout)
        self.layer_norm = nn.LayerNorm(embed_dim)
        
    def forward(self,enc_inp:Tensor):
        enc_out , _ = self.attention(enc_inp,enc_inp,enc_inp)
        enc_out = enc_out + enc_inp
        enc_out = self.layer_norm(enc_out)
        return enc_out

In [12]:
class EncoderLayer(nn.Module):
    def __init__(self,encode_size:int,embed_dim:int,feedforward_dim:int,num_heads:int,dropout:float):
        super().__init__()
        
        self.enc_attn = EncSelfAttention(embed_dim,num_heads,dropout)
        self.cnn = CNNFF(encode_size,embed_dim,feedforward_dim,dropout)
        
    def forward(self,enc_inp):
        enc_out = self.enc_attn(enc_inp)
        enc_out = self.cnn(enc_out)
        return enc_out

In [13]:
class Encoder(nn.Module):
    def __init__(self,layer:EncoderLayer,num_layers:int):
        super().__init__()
        
        self.layers = nn.ModuleList(
            [deepcopy(layer) for _ in range(num_layers)])
        
    def forward(self,x:Tensor):
        for layer in self.layers:
            x = layer(x)
            
        return x

# ------------------------------- Decoder -------------------------------

In [14]:
class DecoderLayer(nn.Module):
    def __init__(self,embed_dim:int ,num_heads:int ,feedforward_dim:int, dropout:float):
        super().__init__()
        
        self.dec_self_attn = Attention(embed_dim,num_heads,dropout)
        self.multihead_attn = Attention(embed_dim,num_heads,dropout)
        
        self.self_attn_norm = nn.LayerNorm(embed_dim)
        self.multihead_norm = nn.LayerNorm(embed_dim)
        
        self.self_attn_dropout = nn.Dropout(dropout)
        self.multihead_dropout = nn.Dropout(dropout)
        
        self.ff = nn.Sequential(nn.Linear(embed_dim,feedforward_dim),
                               nn.ReLU(inplace=True),
                               nn.Dropout(dropout),
                               nn.Linear(feedforward_dim,embed_dim))
        
        self.ff_norm = nn.LayerNorm(embed_dim)
        self.ff_dropout = nn.Dropout(dropout)
        
    def forward(self,dec_inps:Tensor ,enc_out: Tensor, tg_mask:Tensor = None, tg_pad_mask:Tensor = None):
        
        out, _ = self.dec_self_attn(dec_inps,dec_inps,dec_inps, attn_mask=tg_mask, key_padding_mask = tg_pad_mask)
        out = dec_inps + self.self_attn_dropout(out)
        out = self.self_attn_norm(out)
        out2, attn = self.multihead_attn(out,enc_out,enc_out)
        out = out + self.multihead_dropout(out2)
        out = self.multihead_norm(out)
        
        out2 = self.ff(out)
        out = self.ff_norm(out + self.ff_dropout(out2))
        return out, attn

In [15]:
class Decoder(nn.Module):
    def __init__(self,layer:DecoderLayer,vocab_size:int ,embed_dim:int, num_layers:int ,
                 max_len:int, dropout:float, pad_id:int):
        super().__init__()
        self.pad_id = pad_id
        self.cap_embed = nn.Embedding(vocab_size,embed_dim,padding_idx=pad_id)
        self.pos_embed = PositionalEncoding(embed_dim,max_len)
        
        self.layers = nn.ModuleList(
            [deepcopy(layer) for _ in range(num_layers)])
        
        self.dropout = nn.Dropout(dropout)
        
    def get_attn_mask(self, x:int):
        return torch.triu(torch.ones(x,x)*float('-inf'),diagonal=1)
    
    def get_pad_mask(self,x):
        mask = (x == self.pad_id)
        return mask[:,None,None,:]
        
    def forward(self, tg_cap:Tensor, src_img:Tensor):
        
        tgt_pad_mask = self.get_pad_mask(tg_cap)
        #print(f'Padding mask: {tgt_pad_mask.shape}')
        tgt_attn_mask = self.get_attn_mask(tg_cap.size()[1])
        tgt_attn_mask = tgt_attn_mask.to(tg_cap.device)
        #print(f"Attention mask: {tgt_attn_mask.shape}")
        tg_cap = self.cap_embed(tg_cap)
        tg_cap = self.dropout(self.pos_embed(tg_cap))
        
        attn_all = []
        for layer in self.layers:
            tg_cap, attn = layer(tg_cap ,src_img ,tgt_attn_mask ,tgt_pad_mask)
            attn_all.append(attn)
#         print(attn_all)
        attn_all = torch.stack(attn_all)
#         print(attn_all)
        return tg_cap,attn_all

# ----------------------------- Transformer ------------------------------

In [16]:
class Transformer(nn.Module):
    def __init__(self,vocab_size:int, embed_dim:int, encode_size:int, enc_ff_dim:int,
                dec_ff_dim:int, enc_n_layers:int, dec_n_layers:int, enc_n_heads:int,
                dec_n_heads:int, max_len:int, dropout:float = 0.1,
                pad_id:int = 2):
        super().__init__()
        
        enc_layer = EncoderLayer(encode_size, embed_dim, enc_ff_dim, enc_n_heads, dropout)
        dec_layer = DecoderLayer(embed_dim, dec_n_heads, dec_ff_dim, dropout)
        
        self.encoder = Encoder(enc_layer, enc_n_layers)
        self.decoder = Decoder(dec_layer, vocab_size, embed_dim, dec_n_layers, max_len, dropout, pad_id)
        
        self.predictor = nn.Linear(embed_dim,vocab_size,bias = False)
        
    def forward(self, images:Tensor , captions:Tensor):
        
        enc_out = self.encoder(images)
        tgt_cap, attn = self.decoder(captions,enc_out)
        #print(tgt_cap.shape)
        predictions = self.predictor(tgt_cap)
        return predictions.contiguous(), attn.contiguous()
    

In [17]:
def loss_fn(logits:Tensor, targets:Tensor, attn: Tensor,criterion):
    v_size = logits.size()[-1]
    targets = targets.contiguous()
    loss = criterion(logits.view(-1,v_size),targets.view(-1))
    
    attn = attn.permute(0,2,1,3,4)
    
    ln,hn = attn.size()[:2]
    
    alphas = 1.0*(1. - attn.sum(dim=3).view(ln,hn,-1))**2
    dsar = alphas.mean(-1).sum()
    
    return loss + dsar

In [18]:
def clip_gradient(embed_optim,transformer_optim):
    for optim in [embed_optim,transformer_optim]:
        for group in optim.param_groups:
            for param in group["params"]:
                if param.grad is not None:
                    param.grad.data.clamp_(-5.0,5.0)
                    
    return embed_optim,transformer_optim

In [19]:
def generate(embed,transformer,lang):
    img_path = "/kaggle/input/flikr8k/Flickr8K/Flicker8k_Images/1002674143_1b742ab4b8.jpg"
    img = Image.open(img_path)
    img = transform(img)
    img = img.unsqueeze(0).to(device)
    start = torch.full(size=(1, 1),fill_value=0,dtype=torch.long,device=device)
    
    with torch.no_grad():
        k = 5
        h = 14
        w = 14
        emb_out = embed(img)
        logits, attn = transformer(emb_out,start)

        log_prob = F.log_softmax(logits,dim=2)
        log_prob_top, ind_top = log_prob.topk(k,sorted=True)

        curr_pred = torch.cat(
            [start.expand(k,1),ind_top.view(k,1)],dim = 1)

    seq_pred = []
    seq_log_prob = []
    seq_attn = []

    while curr_pred.size(1) <= 10 and k > 0 and curr_pred.nelement():
        with torch.no_grad():
            img_exp = emb_out.expand(k,*emb_out.size()[1:])
            logits,attn = transformer(img_exp,curr_pred)

            log_prob = F.log_softmax(logits[:,-1:,:],dim=-1).squeeze(1)
            log_prob = log_prob + log_prob_top.view(k,1)

            log_prob_top, ind_top = log_prob.view(-1).topk(k,sorted = True)

            prev_seq_k, next_word_id = np.unravel_index(ind_top.cpu(),log_prob.size())
            next_word_id = torch.as_tensor(next_word_id).to(device).view(k,1)

            curr_pred = torch.cat((curr_pred[prev_seq_k],next_word_id),dim = 1)

            seq_end = (next_word_id == 1).view(-1)

        if torch.any(seq_end):
            seq_pred.extend(seq.tolist() for seq in curr_pred[seq_end])

            seq_log_prob.extend(log_prob_top[seq_end].tolist())

            attn = attn[-1].mean(dim=1).view(k,-1,h,w)

            seq_attn.extend(attn[prev_seq_k][seq_end].tolist())

            k -= torch.sum(seq_end)
            curr_pred = curr_pred[~seq_end]
            log_prob_top = log_prob_top[~seq_end]

    specials = [0,1,2]
    if seq_pred and seq_attn and seq_log_prob:
        seq_pred,seq_attn,seq_log_prob = zip(*sorted(
            zip(seq_pred,seq_attn,seq_log_prob),key = lambda tup:-tup[2]))

        text_pred = [[lang.index2word[s] for s in seq if s not in specials]
                     for seq in seq_pred]
        print(text_pred)

In [20]:
def run(embed,transformer):
    num_epochs = 100
    data = Data("/kaggle/input/flikr8k/Flickr8K/Flicker8k_Images","/kaggle/input/flikr8k/Flickr8K/Flickr8k_text/Flickr8k.lemma.token.txt")
    dataloader = DataLoader(data,16,shuffle = True,num_workers=NUM_WORKERS,pin_memory=True)
    
    embed_lr = 1e-4
    transformer_lr = 1e-4
    embed_params = filter(lambda p: p.requires_grad,embed.parameters())
    transformer_params = filter(lambda p: p.requires_grad,transformer.parameters())
    
    embed_optim = torch.optim.Adam(embed_params,embed_lr)
    transformer_optim = torch.optim.Adam(transformer_params,transformer_lr)
    
    criterion = nn.CrossEntropyLoss(ignore_index = 2).to(device)
    
    for epoch in range(num_epochs+1):
        embed.train()
        transformer.train()
        iterator = tqdm(dataloader,desc = f"Processing Epoch {epoch}:")
        for batch in iterator:
            images = batch["image"].to(device)
            tokens = batch["dec_input"].to(device)
            embed_optim.zero_grad()
            transformer_optim.zero_grad()
            
            with torch.set_grad_enabled(True):
                embed_out = embed(images)
                
                logits,attns = transformer(embed_out,tokens[:,:-1])
                
                loss = loss_fn(logits,tokens[:,1:],attns,criterion)
                
                loss.backward()
                embed_optim,transformer_optim = clip_gradient(embed_optim,transformer_optim)
#                 torch.nn.utils.clip_grad_norm_(embed.parameters(), 5)
#                 torch.nn.utils.clip_grad_norm_(transformer.parameters(), 5)
                embed_optim.step()
                transformer_optim.step()
                iterator.set_postfix({"loss": f"{loss.item():6.3f}"})
        embed.eval()
        transformer.eval()
        generate(embed,transformer,data.lang)
        if epoch < 70:
            if epoch%10 == 0:
                torch.save(transformer.state_dict(), f'transformer_{epoch}.pth')
                torch.save(embed.state_dict(), f'embed_{epoch}.pth')
        else:
            if epoch%5 == 0:
                torch.save(transformer.state_dict(), f'transformer_{epoch}.pth')
                torch.save(embed.state_dict(), f'embed_{epoch}.pth')

In [21]:
embed = PatchEmbedding(14,768).to(device)
transformer = Transformer(4258,768,196,768,3072,2,8,12,12,12,0.1,2).to(device)
embed.fine_tune(True)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:03<00:00, 59.0MB/s]


In [22]:
run(embed,transformer)

Processing Epoch 0:: 100%|██████████| 1303/1303 [05:11<00:00,  4.18it/s, loss=88.306]
Processing Epoch 1:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=88.130]
Processing Epoch 2:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=87.640]
Processing Epoch 3:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=87.680]
Processing Epoch 4:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.923]


[['a', 'little', 'girl', 'wear', 'a', 'yellow', 'dress', 'blow', 'bubble']]


Processing Epoch 5:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=87.039]


[['a', 'girl', 'be', 'paint', 'outside', 'with', 'her', 'hair', 'outside'], ['a', 'girl', 'be', 'paint', 'outside', 'with', 'her', 'hair', 'girl'], ['a', 'little', 'girl', 'be', 'paint', 'outside', 'with', 'her', 'hair'], ['a', 'girl', 'be', 'paint', 'outdoors', 'with', 'her', 'hair', 'girl']]


Processing Epoch 6:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.813]
Processing Epoch 7:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.734]


[['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 8:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.612]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 9:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.553]
Processing Epoch 10:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.396]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'yard']]


Processing Epoch 11:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=86.258]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 12:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=86.128]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 13:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=86.254]
Processing Epoch 14:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=86.186]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 15:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=86.113]
Processing Epoch 16:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=86.066]
Processing Epoch 17:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=86.092]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 18:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=85.992]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'yard']]


Processing Epoch 19:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=85.994]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 20:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.992]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'yard'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'ground'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'field']]


Processing Epoch 21:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.046]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 22:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.981]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'on', 'the', 'grassy', 'ground']]


Processing Epoch 23:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=86.019]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 24:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.966]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'bush'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'clothing'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'on', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'hay']]


Processing Epoch 25:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.841]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 26:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.927]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 27:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.878]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 28:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=86.001]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 29:: 100%|██████████| 1303/1303 [05:09<00:00,  4.22it/s, loss=85.856]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'field'], ['young', 'girl', 'with', 'pigtail', 'paint', 'house', 'at', 'a', 'market']]


Processing Epoch 30:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.994]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 31:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.883]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 32:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.885]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 33:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.919]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 34:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.967]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 35:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.885]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 36:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.816]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'yard'], ['young', 'child', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 37:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.867]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'yard']]


Processing Epoch 38:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.858]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'distance'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'of', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 39:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.882]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'child', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'boy', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 40:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.849]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'long', 'hair', 'play', 'with', 'a', 'bubble']]


Processing Epoch 41:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.822]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'a', 'stuff', 'animal', 'with', 'her', 'paint']]


Processing Epoch 42:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.758]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 43:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=85.963]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['two', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 44:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.770]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'with', 'bubble', 'in', 'the', 'grass']]


Processing Epoch 45:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.828]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'long', 'hair', 'cover', 'in', 'grass']]


Processing Epoch 46:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.832]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'yard'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'on', 'her', 'face'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside']]


Processing Epoch 47:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.869]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'blue', 'hair', 'wear', 'a', 'bright', 'colored', 'shirt'], ['girl', 'with', 'brightly', 'colored', 'hair', 'wear', 'a', 'green', 'shirt'], ['girl', 'with', 'brightly', 'colored', 'hair', 'wear', 'a', 'blue', 'shirt']]


Processing Epoch 48:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.865]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'on', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'air'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'field']]


Processing Epoch 49:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.815]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 50:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.871]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 51:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.793]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 52:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.784]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'little', 'girl', 'be', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'long', 'hair', 'ribbon', 'in', 'the', 'grass'], ['a', 'little', 'girl', 'be', 'eat', 'a', 'piece', 'of', 'bread'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'field']]


Processing Epoch 53:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.868]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 54:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.802]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 55:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.778]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 56:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.716]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 57:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.807]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 58:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.747]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'an', 'open', 'field']]


Processing Epoch 59:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.833]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 60:: 100%|██████████| 1303/1303 [05:09<00:00,  4.20it/s, loss=85.838]
Processing Epoch 61:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.727]


[['young', 'girl', 'with', 'blue', 'hair', 'blow', 'a', 'bubble']]


Processing Epoch 62:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=85.842]
Processing Epoch 63:: 100%|██████████| 1303/1303 [05:10<00:00,  4.20it/s, loss=85.826]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 64:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.839]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 65:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.861]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'a', 'field']]


Processing Epoch 66:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.824]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 67:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.704]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 68:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.836]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 69:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.831]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 70:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.744]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'grass', 'field'], ['young', 'girl', 'with', 'pigtail', 'hang', 'outside', 'in', 'the', 'grass']]


Processing Epoch 71:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.748]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'little', 'girl', 'be', 'sit', 'in', 'a', 'grass', 'field']]


Processing Epoch 72:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.781]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['small', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'grass']]


Processing Epoch 73:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.802]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 74:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.827]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'water'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'air'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'snow'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 75:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.747]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 76:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.746]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['kid', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'a', 'lawn']]


Processing Epoch 77:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.748]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 78:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.691]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 79:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.797]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'an', 'innertube', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'an', 'innertube', 'over', 'her', 'head']]


Processing Epoch 80:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.759]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['child', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['woman', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 81:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.819]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['woman', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 82:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.743]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 83:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.747]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['two', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 84:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.761]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside']]


Processing Epoch 85:: 100%|██████████| 1303/1303 [05:07<00:00,  4.23it/s, loss=85.722]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 86:: 100%|██████████| 1303/1303 [05:07<00:00,  4.23it/s, loss=85.717]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 87:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.830]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'child', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'snow']]


Processing Epoch 88:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.798]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside'], ['two', 'young', 'girl', 'with', 'pigtail', 'paint', 'outside'], ['young', 'girl', 'with', 'colorful', 'shirt', 'and', 'weave', 'outside'], ['young', 'girl', 'with', 'colorful', 'shirt', 'and', 'blanket']]


Processing Epoch 89:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.718]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['two', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['two', 'girl', 'be', 'sit', 'in', 'a', 'field', 'of', 'grass']]


Processing Epoch 90:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.830]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['a', 'young', 'girl', 'wear', 'a', 'colorful', 'shirt', 'be', 'swing'], ['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'a', 'grass']]


Processing Epoch 91:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.769]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 92:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.793]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'girl', 'wear', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 93:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.737]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['little', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 94:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.789]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 95:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.805]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 96:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.871]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['young', 'child', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 97:: 100%|██████████| 1303/1303 [05:08<00:00,  4.22it/s, loss=85.751]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 98:: 100%|██████████| 1303/1303 [05:08<00:00,  4.23it/s, loss=85.743]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 99:: 100%|██████████| 1303/1303 [05:09<00:00,  4.21it/s, loss=85.752]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass'], ['girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]


Processing Epoch 100:: 100%|██████████| 1303/1303 [05:07<00:00,  4.24it/s, loss=85.839]


[['young', 'girl', 'with', 'pigtail', 'paint', 'outside', 'in', 'the', 'grass']]
