In [None]:
'''
Mount Google Drive, copy data to runtime, and unzip folders

Make sure to put a link to "EC 523 Project" in your main google drive!
'''

from google.colab import drive
drive.mount('/content/drive')

! cp /content/drive/MyDrive/'Deep Learning Proj'/train.zip /content
! cp /content/drive/MyDrive/'Deep Learning Proj'/test.zip /content
! cp /content/drive/MyDrive/'Deep Learning Proj'/val.zip /content
! cp /content/drive/'My Drive'/'Deep Learning Proj'/math.txt /content

# from path will differ depending on where you saved the zip file in Google Drive
! unzip -DD -q  /content/train.zip -d  /content/
! unzip -DD -q  /content/test.zip -d  /content/
! unzip -DD -q  /content/val.zip -d  /content/



Mounted at /content/drive


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
'''
Length of datasets
'''
num_train_str = !ls train | wc -l
num_test_str = !ls test | wc -l
num_val_str = !ls val | wc -l
num_train = int(num_train_str[0])
num_test = int(num_test_str[0])
num_val = int(num_val_str[0])

print(f'Number of train images: {num_train}\nNumber of test images: {num_test}\nNumber of validation images: {num_val}\nTotal images: {num_train+num_test+num_val}')

Number of train images: 158480
Number of test images: 30637
Number of validation images: 6765
Total images: 195882


In [None]:
train_root = "/content/train/"
test_root = "/content/test/"
val_root = "/content/val/"
label_file = "/content/math.txt"

In [None]:
import os
import cv2
import torch.utils.data
from PIL import Image


class LatexDataset(torch.utils.data.Dataset):
  def __init__(self, transform=None, dataroot=train_root): # can change dataroot to be either train_root, test_root, val_root
        '''Initialize the dataset.'''
        self.transform = transform
        self.dataroot = dataroot
        self.labels_txt = label_file
        self._parse()

  def _parse(self):
        '''
        Parse the math.txt file.
        Populates the following private variables:
        - self.im_paths: A list of strings storing the associated image paths
        - self.labels: A list of strings, where each string is the latex code for an image
        '''
        def getImPath(idx):
            # Find image in either train, test, or validation folder
            imname = str(idx - 1).zfill(7) + '.png'
            if os.path.exists(f'{self.dataroot}{imname}'):
              impath = f'{self.dataroot}{imname}'
            else:
              return None

            try:
                Image.open(impath).verify()
            except Exception as e:
                # Some images can't be opened
                # print(f"Image at path {impath} is corrupted. Error: {e}")
                return None

            return impath

        self.im_paths = []
        self.labels = []

        with open(self.labels_txt) as f:
            for idx, line in enumerate(f):
                impath = getImPath(idx+1)

                if impath is not None:
                    labels = line.strip('\n')
                    if len(labels) < 700:
                      self.im_paths.append(impath)            # Image name
                      self.labels.append(labels)    # String of latex code


  def __len__(self):
        '''Return length of the dataset.'''
        assert len(self.labels) == len(self.im_paths)
        return len(self.labels)

  def __getitem__(self, index):
        '''
        Return the (image, attributes) tuple.
        This function gets called when you index the dataset.
        '''
        def img_load(index):
            imraw = Image.open(self.im_paths[index])
            imgray = imraw.convert('L')                         # Convert image to greyscale
            imthresh = imgray.point(lambda p: p > 240 and 255)  # Threshold image to remove background (white)
            im = self.transform(imthresh)
            return im

        target = self.labels[index]
        return img_load(index), target

In [None]:
'''
Dictionary block: converts a LaTeX string to a dictionary of latex tokens, where
each unique token has its own entry and integer value assigned to it

'''
class LatexDict():
    def __init__(self, num_tokens=256):
        self.labels_txt = label_file
        self.num_tokens = num_tokens
        self.latex_dict = {'<UKN>':0, '<PAD>':1} # Initialize with token for unknown and for padding
        self.latex_dict_inverse = {0:'<UKN>', 1:'<PAD>'} # Initialize inverse dict for quicker reverse lookups
        self.create_dict()

    def create_dict(self):
        # Go through entire label file and populate dictionary
        with open(self.labels_txt) as f:
            for line in f:
                tokens = line.split()
                for token in tokens:
                    if token not in self.latex_dict:
                        # Assign a new ID for the unseen token
                        new_id = len(self.latex_dict)
                        self.latex_dict[token] = new_id
                        self.latex_dict_inverse[new_id] = token

    def map_tokens(self, tex_str_list, batch_size):
        ids_tensor = torch.full((batch_size, self.num_tokens), self.latex_dict['<PAD>'], dtype=torch.float32)

        for row, tex_str in enumerate(tex_str_list):
            tokens = tex_str.split()
            for col, token in enumerate(tokens):
                ids_tensor[row, col] = self.latex_dict[token]

        return ids_tensor

    def tokens_to_tex(self, token_vec):
        tex_str = ' '
        for token_id in token_vec.tolist():
            if token_id in self.latex_dict_inverse:
                if self.latex_dict_inverse[token_id] != '<PAD>' and self.latex_dict_inverse[token_id] != '<UKN>':
                    tex_str += self.latex_dict_inverse[token_id] + ' '

        return tex_str

    def __dict__(self):
        return self.latex_dict

    def __len__(self):
        return len(self.latex_dict)

# latex_dict = LatexDict()


Creating the CNN Block

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch.optim as optim
import math

class CNN_Block(nn.Module):
    def __init__(self):
        super(CNN_Block, self).__init__()
        # self.conv1 = nn.Conv2d(3, 64, 3)
        self.conv1 = nn.Conv2d(1, 64, 3, padding=1)    # Images are originally one channel, added padding as well
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(256, 256, 3, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2)

        #self.fc1 = nn.Linear(256 * 2 * 8, 1024)  # Adjusted input size based on the output size of the convolutional layers
        #self.fc2 = nn.Linear(1024, 512)


    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = F.relu(self.conv4(x))
        #print('x: shape:', x.shape)
        #x = x.view(64,-1)   # Flatten so this can be used in linear layers

        #x = F.relu(self.fc1(x))
        #x = self.fc2(x)

        return x


In [None]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        #self.num_layers = num_layers
        self.batch_size = batch_size

        self.cell_state = None
        self.hidden_state = None

        #Input-gate parameters
        self.W_i = nn.Parameter(torch.zeros(hidden_size, input_size + hidden_size))
        self.b_i = nn.Parameter(torch.zeros(hidden_size, 1))
        #forget_gate parameters
        self.w_f = nn.Parameter(torch.zeros(hidden_size,  input_size + hidden_size))
        self.b_f = nn.Parameter(torch.zeros(hidden_size, 1))
        #candidate parameters
        self.w_c = nn.Parameter(torch.zeros(hidden_size, input_size + hidden_size))
        self.b_c = nn.Parameter(torch.zeros(hidden_size, 1))
        #output gate parameters
        self.w_o = nn.Parameter(torch.zeros(hidden_size, input_size + hidden_size))
        self.b_o = nn.Parameter(torch.zeros(hidden_size, 1))

        self.init_weights()
        self.reset_LSTM_states(batch_size)
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    def init_weights(self):
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def reset_LSTM_states(self, batch_size):
        self.cell_state = torch.zeros(batch_size, self.hidden_size)
        self.hidden_state = torch.zeros(batch_size, self.hidden_size)

    def forward(self, x):
        #print('here',x.shape, self.hidden_state.shape)

        X_H = torch.cat((x.T.to(self.device), self.hidden_state.to(self.device)), 1)
        input_update = torch.sigmoid(torch.matmul(self.W_i.to(self.device), X_H.T.to(self.device)) + self.b_i.to(self.device))
        forget_update = torch.sigmoid(torch.matmul(self.w_f.to(self.device), X_H.T.to(self.device)) + self.b_f.to(self.device))
        #print('forget update',forget_update.shape)
        #print('input update',input_update.shape)
        candidate_update = torch.tanh(torch.matmul(self.w_c.to(self.device), X_H.T.to(self.device)) + self.b_c.to(self.device))
        #print('candi update',candidate_update.shape)
        #print('cell_state',self.cell_state.shape)
        self.cell_state = forget_update.T.to(self.device) * self.cell_state.to(self.device) + input_update.T.to(self.device) * candidate_update.T.to(self.device)
        output_update = torch.sigmoid(torch.matmul(self.w_o.to(self.device), X_H.T.to(self.device)) + self.b_o.to(self.device))
        self.hidden_state = output_update.T.to(self.device) * torch.tanh(self.cell_state.to(self.device))
        #print('hidden_state',self.hidden_state.shape)
        return self.hidden_state.T

In [None]:
class attention(nn.Module):
    def __init__(self, beta_size, hidden_size, v_length):
        super(attention, self).__init__()
        #weights for the hidden layer
        self.w_h = nn.Linear(hidden_size, beta_size, bias=False)
        #weights for the encoded image
        self.w_v = nn.Linear(v_length, beta_size, bias=False)
        #weights for the betas
        self.w_beta = nn.Parameter(torch.Tensor(beta_size))
        nn.init.uniform_(self.w_beta, -1e-2, 1e-2)

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.init_weights()

    def init_weights(self):
        torch.nn.init.xavier_uniform_(self.w_h.weight)
        torch.nn.init.xavier_uniform_(self.w_v.weight)

    def forward(self, V_new, h_t):
        #Multiplication
        U_t = torch.tanh(self.w_h(h_t.to(self.device)).unsqueeze(1) + self.w_v(V_new.to(self.device))) # [B, H' * W', C]

        #activation + sum
        E_t = torch.sum(U_t.to(self.device) * self.w_beta.to(self.device), dim=-1)

        #activation
        A_t = torch.softmax(E_t.to(self.device), dim = 1).unsqueeze(1)

        C_t = torch.matmul(A_t.to(self.device), V_new.to(self.device)).squeeze(1)

        return C_t, A_t

In [None]:
def PositionalEmbedding2D(D_model,height,width):
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  if D_model % 4 != 0:
        raise ValueError("Cannot use sin/cos positional encoding with "
                         "odd dimension (got dim={:d})".format(D_model))
  pe = torch.zeros(D_model,height,width)
  d_model = int(D_model / 2)
  div_term = torch.exp(torch.arange(0., d_model, 2) * -(math.log(10000.0) / d_model))
  pos_w = torch.arange(0., width).unsqueeze(1)
  pos_h = torch.arange(0., height).unsqueeze(1)
  pe[0:d_model:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
  pe[1:d_model:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
  pe[d_model::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
  pe[d_model + 1::2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)

  return pe


In [None]:
class BeamSearch(nn.Module):

    def __init__(self, end_index: int,
                 max_steps: int = 50,
                 beam_size: int = 10,
                 per_node_beam_size: int = None) -> None:
        self._end_index = end_index
        self.max_steps = max_steps
        self.beam_size = beam_size
        self.per_node_beam_size = per_node_beam_size or beam_size

    def search(self, start_predictions, start_state, step):
        batch_size = start_predictions.size()[0]


In [None]:
'''
Overall Model Class
'''

class Model(nn.Module):
    def __init__(self, embedding_size, hidden_size, batch_size, sequence_length, vocab_size, o_layer_size, v_length=256):
          super().__init__()
          # Static params
          self.v_length = v_length
          self.batch_size = batch_size
          self.sequence_length = sequence_length
          self.vocab_size = vocab_size
          self.o_layer_size = o_layer_size
          input_size = embedding_size + o_layer_size
          self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

          # Network Modules
          self.CNN = CNN_Block()
          self.LSTM_module = LSTM(input_size, hidden_size, batch_size)
          self.AttentionMechanism = attention(beta_size=256, hidden_size=hidden_size, v_length=v_length) # TODO: Change these hard-coded values

          # The other layers
          # self.E = nn.Parameter(torch.zeros(embedding_size, vocab_size)).double()
          self.E = nn.Embedding(vocab_size, embedding_size)
          self.O = nn.Linear(v_length + hidden_size, o_layer_size, bias=False)
          self.W_out = nn.Linear(o_layer_size, vocab_size, bias=False)
          self.softmax = nn.Softmax(1)

          # Initialization of h_t
          self.init_Wh = nn.Linear(v_length, hidden_size)

          # Beam search
          #self.beam_size = 3
          #self._beam_search = BeamSearch(self.vocab_size - 2, self.sequence_length, beam_size = self.beam_size)

    def forward(self, X_batch, labels):
        self.train()

        # 1) CNN, aka "HyperCube Creation" :)
        V = self.CNN(X_batch).to(self.device)

        # Transforming into a
        #print('V shape',V.shape)
        #print('V shape',V.shape)
        pe2 = (PositionalEmbedding2D(256, V.size(2), V.size(3)).repeat(self.batch_size,1,1,1)).to(self.device)
        #print(pe2.shape)
        V = V + pe2[:, :, : V.size(2), : V.size(3)]
        V = V.permute(0, 2, 3, 1)

        batch_size, H_prime, W_prime, C = V.shape
        V = torch.reshape(V, (batch_size, H_prime * W_prime, C))
        V = V
        # Pre-allocate memory
        output = torch.zeros(self.batch_size, self.sequence_length, self.vocab_size).to(self.device)

        # Initialize Y and O
        Y_t = (self.vocab_size - 3) * torch.ones(self.batch_size).to(self.device)
        O_t = torch.zeros(self.o_layer_size, self.batch_size).to(self.device)

        # Reset S_t
        self.LSTM_module.reset_LSTM_states(batch_size)

        # Initialize H_t
        mean_encoder_out = torch.mean(V, 1).to(self.device)
        #print('mean encoder out:',mean_encoder_out.shape)
        H_t = torch.transpose(torch.tanh(self.init_Wh(mean_encoder_out)), 0, 1).to(self.device)
        #print('H_t',H_t.shape)
        self.LSTM_module.H_t = H_t
        #print('y_t before',Y_t.shape)
        for i in range(self.sequence_length):
            #print('For loop : O_t , V', O_t.shape,V.shape)
            O_t, logits, _ = self.step_decoding(O_t.to(self.device), V.to(self.device), Y_t.to(self.device), True)

            output[:, i, :] = logits

            # Next in the sequence
            #Y_t = labels[:, i]

            Y_t = torch.argmax(logits, dim=1)
            #print('there, yt',Y_t.shape)

        #return output
        return output

    def step_decoding(self, O_t, V, Y_t, soft_max = True):
        # The input

        #print('hrererer')
        #print(Y_t)
        Embedded_vec = self.E(Y_t.long())
        X_t = torch.cat((torch.transpose(Embedded_vec, 0, 1), O_t), 0)
        #print(X_t.shape)
        # Update hidden states
        H_t = self.LSTM_module(X_t)

        # Attention mechanism
        C_t, _ = self.AttentionMechanism(V, torch.transpose(H_t, 0, 1))

        # O_t
        #print('ct',C_t.shape)
        #print('ht',H_t.shape)
        concat = torch.transpose(torch.cat((H_t, C_t.T), 0), 0, 1)
        #print('concat',concat.shape)
        linear_O = self.O(concat)
        O_t = torch.tanh(linear_O)
        logits = self.W_out(O_t)
        #print('logits',logits.shape)
        O_t = torch.transpose(O_t, 0, 1)
        #print('O-t',O_t.shape)

        if soft_max:
            logits = self.softmax(logits)
        return O_t, logits, (H_t, C_t)

    def Predict(self, logits):
      pass

    def forward_predict(self, X_batch):
        self.eval()
        # 1) CNN, aka "HyperCube Creation" :)
        with torch.no_grad():
            V = self.CNN(X_batch)

            # Transforming into a cube
            V = V.permute(0, 2, 3, 1)
            batch_size, H_prime, W_prime, C = V.shape
            V = torch.reshape(V, (batch_size, H_prime * W_prime, C))

            # Pre-allocate memory
            output = torch.zeros(batch_size, self.sequence_length, self.vocab_size).double()

            # Initialize Y and O
            Y_t = (self.vocab_size - 3) * torch.ones(batch_size).long()
            O_t = torch.zeros(self.o_layer_size, batch_size).double()

            # Reset S_t
            self.LSTM_module.reset_LSTM_states(batch_size)

            # Initialize H_t
            mean_encoder_out = torch.mean(V, 1)
            H_t = torch.transpose(torch.tanh(self.init_Wh(mean_encoder_out)), 0, 1)
            self.LSTM_module.H_t = H_t

            for i in range(self.sequence_length):
                O_t, logits, _ = self.step_decoding(O_t.double(), V.double(), Y_t.double(), True)

                output[:, i, :] = logits

                # Greedy approach
                Y_t = torch.argmax(logits, dim=1)

        return output

In [None]:
'''
Initialize dataset and image preprocessing

NOTE:
    Some of the images in the dataset are corrupted. To deal with this,
    there is a check for each image to ensure that it can be loaded.
'''
import torchvision.transforms as transforms

reduced_imsize = (32, 128)  # Images are reduced to this size

# Define the transform pipeline - add normalization?
transform = transforms.Compose([
    transforms.Resize(reduced_imsize),
    transforms.ToTensor(),
])

train_dataset = LatexDataset(transform=transform, dataroot=train_root)
test_dataset = LatexDataset(transform=transform, dataroot=test_root)
val_dataset = LatexDataset(transform=transform, dataroot=val_root)

# Device settings
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

Device: cuda:0


In [None]:
'''
Initialize hyperperameters, trainloader, and dictionary of LaTeX token mappings
'''

# Hyperparameters
batch_size = 32
learning_rate = 0.0001
weight_decay = 0.00001  # (L2 penalty)

max_tokens = 700        # Maximum number of tokens in a latex string
latex_dict = LatexDict(num_tokens=max_tokens)

trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8,drop_last=True)

# print(f'Dictionary length: {latex_dict.__len__()}')
# print(f'Dictionary: {latex_dict.__dict__()}')

In [None]:
'''
Initialize Model

Initialize Loss Functions:
1. Normal Cross-Entropy Loss between prediction and label
2. LaTeX compile test:
    - Custom function, returns True if code can compile into LaTeX, False if not

Initialize Optimizer:
1. Adam Optimizer
'''

    # Hyperparameters
embedding_size = 80; # number of rows in the E-matrix
o_layer_size = 700  ;  # size of o-vektorn TODO: What should this be?
hidden_size = 700  ;
sequence_length = 700  ; vocab_size = 700  ;
batch_size = 32
n_epochs = 2
beam_size = 5
model = Model(embedding_size=embedding_size, hidden_size=hidden_size, batch_size=batch_size, sequence_length=sequence_length, vocab_size=vocab_size, o_layer_size = o_layer_size).to(device)
criterion = nn.CrossEntropyLoss()   ## May need to change this
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate, weight_decay=0)

In [None]:
pbar_test = trainloader
images, y = next(iter(pbar_test))
y_vec = latex_dict.map_tokens(list(y), batch_size=batch_size)
print(y_vec)
latexx = latex_dict.tokens_to_tex(y_vec[0])
print(latexx)

print(y_vec.shape)


y_vec = torch.tensor(y_vec, dtype=torch.long)
y_vec = torch.LongTensor(y_vec)
one_hot = torch.nn.functional.one_hot(y_vec, num_classes=max_tokens)
print(one_hot.size())
print(one_hot)

In [None]:
from tqdm import tqdm

'''
Training Loop
'''

num_epoch = 10

for epoch in range(num_epoch):
    print('epoch:', epoch)
    pbar = tqdm(trainloader)
    for images, y in pbar:

        images = images.to(device)              # Send to gpu

        y_vec = latex_dict.map_tokens(list(y), batch_size=batch_size)
        #print(y_vec)
        latexx = latex_dict.tokens_to_tex(y_vec[0])
        #print(latexx)
        y_vec = torch.tensor(y_vec, dtype=torch.long)
        y_vec = torch.LongTensor(y_vec)
        one_hot = torch.nn.functional.one_hot(y_vec, num_classes=max_tokens)
        one_hot = one_hot.to(device)                # Send to gpu
        #print(y_vec[0].shape)
        #print(images.shape)
        #V = images.reshape(images.shape[0], images.shape[1] * images.shape[2], images.shape[3])
        torch.nn.utils.clip_grad_norm_(model.parameters(),max_norm=1.0)
        predictions = model.forward(images,one_hot)        # Get predictions

        print(torch.argmax(predictions[1]),torch.argmax(one_hot[1]))
        print(predictions.shape,one_hot.shape)
        one_hot = torch.tensor(one_hot, dtype=torch.float)
        #print('y_vec',y_vec.shape)
        #print('pred',predictions.shape)
        loss1 = criterion(predictions, y_vec.to(device))   # Calculate first loss function
        print(loss1)
        # loss2 = can_compile(precitions)         # Check if output can compile

        optimizer.zero_grad()

        loss1.backward()
        optimizer.step()


epoch: 0


  y_vec = torch.tensor(y_vec, dtype=torch.long)


tensor(1062, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  one_hot = torch.tensor(one_hot, dtype=torch.float)
  0%|          | 1/4921 [00:02<3:51:02,  2.82s/it]

tensor(1062, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 2/4921 [00:04<3:13:13,  2.36s/it]

tensor(1407, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 3/4921 [00:06<3:01:18,  2.21s/it]

tensor(18907, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 4/4921 [00:08<2:55:02,  2.14s/it]

tensor(18907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 5/4921 [00:10<2:51:06,  2.09s/it]

tensor(18907, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 6/4921 [00:13<2:51:47,  2.10s/it]

tensor(18907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 7/4921 [00:15<2:53:01,  2.11s/it]

tensor(17507, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 8/4921 [00:17<2:57:05,  2.16s/it]

tensor(17507, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 9/4921 [00:19<2:53:09,  2.12s/it]

tensor(18907, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 10/4921 [00:21<2:50:24,  2.08s/it]

tensor(18907, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 11/4921 [00:23<2:48:08,  2.05s/it]

tensor(21007, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 12/4921 [00:25<2:47:16,  2.04s/it]

tensor(22407, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 13/4921 [00:27<2:48:08,  2.06s/it]

tensor(35707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 14/4921 [00:29<2:48:09,  2.06s/it]

tensor(46907, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5511, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 15/4921 [00:31<2:50:49,  2.09s/it]

tensor(29407, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5510, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 16/4921 [00:33<2:48:57,  2.07s/it]

tensor(32207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5510, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 17/4921 [00:35<2:47:46,  2.05s/it]

tensor(284207, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5510, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 18/4921 [00:37<2:47:25,  2.05s/it]

tensor(40607, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5510, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 19/4921 [00:39<2:48:53,  2.07s/it]

tensor(36407, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5510, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 20/4921 [00:42<2:50:14,  2.08s/it]

tensor(39907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5510, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 21/4921 [00:44<2:49:12,  2.07s/it]

tensor(49707, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5509, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 22/4921 [00:46<2:48:24,  2.06s/it]

tensor(441707, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5509, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 23/4921 [00:48<2:50:40,  2.09s/it]

tensor(42707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5508, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  0%|          | 24/4921 [00:50<2:48:33,  2.07s/it]

tensor(254807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5508, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 25/4921 [00:52<2:48:25,  2.06s/it]

tensor(47607, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5507, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 26/4921 [00:54<2:48:44,  2.07s/it]

tensor(51807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5506, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 27/4921 [00:56<2:49:00,  2.07s/it]

tensor(91007, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5506, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 28/4921 [00:58<2:47:18,  2.05s/it]

tensor(44107, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5505, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 29/4921 [01:00<2:46:27,  2.04s/it]

tensor(47607, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5504, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 30/4921 [01:02<2:45:40,  2.03s/it]

tensor(43407, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5503, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 31/4921 [01:04<2:46:14,  2.04s/it]

tensor(44807, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5503, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 32/4921 [01:06<2:50:33,  2.09s/it]

tensor(97307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5502, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 33/4921 [01:08<2:49:53,  2.09s/it]

tensor(27307, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5501, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 34/4921 [01:11<2:49:33,  2.08s/it]

tensor(39907, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5500, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 35/4921 [01:13<2:47:48,  2.06s/it]

tensor(29407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5501, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 36/4921 [01:15<2:47:09,  2.05s/it]

tensor(29407, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 37/4921 [01:17<2:47:05,  2.05s/it]

tensor(37107, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 38/4921 [01:19<2:48:16,  2.07s/it]

tensor(21007, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 39/4921 [01:21<2:48:35,  2.07s/it]

tensor(19607, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 40/4921 [01:23<2:52:20,  2.12s/it]

tensor(22407, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 41/4921 [01:25<2:50:57,  2.10s/it]

tensor(18207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5500, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 42/4921 [01:27<2:50:12,  2.09s/it]

tensor(18207, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 43/4921 [01:29<2:49:50,  2.09s/it]

tensor(16107, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 44/4921 [01:31<2:49:43,  2.09s/it]

tensor(16107, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 45/4921 [01:33<2:49:20,  2.08s/it]

tensor(13307, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5499, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 46/4921 [01:36<2:50:08,  2.09s/it]

tensor(16807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 47/4921 [01:38<2:49:46,  2.09s/it]

tensor(13307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 48/4921 [01:40<2:52:26,  2.12s/it]

tensor(15407, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5501, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 49/4921 [01:42<2:50:25,  2.10s/it]

tensor(14707, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5502, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 50/4921 [01:44<2:49:30,  2.09s/it]

tensor(15407, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5500, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 51/4921 [01:46<2:49:31,  2.09s/it]

tensor(14707, device='cuda:0') tensor(189, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5500, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 52/4921 [01:48<2:48:33,  2.08s/it]

tensor(14707, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 53/4921 [01:50<2:50:00,  2.10s/it]

tensor(14707, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 54/4921 [01:52<2:50:34,  2.10s/it]

tensor(15407, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 55/4921 [01:54<2:48:48,  2.08s/it]

tensor(15407, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 56/4921 [01:57<2:52:51,  2.13s/it]

tensor(9807, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 57/4921 [01:59<2:51:05,  2.11s/it]

tensor(17507, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 58/4921 [02:01<2:48:58,  2.08s/it]

tensor(14007, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 59/4921 [02:03<2:48:26,  2.08s/it]

tensor(14007, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 60/4921 [02:05<2:49:25,  2.09s/it]

tensor(11907, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|          | 61/4921 [02:07<2:49:30,  2.09s/it]

tensor(11907, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 62/4921 [02:09<2:50:01,  2.10s/it]

tensor(12607, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 63/4921 [02:11<2:48:58,  2.09s/it]

tensor(11907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 64/4921 [02:13<2:51:10,  2.11s/it]

tensor(9807, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 65/4921 [02:15<2:48:50,  2.09s/it]

tensor(9807, device='cuda:0') tensor(350, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 66/4921 [02:17<2:48:29,  2.08s/it]

tensor(9807, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 67/4921 [02:19<2:49:09,  2.09s/it]

tensor(9807, device='cuda:0') tensor(112, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 68/4921 [02:22<2:50:18,  2.11s/it]

tensor(11207, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 69/4921 [02:24<2:48:52,  2.09s/it]

tensor(13307, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 70/4921 [02:26<2:47:30,  2.07s/it]

tensor(11207, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 71/4921 [02:28<2:46:55,  2.06s/it]

tensor(10507, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 72/4921 [02:30<2:50:52,  2.11s/it]

tensor(15407, device='cuda:0') tensor(11, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  1%|▏         | 73/4921 [02:32<2:50:47,  2.11s/it]

tensor(10507, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 74/4921 [02:34<2:50:50,  2.11s/it]

tensor(9807, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 75/4921 [02:36<2:50:37,  2.11s/it]

tensor(10507, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 76/4921 [02:38<2:49:34,  2.10s/it]

tensor(11207, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 77/4921 [02:40<2:48:27,  2.09s/it]

tensor(9807, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 78/4921 [02:43<2:47:44,  2.08s/it]

tensor(14007, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 79/4921 [02:45<2:47:45,  2.08s/it]

tensor(9807, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 80/4921 [02:47<2:53:17,  2.15s/it]

tensor(10507, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 81/4921 [02:49<2:51:51,  2.13s/it]

tensor(12607, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 82/4921 [02:51<2:51:02,  2.12s/it]

tensor(11907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 83/4921 [02:53<2:49:44,  2.11s/it]

tensor(11907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 84/4921 [02:55<2:47:15,  2.07s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 85/4921 [02:57<2:46:17,  2.06s/it]

tensor(9107, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 86/4921 [02:59<2:47:56,  2.08s/it]

tensor(11907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 87/4921 [03:01<2:49:19,  2.10s/it]

tensor(9107, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 88/4921 [03:04<2:51:34,  2.13s/it]

tensor(14707, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 89/4921 [03:06<2:49:48,  2.11s/it]

tensor(9807, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 90/4921 [03:08<2:47:38,  2.08s/it]

tensor(9807, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 91/4921 [03:10<2:46:16,  2.07s/it]

tensor(9807, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 92/4921 [03:12<2:46:24,  2.07s/it]

tensor(9807, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 93/4921 [03:14<2:47:17,  2.08s/it]

tensor(11207, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 94/4921 [03:16<2:47:43,  2.08s/it]

tensor(10507, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 95/4921 [03:18<2:46:34,  2.07s/it]

tensor(14707, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 96/4921 [03:20<2:49:38,  2.11s/it]

tensor(12607, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 97/4921 [03:22<2:48:22,  2.09s/it]

tensor(15407, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 98/4921 [03:24<2:47:11,  2.08s/it]

tensor(16807, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 99/4921 [03:27<2:47:43,  2.09s/it]

tensor(11207, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 100/4921 [03:29<2:47:48,  2.09s/it]

tensor(14707, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 101/4921 [03:31<2:47:25,  2.08s/it]

tensor(13307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 102/4921 [03:33<2:46:56,  2.08s/it]

tensor(14007, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 103/4921 [03:35<2:47:52,  2.09s/it]

tensor(13307, device='cuda:0') tensor(98, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 104/4921 [03:37<2:50:59,  2.13s/it]

tensor(11907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 105/4921 [03:39<2:51:19,  2.13s/it]

tensor(14007, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 106/4921 [03:41<2:50:01,  2.12s/it]

tensor(16107, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 107/4921 [03:43<2:49:34,  2.11s/it]

tensor(13307, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 108/4921 [03:45<2:48:37,  2.10s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 109/4921 [03:48<2:47:47,  2.09s/it]

tensor(13307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 110/4921 [03:50<2:48:10,  2.10s/it]

tensor(14007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 111/4921 [03:52<2:48:17,  2.10s/it]

tensor(11907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 112/4921 [03:54<2:51:23,  2.14s/it]

tensor(11907, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 113/4921 [03:56<2:51:00,  2.13s/it]

tensor(11907, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 114/4921 [03:58<2:51:26,  2.14s/it]

tensor(11207, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 115/4921 [04:00<2:50:33,  2.13s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 116/4921 [04:02<2:49:24,  2.12s/it]

tensor(11907, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 117/4921 [04:05<2:49:42,  2.12s/it]

tensor(11207, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 118/4921 [04:07<2:48:31,  2.11s/it]

tensor(10507, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 119/4921 [04:09<2:48:43,  2.11s/it]

tensor(16807, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 120/4921 [04:11<2:54:05,  2.18s/it]

tensor(13307, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 121/4921 [04:13<2:51:53,  2.15s/it]

tensor(10507, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 122/4921 [04:15<2:50:48,  2.14s/it]

tensor(21007, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  2%|▏         | 123/4921 [04:17<2:50:25,  2.13s/it]

tensor(9807, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 124/4921 [04:20<2:49:25,  2.12s/it]

tensor(15407, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 125/4921 [04:22<2:48:13,  2.10s/it]

tensor(9807, device='cuda:0') tensor(539, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 126/4921 [04:24<2:48:30,  2.11s/it]

tensor(17507, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 127/4921 [04:26<2:47:51,  2.10s/it]

tensor(13307, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 128/4921 [04:28<2:50:12,  2.13s/it]

tensor(10507, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 129/4921 [04:30<2:49:15,  2.12s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 130/4921 [04:32<2:48:03,  2.10s/it]

tensor(11207, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 131/4921 [04:34<2:46:49,  2.09s/it]

tensor(12607, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 132/4921 [04:36<2:45:56,  2.08s/it]

tensor(23107, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 133/4921 [04:38<2:46:47,  2.09s/it]

tensor(11207, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 134/4921 [04:40<2:47:15,  2.10s/it]

tensor(23107, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 135/4921 [04:43<2:47:18,  2.10s/it]

tensor(14707, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 136/4921 [04:45<2:50:08,  2.13s/it]

tensor(14007, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 137/4921 [04:47<2:49:32,  2.13s/it]

tensor(11207, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 138/4921 [04:49<2:47:56,  2.11s/it]

tensor(11907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 139/4921 [04:51<2:47:51,  2.11s/it]

tensor(14007, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 140/4921 [04:53<2:47:40,  2.10s/it]

tensor(11207, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 141/4921 [04:55<2:47:55,  2.11s/it]

tensor(13307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 142/4921 [04:57<2:45:56,  2.08s/it]

tensor(9107, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 143/4921 [04:59<2:43:35,  2.05s/it]

tensor(11207, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 144/4921 [05:01<2:46:10,  2.09s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 145/4921 [05:03<2:44:29,  2.07s/it]

tensor(11207, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 146/4921 [05:06<2:45:03,  2.07s/it]

tensor(11207, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 147/4921 [05:08<2:44:43,  2.07s/it]

tensor(11907, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 148/4921 [05:10<2:43:45,  2.06s/it]

tensor(9107, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 149/4921 [05:12<2:43:13,  2.05s/it]

tensor(9107, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 150/4921 [05:14<2:42:18,  2.04s/it]

tensor(9807, device='cuda:0') tensor(173, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 151/4921 [05:16<2:41:43,  2.03s/it]

tensor(9807, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 152/4921 [05:18<2:45:38,  2.08s/it]

tensor(12607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 153/4921 [05:20<2:47:49,  2.11s/it]

tensor(10507, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 154/4921 [05:22<2:47:05,  2.10s/it]

tensor(17507, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 155/4921 [05:24<2:45:11,  2.08s/it]

tensor(10507, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 156/4921 [05:26<2:43:52,  2.06s/it]

tensor(11207, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 157/4921 [05:28<2:42:50,  2.05s/it]

tensor(18207, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 158/4921 [05:30<2:42:37,  2.05s/it]

tensor(13307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 159/4921 [05:32<2:44:03,  2.07s/it]

tensor(12607, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 160/4921 [05:35<2:50:06,  2.14s/it]

tensor(9807, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 161/4921 [05:37<2:47:12,  2.11s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 162/4921 [05:39<2:45:11,  2.08s/it]

tensor(10507, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 163/4921 [05:41<2:43:21,  2.06s/it]

tensor(17507, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 164/4921 [05:43<2:42:15,  2.05s/it]

tensor(11207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 165/4921 [05:45<2:41:49,  2.04s/it]

tensor(11907, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 166/4921 [05:47<2:43:09,  2.06s/it]

tensor(12607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 167/4921 [05:49<2:42:43,  2.05s/it]

tensor(9807, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 168/4921 [05:51<2:45:37,  2.09s/it]

tensor(10507, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 169/4921 [05:53<2:43:54,  2.07s/it]

tensor(13307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 170/4921 [05:55<2:43:04,  2.06s/it]

tensor(12607, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 171/4921 [05:57<2:42:24,  2.05s/it]

tensor(9807, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  3%|▎         | 172/4921 [05:59<2:43:11,  2.06s/it]

tensor(9807, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 173/4921 [06:01<2:43:52,  2.07s/it]

tensor(9807, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 174/4921 [06:03<2:42:32,  2.05s/it]

tensor(9107, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 175/4921 [06:06<2:43:03,  2.06s/it]

tensor(12607, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 176/4921 [06:08<2:45:47,  2.10s/it]

tensor(11207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 177/4921 [06:10<2:44:16,  2.08s/it]

tensor(10507, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 178/4921 [06:12<2:43:39,  2.07s/it]

tensor(9107, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 179/4921 [06:14<2:43:43,  2.07s/it]

tensor(11907, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 180/4921 [06:16<2:44:09,  2.08s/it]

tensor(9107, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 181/4921 [06:18<2:44:16,  2.08s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 182/4921 [06:20<2:43:16,  2.07s/it]

tensor(12607, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 183/4921 [06:22<2:42:40,  2.06s/it]

tensor(9107, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▎         | 184/4921 [06:24<2:47:26,  2.12s/it]

tensor(10507, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 185/4921 [06:26<2:45:46,  2.10s/it]

tensor(9807, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 186/4921 [06:29<2:45:28,  2.10s/it]

tensor(13307, device='cuda:0') tensor(212, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 187/4921 [06:31<2:46:06,  2.11s/it]

tensor(28007, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 188/4921 [06:33<2:44:38,  2.09s/it]

tensor(13307, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 189/4921 [06:35<2:44:07,  2.08s/it]

tensor(9807, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 190/4921 [06:37<2:45:04,  2.09s/it]

tensor(9807, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 191/4921 [06:39<2:44:03,  2.08s/it]

tensor(15407, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 192/4921 [06:41<2:46:27,  2.11s/it]

tensor(8407, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 193/4921 [06:43<2:46:12,  2.11s/it]

tensor(9807, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 194/4921 [06:45<2:46:08,  2.11s/it]

tensor(17507, device='cuda:0') tensor(165, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 195/4921 [06:47<2:45:19,  2.10s/it]

tensor(9807, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 196/4921 [06:49<2:45:04,  2.10s/it]

tensor(15407, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 197/4921 [06:52<2:44:17,  2.09s/it]

tensor(11207, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 198/4921 [06:54<2:42:29,  2.06s/it]

tensor(9107, device='cuda:0') tensor(60, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 199/4921 [06:56<2:42:47,  2.07s/it]

tensor(15407, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 200/4921 [06:58<2:45:52,  2.11s/it]

tensor(9807, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 201/4921 [07:00<2:44:32,  2.09s/it]

tensor(10507, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 202/4921 [07:02<2:44:22,  2.09s/it]

tensor(11207, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 203/4921 [07:04<2:44:52,  2.10s/it]

tensor(16107, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 204/4921 [07:06<2:44:07,  2.09s/it]

tensor(11907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 205/4921 [07:08<2:43:31,  2.08s/it]

tensor(11907, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 206/4921 [07:10<2:43:21,  2.08s/it]

tensor(9807, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 207/4921 [07:12<2:45:11,  2.10s/it]

tensor(12607, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 208/4921 [07:15<2:48:14,  2.14s/it]

tensor(11207, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 209/4921 [07:17<2:46:54,  2.13s/it]

tensor(11907, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 210/4921 [07:19<2:45:14,  2.10s/it]

tensor(8407, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 211/4921 [07:21<2:43:22,  2.08s/it]

tensor(9807, device='cuda:0') tensor(20, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 212/4921 [07:23<2:42:37,  2.07s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 213/4921 [07:25<2:43:41,  2.09s/it]

tensor(8407, device='cuda:0') tensor(153, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 214/4921 [07:27<2:43:48,  2.09s/it]

tensor(10507, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 215/4921 [07:29<2:43:56,  2.09s/it]

tensor(11907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 216/4921 [07:31<2:45:29,  2.11s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 217/4921 [07:33<2:43:47,  2.09s/it]

tensor(10507, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 218/4921 [07:35<2:43:21,  2.08s/it]

tensor(9807, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 219/4921 [07:38<2:42:44,  2.08s/it]

tensor(10507, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 220/4921 [07:40<2:44:20,  2.10s/it]

tensor(8407, device='cuda:0') tensor(169, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  4%|▍         | 221/4921 [07:42<2:44:23,  2.10s/it]

tensor(10507, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 222/4921 [07:44<2:42:15,  2.07s/it]

tensor(13307, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 223/4921 [07:46<2:41:37,  2.06s/it]

tensor(13307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 224/4921 [07:48<2:44:15,  2.10s/it]

tensor(15407, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 225/4921 [07:50<2:42:38,  2.08s/it]

tensor(11207, device='cuda:0') tensor(99, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 226/4921 [07:52<2:43:34,  2.09s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 227/4921 [07:54<2:44:40,  2.10s/it]

tensor(11907, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 228/4921 [07:56<2:43:24,  2.09s/it]

tensor(7707, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 229/4921 [07:58<2:42:05,  2.07s/it]

tensor(13307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 230/4921 [08:00<2:41:08,  2.06s/it]

tensor(11207, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 231/4921 [08:02<2:40:28,  2.05s/it]

tensor(9807, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 232/4921 [08:05<2:44:01,  2.10s/it]

tensor(9107, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 233/4921 [08:07<2:44:29,  2.11s/it]

tensor(16107, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 234/4921 [08:09<2:43:24,  2.09s/it]

tensor(9107, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 235/4921 [08:11<2:42:21,  2.08s/it]

tensor(7707, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 236/4921 [08:13<2:41:30,  2.07s/it]

tensor(10507, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 237/4921 [08:15<2:40:05,  2.05s/it]

tensor(9107, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 238/4921 [08:17<2:40:21,  2.05s/it]

tensor(9807, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 239/4921 [08:19<2:41:22,  2.07s/it]

tensor(9107, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 240/4921 [08:21<2:46:29,  2.13s/it]

tensor(9107, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 241/4921 [08:23<2:44:16,  2.11s/it]

tensor(8407, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 242/4921 [08:25<2:42:11,  2.08s/it]

tensor(9807, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 243/4921 [08:28<2:40:49,  2.06s/it]

tensor(8407, device='cuda:0') tensor(589, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 244/4921 [08:30<2:39:22,  2.04s/it]

tensor(9807, device='cuda:0') tensor(164, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 245/4921 [08:32<2:39:28,  2.05s/it]

tensor(9107, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▍         | 246/4921 [08:34<2:40:18,  2.06s/it]

tensor(11207, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 247/4921 [08:36<2:40:54,  2.07s/it]

tensor(21707, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 248/4921 [08:38<2:43:11,  2.10s/it]

tensor(11907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 249/4921 [08:40<2:41:27,  2.07s/it]

tensor(9807, device='cuda:0') tensor(223, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 250/4921 [08:42<2:39:51,  2.05s/it]

tensor(14707, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 251/4921 [08:44<2:39:35,  2.05s/it]

tensor(9107, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 252/4921 [08:46<2:40:35,  2.06s/it]

tensor(8407, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 253/4921 [08:48<2:40:28,  2.06s/it]

tensor(8407, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 254/4921 [08:50<2:40:07,  2.06s/it]

tensor(14707, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 255/4921 [08:52<2:39:42,  2.05s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 256/4921 [08:54<2:44:00,  2.11s/it]

tensor(16807, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 257/4921 [08:57<2:43:39,  2.11s/it]

tensor(7707, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 258/4921 [08:59<2:42:57,  2.10s/it]

tensor(8407, device='cuda:0') tensor(164, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 259/4921 [09:01<2:41:53,  2.08s/it]

tensor(10507, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 260/4921 [09:03<2:41:53,  2.08s/it]

tensor(7707, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 261/4921 [09:05<2:40:21,  2.06s/it]

tensor(9807, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 262/4921 [09:07<2:39:35,  2.06s/it]

tensor(7707, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 263/4921 [09:09<2:38:53,  2.05s/it]

tensor(7707, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 264/4921 [09:11<2:43:51,  2.11s/it]

tensor(12607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 265/4921 [09:13<2:42:54,  2.10s/it]

tensor(11207, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 266/4921 [09:15<2:42:22,  2.09s/it]

tensor(9107, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 267/4921 [09:17<2:42:07,  2.09s/it]

tensor(12607, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 268/4921 [09:19<2:40:48,  2.07s/it]

tensor(12607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 269/4921 [09:21<2:39:37,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  5%|▌         | 270/4921 [09:23<2:40:06,  2.07s/it]

tensor(16107, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 271/4921 [09:25<2:38:39,  2.05s/it]

tensor(10507, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 272/4921 [09:28<2:40:53,  2.08s/it]

tensor(9807, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 273/4921 [09:30<2:40:41,  2.07s/it]

tensor(10507, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 274/4921 [09:32<2:40:40,  2.07s/it]

tensor(9807, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 275/4921 [09:34<2:40:26,  2.07s/it]

tensor(8407, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 276/4921 [09:36<2:40:26,  2.07s/it]

tensor(9807, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 277/4921 [09:38<2:39:26,  2.06s/it]

tensor(13307, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 278/4921 [09:40<2:38:24,  2.05s/it]

tensor(11207, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 279/4921 [09:42<2:38:03,  2.04s/it]

tensor(7707, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 280/4921 [09:44<2:42:32,  2.10s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 281/4921 [09:46<2:41:47,  2.09s/it]

tensor(7707, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 282/4921 [09:48<2:41:28,  2.09s/it]

tensor(10507, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 283/4921 [09:50<2:40:43,  2.08s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 284/4921 [09:52<2:38:39,  2.05s/it]

tensor(12607, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 285/4921 [09:54<2:37:21,  2.04s/it]

tensor(8407, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 286/4921 [09:56<2:38:16,  2.05s/it]

tensor(9107, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 287/4921 [09:59<2:38:10,  2.05s/it]

tensor(7707, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 288/4921 [10:01<2:42:04,  2.10s/it]

tensor(7707, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 289/4921 [10:03<2:40:33,  2.08s/it]

tensor(9807, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 290/4921 [10:05<2:38:53,  2.06s/it]

tensor(7007, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 291/4921 [10:07<2:37:08,  2.04s/it]

tensor(8407, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 292/4921 [10:09<2:36:41,  2.03s/it]

tensor(8407, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 293/4921 [10:11<2:36:58,  2.04s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 294/4921 [10:13<2:38:58,  2.06s/it]

tensor(9807, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 295/4921 [10:15<2:38:54,  2.06s/it]

tensor(12607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 296/4921 [10:17<2:41:20,  2.09s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 297/4921 [10:19<2:38:49,  2.06s/it]

tensor(11207, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 298/4921 [10:21<2:37:34,  2.05s/it]

tensor(8407, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 299/4921 [10:23<2:36:54,  2.04s/it]

tensor(14007, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 300/4921 [10:25<2:37:43,  2.05s/it]

tensor(20307, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 301/4921 [10:27<2:38:22,  2.06s/it]

tensor(8407, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 302/4921 [10:29<2:37:43,  2.05s/it]

tensor(11907, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 303/4921 [10:31<2:36:58,  2.04s/it]

tensor(11207, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 304/4921 [10:34<2:41:24,  2.10s/it]

tensor(10507, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 305/4921 [10:36<2:39:50,  2.08s/it]

tensor(9807, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 306/4921 [10:38<2:40:05,  2.08s/it]

tensor(7007, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▌         | 307/4921 [10:40<2:41:12,  2.10s/it]

tensor(9107, device='cuda:0') tensor(217, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 308/4921 [10:42<2:41:17,  2.10s/it]

tensor(8407, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 309/4921 [10:44<2:40:43,  2.09s/it]

tensor(7707, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 310/4921 [10:46<2:41:01,  2.10s/it]

tensor(10507, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 311/4921 [10:48<2:42:04,  2.11s/it]

tensor(11207, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 312/4921 [10:51<2:44:36,  2.14s/it]

tensor(8407, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 313/4921 [10:53<2:45:35,  2.16s/it]

tensor(7707, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 314/4921 [10:55<2:46:43,  2.17s/it]

tensor(7707, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 315/4921 [10:57<2:45:05,  2.15s/it]

tensor(7007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 316/4921 [10:59<2:43:29,  2.13s/it]

tensor(8407, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 317/4921 [11:01<2:43:01,  2.12s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 318/4921 [11:03<2:42:10,  2.11s/it]

tensor(8407, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  6%|▋         | 319/4921 [11:05<2:43:07,  2.13s/it]

tensor(7007, device='cuda:0') tensor(83, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 320/4921 [11:08<2:47:03,  2.18s/it]

tensor(7007, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 321/4921 [11:10<2:46:05,  2.17s/it]

tensor(10507, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 322/4921 [11:12<2:44:08,  2.14s/it]

tensor(8407, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 323/4921 [11:14<2:42:14,  2.12s/it]

tensor(8407, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 324/4921 [11:16<2:41:44,  2.11s/it]

tensor(7007, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 325/4921 [11:18<2:41:31,  2.11s/it]

tensor(9107, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 326/4921 [11:20<2:42:24,  2.12s/it]

tensor(9107, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 327/4921 [11:23<2:43:50,  2.14s/it]

tensor(7707, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 328/4921 [11:25<2:47:38,  2.19s/it]

tensor(18207, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 329/4921 [11:27<2:45:24,  2.16s/it]

tensor(11907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 330/4921 [11:29<2:43:20,  2.13s/it]

tensor(14707, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 331/4921 [11:31<2:43:08,  2.13s/it]

tensor(14007, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 332/4921 [11:33<2:42:26,  2.12s/it]

tensor(7707, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 333/4921 [11:35<2:43:45,  2.14s/it]

tensor(7707, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 334/4921 [11:38<2:44:16,  2.15s/it]

tensor(11907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 335/4921 [11:40<2:42:36,  2.13s/it]

tensor(7707, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 336/4921 [11:42<2:45:20,  2.16s/it]

tensor(9807, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 337/4921 [11:44<2:43:57,  2.15s/it]

tensor(8407, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 338/4921 [11:46<2:43:15,  2.14s/it]

tensor(10507, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 339/4921 [11:48<2:43:27,  2.14s/it]

tensor(9807, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 340/4921 [11:51<2:43:48,  2.15s/it]

tensor(9807, device='cuda:0') tensor(349, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 341/4921 [11:53<2:43:08,  2.14s/it]

tensor(9807, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 342/4921 [11:55<2:42:26,  2.13s/it]

tensor(8407, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 343/4921 [11:57<2:43:24,  2.14s/it]

tensor(14707, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 344/4921 [11:59<2:45:09,  2.17s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 345/4921 [12:01<2:43:26,  2.14s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 346/4921 [12:03<2:42:37,  2.13s/it]

tensor(9807, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 347/4921 [12:05<2:42:37,  2.13s/it]

tensor(11207, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 348/4921 [12:08<2:41:01,  2.11s/it]

tensor(14707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 349/4921 [12:10<2:40:22,  2.10s/it]

tensor(9807, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 350/4921 [12:12<2:39:49,  2.10s/it]

tensor(7007, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 351/4921 [12:14<2:38:18,  2.08s/it]

tensor(7007, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 352/4921 [12:16<2:41:34,  2.12s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 353/4921 [12:18<2:40:23,  2.11s/it]

tensor(10507, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 354/4921 [12:20<2:38:28,  2.08s/it]

tensor(8407, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 355/4921 [12:22<2:37:24,  2.07s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 356/4921 [12:24<2:36:49,  2.06s/it]

tensor(10507, device='cuda:0') tensor(137, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 357/4921 [12:26<2:35:19,  2.04s/it]

tensor(9107, device='cuda:0') tensor(12, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 358/4921 [12:28<2:35:19,  2.04s/it]

tensor(7007, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 359/4921 [12:30<2:34:54,  2.04s/it]

tensor(10507, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 360/4921 [12:32<2:39:09,  2.09s/it]

tensor(9107, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 361/4921 [12:34<2:38:28,  2.09s/it]

tensor(7707, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 362/4921 [12:37<2:38:55,  2.09s/it]

tensor(8407, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 363/4921 [12:39<2:36:52,  2.07s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 364/4921 [12:41<2:35:27,  2.05s/it]

tensor(10507, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 365/4921 [12:43<2:34:35,  2.04s/it]

tensor(11207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 366/4921 [12:45<2:34:41,  2.04s/it]

tensor(7007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 367/4921 [12:47<2:36:07,  2.06s/it]

tensor(7707, device='cuda:0') tensor(15, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 368/4921 [12:49<2:39:58,  2.11s/it]

tensor(11207, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  7%|▋         | 369/4921 [12:51<2:38:26,  2.09s/it]

tensor(7007, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 370/4921 [12:53<2:36:29,  2.06s/it]

tensor(8407, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 371/4921 [12:55<2:34:38,  2.04s/it]

tensor(8407, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 372/4921 [12:57<2:34:32,  2.04s/it]

tensor(11907, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 373/4921 [12:59<2:35:52,  2.06s/it]

tensor(14707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 374/4921 [13:01<2:37:42,  2.08s/it]

tensor(7007, device='cuda:0') tensor(338, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 375/4921 [13:03<2:36:38,  2.07s/it]

tensor(7007, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 376/4921 [13:05<2:38:35,  2.09s/it]

tensor(17507, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 377/4921 [13:07<2:37:08,  2.07s/it]

tensor(7707, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 378/4921 [13:09<2:35:02,  2.05s/it]

tensor(8407, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 379/4921 [13:12<2:34:42,  2.04s/it]

tensor(7007, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 380/4921 [13:14<2:37:12,  2.08s/it]

tensor(7707, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 381/4921 [13:16<2:38:00,  2.09s/it]

tensor(8407, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 382/4921 [13:18<2:36:41,  2.07s/it]

tensor(8407, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 383/4921 [13:20<2:35:38,  2.06s/it]

tensor(13307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 384/4921 [13:22<2:38:11,  2.09s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 385/4921 [13:24<2:36:32,  2.07s/it]

tensor(10507, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 386/4921 [13:26<2:37:07,  2.08s/it]

tensor(11907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 387/4921 [13:28<2:37:39,  2.09s/it]

tensor(10507, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 388/4921 [13:30<2:36:54,  2.08s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 389/4921 [13:32<2:35:44,  2.06s/it]

tensor(18907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 390/4921 [13:34<2:35:33,  2.06s/it]

tensor(13307, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 391/4921 [13:36<2:35:54,  2.07s/it]

tensor(9107, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 392/4921 [13:39<2:39:35,  2.11s/it]

tensor(9807, device='cuda:0') tensor(133, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 393/4921 [13:41<2:39:46,  2.12s/it]

tensor(17507, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 394/4921 [13:43<2:37:58,  2.09s/it]

tensor(7707, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 395/4921 [13:45<2:36:43,  2.08s/it]

tensor(7707, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 396/4921 [13:47<2:36:04,  2.07s/it]

tensor(9807, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 397/4921 [13:49<2:35:36,  2.06s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 398/4921 [13:51<2:36:00,  2.07s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 399/4921 [13:53<2:36:36,  2.08s/it]

tensor(7707, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 400/4921 [13:55<2:40:12,  2.13s/it]

tensor(6307, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 401/4921 [13:57<2:37:35,  2.09s/it]

tensor(8407, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 402/4921 [13:59<2:35:49,  2.07s/it]

tensor(12607, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 403/4921 [14:01<2:35:29,  2.06s/it]

tensor(9107, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 404/4921 [14:04<2:34:47,  2.06s/it]

tensor(8407, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 405/4921 [14:06<2:35:17,  2.06s/it]

tensor(9807, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 406/4921 [14:08<2:35:40,  2.07s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 407/4921 [14:10<2:36:07,  2.08s/it]

tensor(9107, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 408/4921 [14:12<2:37:51,  2.10s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 409/4921 [14:14<2:35:41,  2.07s/it]

tensor(9807, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 410/4921 [14:16<2:35:22,  2.07s/it]

tensor(7707, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 411/4921 [14:18<2:35:32,  2.07s/it]

tensor(7707, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 412/4921 [14:20<2:35:02,  2.06s/it]

tensor(7707, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 413/4921 [14:22<2:36:06,  2.08s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 414/4921 [14:24<2:36:24,  2.08s/it]

tensor(7707, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 415/4921 [14:26<2:35:10,  2.07s/it]

tensor(6307, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 416/4921 [14:29<2:38:30,  2.11s/it]

tensor(9807, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 417/4921 [14:31<2:38:09,  2.11s/it]

tensor(8407, device='cuda:0') tensor(174, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  8%|▊         | 418/4921 [14:33<2:36:37,  2.09s/it]

tensor(7707, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 419/4921 [14:35<2:36:35,  2.09s/it]

tensor(11907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 420/4921 [14:37<2:37:52,  2.10s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 421/4921 [14:39<2:36:16,  2.08s/it]

tensor(11207, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 422/4921 [14:41<2:35:27,  2.07s/it]

tensor(9107, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 423/4921 [14:43<2:36:01,  2.08s/it]

tensor(9807, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 424/4921 [14:45<2:38:53,  2.12s/it]

tensor(9807, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 425/4921 [14:47<2:37:35,  2.10s/it]

tensor(7007, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 426/4921 [14:49<2:37:10,  2.10s/it]

tensor(7007, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 427/4921 [14:52<2:36:59,  2.10s/it]

tensor(7007, device='cuda:0') tensor(589, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 428/4921 [14:54<2:35:49,  2.08s/it]

tensor(8407, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 429/4921 [14:56<2:35:56,  2.08s/it]

tensor(16807, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▊         | 430/4921 [14:58<2:36:04,  2.09s/it]

tensor(22407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 431/4921 [15:00<2:34:46,  2.07s/it]

tensor(7007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 432/4921 [15:02<2:37:58,  2.11s/it]

tensor(11207, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 433/4921 [15:04<2:37:08,  2.10s/it]

tensor(8407, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 434/4921 [15:06<2:36:03,  2.09s/it]

tensor(7707, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 435/4921 [15:08<2:35:42,  2.08s/it]

tensor(9107, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 436/4921 [15:10<2:34:32,  2.07s/it]

tensor(7707, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 437/4921 [15:12<2:33:36,  2.06s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 438/4921 [15:14<2:32:49,  2.05s/it]

tensor(9107, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 439/4921 [15:16<2:33:50,  2.06s/it]

tensor(10507, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 440/4921 [15:19<2:38:59,  2.13s/it]

tensor(7007, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 441/4921 [15:21<2:38:50,  2.13s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 442/4921 [15:23<2:38:06,  2.12s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 443/4921 [15:25<2:36:56,  2.10s/it]

tensor(10507, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 444/4921 [15:27<2:36:13,  2.09s/it]

tensor(8407, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 445/4921 [15:29<2:35:25,  2.08s/it]

tensor(9807, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 446/4921 [15:31<2:36:46,  2.10s/it]

tensor(7007, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 447/4921 [15:33<2:37:51,  2.12s/it]

tensor(7007, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 448/4921 [15:36<2:40:34,  2.15s/it]

tensor(10507, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 449/4921 [15:38<2:38:37,  2.13s/it]

tensor(9107, device='cuda:0') tensor(120, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 450/4921 [15:40<2:36:45,  2.10s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 451/4921 [15:42<2:35:39,  2.09s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 452/4921 [15:44<2:34:55,  2.08s/it]

tensor(7007, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 453/4921 [15:46<2:36:49,  2.11s/it]

tensor(6307, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 454/4921 [15:48<2:37:27,  2.12s/it]

tensor(9107, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 455/4921 [15:50<2:36:23,  2.10s/it]

tensor(7707, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 456/4921 [15:52<2:38:34,  2.13s/it]

tensor(7007, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 457/4921 [15:54<2:36:06,  2.10s/it]

tensor(9107, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 458/4921 [15:57<2:34:17,  2.07s/it]

tensor(7007, device='cuda:0') tensor(589, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 459/4921 [15:59<2:34:31,  2.08s/it]

tensor(7007, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 460/4921 [16:01<2:35:27,  2.09s/it]

tensor(8407, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 461/4921 [16:03<2:34:25,  2.08s/it]

tensor(7007, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 462/4921 [16:05<2:34:21,  2.08s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 463/4921 [16:07<2:33:14,  2.06s/it]

tensor(8407, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 464/4921 [16:09<2:35:45,  2.10s/it]

tensor(7007, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 465/4921 [16:11<2:34:59,  2.09s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 466/4921 [16:13<2:35:30,  2.09s/it]

tensor(10507, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


  9%|▉         | 467/4921 [16:15<2:36:08,  2.10s/it]

tensor(7007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 468/4921 [16:17<2:35:25,  2.09s/it]

tensor(7007, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 469/4921 [16:19<2:34:39,  2.08s/it]

tensor(6307, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 470/4921 [16:21<2:33:11,  2.07s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 471/4921 [16:24<2:34:13,  2.08s/it]

tensor(7707, device='cuda:0') tensor(113, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 472/4921 [16:26<2:37:41,  2.13s/it]

tensor(9807, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 473/4921 [16:28<2:36:41,  2.11s/it]

tensor(7707, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 474/4921 [16:30<2:35:38,  2.10s/it]

tensor(9107, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 475/4921 [16:32<2:33:00,  2.06s/it]

tensor(13307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 476/4921 [16:34<2:32:23,  2.06s/it]

tensor(9807, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 477/4921 [16:36<2:31:44,  2.05s/it]

tensor(9107, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 478/4921 [16:38<2:31:51,  2.05s/it]

tensor(8407, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 479/4921 [16:40<2:31:53,  2.05s/it]

tensor(7707, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 480/4921 [16:42<2:36:15,  2.11s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 481/4921 [16:44<2:35:04,  2.10s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 482/4921 [16:46<2:33:44,  2.08s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 483/4921 [16:49<2:32:24,  2.06s/it]

tensor(7007, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 484/4921 [16:51<2:32:28,  2.06s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 485/4921 [16:53<2:32:01,  2.06s/it]

tensor(10507, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 486/4921 [16:55<2:32:13,  2.06s/it]

tensor(6307, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 487/4921 [16:57<2:32:25,  2.06s/it]

tensor(9107, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 488/4921 [16:59<2:35:01,  2.10s/it]

tensor(6307, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 489/4921 [17:01<2:33:15,  2.07s/it]

tensor(6307, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 490/4921 [17:03<2:32:15,  2.06s/it]

tensor(8407, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 491/4921 [17:05<2:31:54,  2.06s/it]

tensor(7707, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|▉         | 492/4921 [17:07<2:30:59,  2.05s/it]

tensor(7007, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 493/4921 [17:09<2:30:22,  2.04s/it]

tensor(7007, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 494/4921 [17:11<2:30:01,  2.03s/it]

tensor(7007, device='cuda:0') tensor(153, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 495/4921 [17:13<2:28:48,  2.02s/it]

tensor(7007, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 496/4921 [17:15<2:33:10,  2.08s/it]

tensor(6307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 497/4921 [17:17<2:33:23,  2.08s/it]

tensor(7007, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 498/4921 [17:19<2:31:53,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 499/4921 [17:21<2:31:41,  2.06s/it]

tensor(12607, device='cuda:0') tensor(28, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 500/4921 [17:24<2:32:04,  2.06s/it]

tensor(8407, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 501/4921 [17:26<2:32:34,  2.07s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 502/4921 [17:28<2:33:10,  2.08s/it]

tensor(7007, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 503/4921 [17:30<2:33:00,  2.08s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 504/4921 [17:32<2:34:33,  2.10s/it]

tensor(8407, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 505/4921 [17:34<2:32:49,  2.08s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 506/4921 [17:36<2:32:10,  2.07s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 507/4921 [17:38<2:31:50,  2.06s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 508/4921 [17:40<2:31:17,  2.06s/it]

tensor(7007, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 509/4921 [17:42<2:31:21,  2.06s/it]

tensor(9107, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 510/4921 [17:44<2:30:17,  2.04s/it]

tensor(8407, device='cuda:0') tensor(200, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 511/4921 [17:46<2:30:44,  2.05s/it]

tensor(7007, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 512/4921 [17:48<2:33:53,  2.09s/it]

tensor(9807, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 513/4921 [17:51<2:33:20,  2.09s/it]

tensor(7707, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 514/4921 [17:53<2:32:56,  2.08s/it]

tensor(9807, device='cuda:0') tensor(174, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 515/4921 [17:55<2:32:11,  2.07s/it]

tensor(13307, device='cuda:0') tensor(58, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 10%|█         | 516/4921 [17:57<2:31:00,  2.06s/it]

tensor(7007, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 517/4921 [17:59<2:30:30,  2.05s/it]

tensor(16807, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 518/4921 [18:01<2:28:43,  2.03s/it]

tensor(10507, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 519/4921 [18:03<2:28:08,  2.02s/it]

tensor(7007, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 520/4921 [18:05<2:32:58,  2.09s/it]

tensor(11207, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 521/4921 [18:07<2:33:02,  2.09s/it]

tensor(9807, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 522/4921 [18:09<2:31:53,  2.07s/it]

tensor(8407, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 523/4921 [18:11<2:30:35,  2.05s/it]

tensor(7707, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 524/4921 [18:13<2:30:16,  2.05s/it]

tensor(7007, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 525/4921 [18:15<2:31:40,  2.07s/it]

tensor(6307, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 526/4921 [18:17<2:31:49,  2.07s/it]

tensor(6307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 527/4921 [18:19<2:32:43,  2.09s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 528/4921 [18:22<2:36:18,  2.13s/it]

tensor(6307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 529/4921 [18:24<2:33:15,  2.09s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 530/4921 [18:26<2:31:16,  2.07s/it]

tensor(9107, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 531/4921 [18:28<2:29:50,  2.05s/it]

tensor(7007, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 532/4921 [18:30<2:28:31,  2.03s/it]

tensor(10507, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 533/4921 [18:32<2:29:21,  2.04s/it]

tensor(7707, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 534/4921 [18:34<2:30:22,  2.06s/it]

tensor(6307, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 535/4921 [18:36<2:30:43,  2.06s/it]

tensor(7007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 536/4921 [18:38<2:32:58,  2.09s/it]

tensor(9107, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 537/4921 [18:40<2:32:46,  2.09s/it]

tensor(7707, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 538/4921 [18:42<2:31:07,  2.07s/it]

tensor(7007, device='cuda:0') tensor(125, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 539/4921 [18:44<2:30:45,  2.06s/it]

tensor(8407, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 540/4921 [18:46<2:31:02,  2.07s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 541/4921 [18:48<2:31:59,  2.08s/it]

tensor(7007, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 542/4921 [18:50<2:31:17,  2.07s/it]

tensor(7707, device='cuda:0') tensor(300, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 543/4921 [18:52<2:30:32,  2.06s/it]

tensor(11207, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 544/4921 [18:55<2:32:56,  2.10s/it]

tensor(5607, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 545/4921 [18:57<2:31:13,  2.07s/it]

tensor(9807, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 546/4921 [18:59<2:31:05,  2.07s/it]

tensor(6307, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 547/4921 [19:01<2:31:09,  2.07s/it]

tensor(8407, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 548/4921 [19:03<2:30:36,  2.07s/it]

tensor(5607, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 549/4921 [19:05<2:29:52,  2.06s/it]

tensor(7007, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 550/4921 [19:07<2:28:02,  2.03s/it]

tensor(9807, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 551/4921 [19:09<2:27:14,  2.02s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 552/4921 [19:11<2:31:05,  2.07s/it]

tensor(7007, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█         | 553/4921 [19:13<2:30:27,  2.07s/it]

tensor(9107, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 554/4921 [19:15<2:30:50,  2.07s/it]

tensor(7007, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 555/4921 [19:17<2:30:12,  2.06s/it]

tensor(11907, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 556/4921 [19:19<2:28:48,  2.05s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 557/4921 [19:21<2:28:20,  2.04s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 558/4921 [19:23<2:28:38,  2.04s/it]

tensor(7007, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 559/4921 [19:25<2:29:28,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 560/4921 [19:28<2:32:32,  2.10s/it]

tensor(16107, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 561/4921 [19:30<2:32:04,  2.09s/it]

tensor(12607, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 562/4921 [19:32<2:30:20,  2.07s/it]

tensor(9107, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 563/4921 [19:34<2:29:05,  2.05s/it]

tensor(10507, device='cuda:0') tensor(162, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 564/4921 [19:36<2:29:13,  2.06s/it]

tensor(7707, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 11%|█▏        | 565/4921 [19:38<2:28:11,  2.04s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 566/4921 [19:40<2:26:57,  2.02s/it]

tensor(11207, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 567/4921 [19:42<2:27:56,  2.04s/it]

tensor(8407, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 568/4921 [19:44<2:31:18,  2.09s/it]

tensor(7007, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 569/4921 [19:46<2:29:44,  2.06s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 570/4921 [19:48<2:29:41,  2.06s/it]

tensor(10507, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 571/4921 [19:50<2:29:36,  2.06s/it]

tensor(6307, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 572/4921 [19:52<2:29:20,  2.06s/it]

tensor(9107, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 573/4921 [19:54<2:28:44,  2.05s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 574/4921 [19:56<2:30:08,  2.07s/it]

tensor(11907, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 575/4921 [19:58<2:29:33,  2.06s/it]

tensor(6307, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 576/4921 [20:01<2:32:52,  2.11s/it]

tensor(14007, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 577/4921 [20:03<2:31:08,  2.09s/it]

tensor(7707, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 578/4921 [20:05<2:29:54,  2.07s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 579/4921 [20:07<2:28:28,  2.05s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 580/4921 [20:09<2:30:13,  2.08s/it]

tensor(7007, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 581/4921 [20:11<2:29:48,  2.07s/it]

tensor(8407, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 582/4921 [20:13<2:29:22,  2.07s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 583/4921 [20:15<2:28:48,  2.06s/it]

tensor(11207, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 584/4921 [20:17<2:32:00,  2.10s/it]

tensor(7707, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 585/4921 [20:19<2:30:04,  2.08s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 586/4921 [20:21<2:29:38,  2.07s/it]

tensor(10507, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 587/4921 [20:23<2:29:30,  2.07s/it]

tensor(18207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 588/4921 [20:25<2:30:23,  2.08s/it]

tensor(6307, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 589/4921 [20:28<2:30:15,  2.08s/it]

tensor(7707, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 590/4921 [20:30<2:29:32,  2.07s/it]

tensor(8407, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 591/4921 [20:32<2:28:56,  2.06s/it]

tensor(6307, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 592/4921 [20:34<2:32:36,  2.12s/it]

tensor(7007, device='cuda:0') tensor(134, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 593/4921 [20:36<2:31:16,  2.10s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 594/4921 [20:38<2:31:32,  2.10s/it]

tensor(6307, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 595/4921 [20:40<2:31:53,  2.11s/it]

tensor(5607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 596/4921 [20:42<2:30:57,  2.09s/it]

tensor(8407, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 597/4921 [20:44<2:29:51,  2.08s/it]

tensor(6307, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 598/4921 [20:46<2:29:14,  2.07s/it]

tensor(6307, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 599/4921 [20:48<2:28:47,  2.07s/it]

tensor(7707, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 600/4921 [20:51<2:32:56,  2.12s/it]

tensor(7007, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 601/4921 [20:53<2:32:54,  2.12s/it]

tensor(6307, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 602/4921 [20:55<2:31:15,  2.10s/it]

tensor(5607, device='cuda:0') tensor(133, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 603/4921 [20:57<2:29:46,  2.08s/it]

tensor(7707, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 604/4921 [20:59<2:28:00,  2.06s/it]

tensor(7007, device='cuda:0') tensor(34, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 605/4921 [21:01<2:28:13,  2.06s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 606/4921 [21:03<2:28:37,  2.07s/it]

tensor(5607, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 607/4921 [21:05<2:29:32,  2.08s/it]

tensor(8407, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 608/4921 [21:07<2:33:58,  2.14s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 609/4921 [21:09<2:30:28,  2.09s/it]

tensor(5607, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 610/4921 [21:11<2:27:52,  2.06s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 611/4921 [21:13<2:25:59,  2.03s/it]

tensor(7707, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 612/4921 [21:15<2:25:04,  2.02s/it]

tensor(11907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 613/4921 [21:17<2:25:08,  2.02s/it]

tensor(5607, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 614/4921 [21:19<2:25:43,  2.03s/it]

tensor(6307, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 12%|█▏        | 615/4921 [21:21<2:25:46,  2.03s/it]

tensor(6307, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 616/4921 [21:24<2:28:44,  2.07s/it]

tensor(6307, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 617/4921 [21:26<2:27:02,  2.05s/it]

tensor(5607, device='cuda:0') tensor(126, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 618/4921 [21:28<2:25:55,  2.03s/it]

tensor(9107, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 619/4921 [21:30<2:25:26,  2.03s/it]

tensor(7007, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 620/4921 [21:32<2:26:26,  2.04s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 621/4921 [21:34<2:26:50,  2.05s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 622/4921 [21:36<2:26:27,  2.04s/it]

tensor(5607, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 623/4921 [21:38<2:25:15,  2.03s/it]

tensor(7007, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 624/4921 [21:40<2:27:01,  2.05s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 625/4921 [21:42<2:26:40,  2.05s/it]

tensor(7007, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 626/4921 [21:44<2:25:30,  2.03s/it]

tensor(6307, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 627/4921 [21:46<2:25:35,  2.03s/it]

tensor(9107, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 628/4921 [21:48<2:26:07,  2.04s/it]

tensor(7707, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 629/4921 [21:50<2:25:52,  2.04s/it]

tensor(7707, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 630/4921 [21:52<2:25:13,  2.03s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 631/4921 [21:54<2:24:54,  2.03s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 632/4921 [21:56<2:28:15,  2.07s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 633/4921 [21:58<2:26:20,  2.05s/it]

tensor(5607, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 634/4921 [22:00<2:25:39,  2.04s/it]

tensor(7707, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 635/4921 [22:02<2:25:15,  2.03s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 636/4921 [22:04<2:24:35,  2.02s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 637/4921 [22:06<2:24:00,  2.02s/it]

tensor(7707, device='cuda:0') tensor(165, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 638/4921 [22:08<2:23:38,  2.01s/it]

tensor(5607, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 639/4921 [22:10<2:24:04,  2.02s/it]

tensor(7707, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 640/4921 [22:12<2:26:16,  2.05s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 641/4921 [22:14<2:26:11,  2.05s/it]

tensor(7007, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 642/4921 [22:17<2:25:44,  2.04s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 643/4921 [22:18<2:24:08,  2.02s/it]

tensor(6307, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 644/4921 [22:20<2:23:36,  2.01s/it]

tensor(7707, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 645/4921 [22:23<2:23:20,  2.01s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 646/4921 [22:24<2:22:17,  2.00s/it]

tensor(9107, device='cuda:0') tensor(153, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 647/4921 [22:26<2:22:43,  2.00s/it]

tensor(9807, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 648/4921 [22:29<2:27:04,  2.07s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 649/4921 [22:31<2:26:17,  2.05s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 650/4921 [22:33<2:25:52,  2.05s/it]

tensor(7707, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 651/4921 [22:35<2:25:38,  2.05s/it]

tensor(7007, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 652/4921 [22:37<2:25:20,  2.04s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 653/4921 [22:39<2:23:19,  2.01s/it]

tensor(15407, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 654/4921 [22:41<2:22:52,  2.01s/it]

tensor(7007, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 655/4921 [22:43<2:23:41,  2.02s/it]

tensor(7707, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 656/4921 [22:45<2:26:30,  2.06s/it]

tensor(7707, device='cuda:0') tensor(201, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 657/4921 [22:47<2:26:25,  2.06s/it]

tensor(8407, device='cuda:0') tensor(152, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 658/4921 [22:49<2:24:20,  2.03s/it]

tensor(7007, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 659/4921 [22:51<2:22:46,  2.01s/it]

tensor(8407, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 660/4921 [22:53<2:21:25,  1.99s/it]

tensor(6307, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 661/4921 [22:55<2:21:30,  1.99s/it]

tensor(5607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 662/4921 [22:57<2:22:14,  2.00s/it]

tensor(7707, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 663/4921 [22:59<2:22:08,  2.00s/it]

tensor(6307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 13%|█▎        | 664/4921 [23:01<2:24:38,  2.04s/it]

tensor(9107, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 665/4921 [23:03<2:23:26,  2.02s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 666/4921 [23:05<2:21:38,  2.00s/it]

tensor(11207, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 667/4921 [23:07<2:21:23,  1.99s/it]

tensor(6307, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 668/4921 [23:09<2:21:50,  2.00s/it]

tensor(5607, device='cuda:0') tensor(125, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 669/4921 [23:11<2:22:34,  2.01s/it]

tensor(12607, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 670/4921 [23:13<2:22:34,  2.01s/it]

tensor(7007, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 671/4921 [23:15<2:21:48,  2.00s/it]

tensor(5607, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 672/4921 [23:17<2:24:11,  2.04s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 673/4921 [23:19<2:22:27,  2.01s/it]

tensor(9107, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 674/4921 [23:21<2:21:54,  2.00s/it]

tensor(7707, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 675/4921 [23:23<2:23:01,  2.02s/it]

tensor(10507, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▎        | 676/4921 [23:25<2:23:31,  2.03s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 677/4921 [23:27<2:23:12,  2.02s/it]

tensor(6307, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 678/4921 [23:29<2:22:25,  2.01s/it]

tensor(7707, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 679/4921 [23:31<2:21:49,  2.01s/it]

tensor(6307, device='cuda:0') tensor(263, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 680/4921 [23:33<2:24:37,  2.05s/it]

tensor(5607, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 681/4921 [23:35<2:23:45,  2.03s/it]

tensor(10507, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 682/4921 [23:37<2:23:35,  2.03s/it]

tensor(6307, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 683/4921 [23:39<2:24:14,  2.04s/it]

tensor(14007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 684/4921 [23:41<2:23:13,  2.03s/it]

tensor(9107, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 685/4921 [23:43<2:22:11,  2.01s/it]

tensor(5607, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 686/4921 [23:45<2:21:31,  2.01s/it]

tensor(7707, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 687/4921 [23:47<2:22:06,  2.01s/it]

tensor(7707, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 688/4921 [23:50<2:25:41,  2.07s/it]

tensor(8407, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 689/4921 [23:52<2:25:19,  2.06s/it]

tensor(8407, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 690/4921 [23:54<2:24:25,  2.05s/it]

tensor(8407, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 691/4921 [23:56<2:22:45,  2.02s/it]

tensor(11207, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 692/4921 [23:58<2:21:52,  2.01s/it]

tensor(9807, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 693/4921 [24:00<2:21:47,  2.01s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 694/4921 [24:02<2:21:49,  2.01s/it]

tensor(7007, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 695/4921 [24:04<2:23:54,  2.04s/it]

tensor(10507, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 696/4921 [24:06<2:27:21,  2.09s/it]

tensor(6307, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 697/4921 [24:08<2:25:07,  2.06s/it]

tensor(6307, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 698/4921 [24:10<2:23:18,  2.04s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 699/4921 [24:12<2:22:32,  2.03s/it]

tensor(14707, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 700/4921 [24:14<2:22:48,  2.03s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 701/4921 [24:16<2:22:22,  2.02s/it]

tensor(10507, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 702/4921 [24:18<2:22:05,  2.02s/it]

tensor(7007, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 703/4921 [24:20<2:22:35,  2.03s/it]

tensor(8407, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 704/4921 [24:22<2:24:49,  2.06s/it]

tensor(11207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 705/4921 [24:24<2:23:44,  2.05s/it]

tensor(7007, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 706/4921 [24:26<2:22:31,  2.03s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 707/4921 [24:28<2:22:34,  2.03s/it]

tensor(7007, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 708/4921 [24:30<2:22:10,  2.02s/it]

tensor(7007, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 709/4921 [24:32<2:21:49,  2.02s/it]

tensor(8407, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 710/4921 [24:34<2:22:02,  2.02s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 711/4921 [24:36<2:21:25,  2.02s/it]

tensor(7007, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 712/4921 [24:38<2:24:17,  2.06s/it]

tensor(5607, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 14%|█▍        | 713/4921 [24:40<2:22:45,  2.04s/it]

tensor(7707, device='cuda:0') tensor(165, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 714/4921 [24:42<2:22:17,  2.03s/it]

tensor(7707, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 715/4921 [24:44<2:23:10,  2.04s/it]

tensor(6307, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 716/4921 [24:47<2:23:30,  2.05s/it]

tensor(8407, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 717/4921 [24:49<2:23:33,  2.05s/it]

tensor(11907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 718/4921 [24:51<2:23:02,  2.04s/it]

tensor(9807, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 719/4921 [24:53<2:22:56,  2.04s/it]

tensor(9107, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 720/4921 [24:55<2:25:41,  2.08s/it]

tensor(9807, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 721/4921 [24:57<2:23:51,  2.06s/it]

tensor(6307, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 722/4921 [24:59<2:23:02,  2.04s/it]

tensor(8407, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 723/4921 [25:01<2:24:19,  2.06s/it]

tensor(9807, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 724/4921 [25:03<2:23:31,  2.05s/it]

tensor(5607, device='cuda:0') tensor(174, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 725/4921 [25:05<2:24:29,  2.07s/it]

tensor(6307, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 726/4921 [25:07<2:24:58,  2.07s/it]

tensor(14007, device='cuda:0') tensor(169, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 727/4921 [25:09<2:23:48,  2.06s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 728/4921 [25:11<2:25:26,  2.08s/it]

tensor(9107, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 729/4921 [25:13<2:24:14,  2.06s/it]

tensor(7007, device='cuda:0') tensor(293, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 730/4921 [25:15<2:25:12,  2.08s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 731/4921 [25:18<2:24:38,  2.07s/it]

tensor(9807, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 732/4921 [25:20<2:23:29,  2.06s/it]

tensor(7007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 733/4921 [25:22<2:22:08,  2.04s/it]

tensor(7007, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 734/4921 [25:24<2:21:22,  2.03s/it]

tensor(9107, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 735/4921 [25:26<2:20:53,  2.02s/it]

tensor(15407, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 736/4921 [25:28<2:24:56,  2.08s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 737/4921 [25:30<2:24:54,  2.08s/it]

tensor(6307, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▍        | 738/4921 [25:32<2:23:45,  2.06s/it]

tensor(5607, device='cuda:0') tensor(28, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 739/4921 [25:34<2:22:51,  2.05s/it]

tensor(4907, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 740/4921 [25:36<2:22:06,  2.04s/it]

tensor(11207, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 741/4921 [25:38<2:22:20,  2.04s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 742/4921 [25:40<2:21:25,  2.03s/it]

tensor(16807, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 743/4921 [25:42<2:22:29,  2.05s/it]

tensor(11207, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 744/4921 [25:44<2:26:18,  2.10s/it]

tensor(7007, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 745/4921 [25:46<2:25:02,  2.08s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 746/4921 [25:48<2:23:32,  2.06s/it]

tensor(7707, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 747/4921 [25:50<2:22:08,  2.04s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 748/4921 [25:52<2:21:15,  2.03s/it]

tensor(6307, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 749/4921 [25:54<2:21:54,  2.04s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 750/4921 [25:56<2:23:22,  2.06s/it]

tensor(7007, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 751/4921 [25:59<2:23:20,  2.06s/it]

tensor(12607, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 752/4921 [26:01<2:25:50,  2.10s/it]

tensor(5607, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 753/4921 [26:03<2:23:42,  2.07s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 754/4921 [26:05<2:22:58,  2.06s/it]

tensor(6307, device='cuda:0') tensor(264, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 755/4921 [26:07<2:22:40,  2.05s/it]

tensor(4907, device='cuda:0') tensor(290, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 756/4921 [26:09<2:22:21,  2.05s/it]

tensor(6307, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 757/4921 [26:11<2:23:56,  2.07s/it]

tensor(6307, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 758/4921 [26:13<2:22:41,  2.06s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 759/4921 [26:15<2:21:15,  2.04s/it]

tensor(7707, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 760/4921 [26:17<2:23:33,  2.07s/it]

tensor(7007, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 761/4921 [26:19<2:23:19,  2.07s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 15%|█▌        | 762/4921 [26:21<2:22:51,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 763/4921 [26:23<2:23:19,  2.07s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 764/4921 [26:25<2:23:00,  2.06s/it]

tensor(14707, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 765/4921 [26:27<2:22:52,  2.06s/it]

tensor(7707, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 766/4921 [26:29<2:21:48,  2.05s/it]

tensor(9807, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 767/4921 [26:32<2:21:52,  2.05s/it]

tensor(7707, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 768/4921 [26:34<2:24:58,  2.09s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 769/4921 [26:36<2:24:03,  2.08s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 770/4921 [26:38<2:22:59,  2.07s/it]

tensor(6307, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 771/4921 [26:40<2:21:49,  2.05s/it]

tensor(7707, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 772/4921 [26:42<2:20:10,  2.03s/it]

tensor(6307, device='cuda:0') tensor(34, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 773/4921 [26:44<2:19:23,  2.02s/it]

tensor(11207, device='cuda:0') tensor(628, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 774/4921 [26:46<2:20:44,  2.04s/it]

tensor(5607, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 775/4921 [26:48<2:21:56,  2.05s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 776/4921 [26:50<2:26:37,  2.12s/it]

tensor(6307, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 777/4921 [26:52<2:26:17,  2.12s/it]

tensor(10507, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 778/4921 [26:54<2:24:42,  2.10s/it]

tensor(9807, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 779/4921 [26:56<2:23:07,  2.07s/it]

tensor(6307, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 780/4921 [26:58<2:22:07,  2.06s/it]

tensor(5607, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 781/4921 [27:00<2:21:34,  2.05s/it]

tensor(5607, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 782/4921 [27:02<2:20:50,  2.04s/it]

tensor(6307, device='cuda:0') tensor(15, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 783/4921 [27:05<2:21:23,  2.05s/it]

tensor(7007, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 784/4921 [27:07<2:25:22,  2.11s/it]

tensor(14007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 785/4921 [27:09<2:23:23,  2.08s/it]

tensor(6307, device='cuda:0') tensor(237, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 786/4921 [27:11<2:22:17,  2.06s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 787/4921 [27:13<2:21:59,  2.06s/it]

tensor(7707, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 788/4921 [27:15<2:21:42,  2.06s/it]

tensor(5607, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 789/4921 [27:17<2:22:01,  2.06s/it]

tensor(8407, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 790/4921 [27:19<2:21:49,  2.06s/it]

tensor(6307, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 791/4921 [27:21<2:22:13,  2.07s/it]

tensor(10507, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 792/4921 [27:23<2:24:40,  2.10s/it]

tensor(11207, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 793/4921 [27:25<2:22:40,  2.07s/it]

tensor(9107, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 794/4921 [27:27<2:21:54,  2.06s/it]

tensor(11207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 795/4921 [27:29<2:20:33,  2.04s/it]

tensor(6307, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 796/4921 [27:31<2:20:10,  2.04s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 797/4921 [27:33<2:20:39,  2.05s/it]

tensor(11207, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 798/4921 [27:36<2:20:40,  2.05s/it]

tensor(7707, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▌        | 799/4921 [27:38<2:19:59,  2.04s/it]

tensor(6307, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 800/4921 [27:40<2:22:24,  2.07s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 801/4921 [27:42<2:21:07,  2.06s/it]

tensor(11207, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 802/4921 [27:44<2:20:09,  2.04s/it]

tensor(5607, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 803/4921 [27:46<2:20:00,  2.04s/it]

tensor(7707, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 804/4921 [27:48<2:21:24,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 805/4921 [27:50<2:21:03,  2.06s/it]

tensor(5607, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 806/4921 [27:52<2:20:46,  2.05s/it]

tensor(13307, device='cuda:0') tensor(263, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 807/4921 [27:54<2:19:28,  2.03s/it]

tensor(6307, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 808/4921 [27:56<2:21:54,  2.07s/it]

tensor(7007, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 809/4921 [27:58<2:20:20,  2.05s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 810/4921 [28:00<2:19:56,  2.04s/it]

tensor(7007, device='cuda:0') tensor(74, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 16%|█▋        | 811/4921 [28:02<2:20:08,  2.05s/it]

tensor(5607, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 812/4921 [28:04<2:19:44,  2.04s/it]

tensor(6307, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 813/4921 [28:06<2:18:59,  2.03s/it]

tensor(6307, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 814/4921 [28:08<2:18:19,  2.02s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 815/4921 [28:10<2:17:33,  2.01s/it]

tensor(6307, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 816/4921 [28:12<2:20:42,  2.06s/it]

tensor(6307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 817/4921 [28:14<2:21:07,  2.06s/it]

tensor(5607, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 818/4921 [28:17<2:21:27,  2.07s/it]

tensor(6307, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 819/4921 [28:19<2:20:01,  2.05s/it]

tensor(6307, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 820/4921 [28:21<2:19:08,  2.04s/it]

tensor(7707, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 821/4921 [28:22<2:17:44,  2.02s/it]

tensor(6307, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 822/4921 [28:24<2:16:50,  2.00s/it]

tensor(7007, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 823/4921 [28:27<2:17:53,  2.02s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 824/4921 [28:29<2:22:31,  2.09s/it]

tensor(9107, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 825/4921 [28:31<2:21:33,  2.07s/it]

tensor(5607, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 826/4921 [28:33<2:19:18,  2.04s/it]

tensor(8407, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 827/4921 [28:35<2:18:18,  2.03s/it]

tensor(6307, device='cuda:0') tensor(132, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 828/4921 [28:37<2:17:36,  2.02s/it]

tensor(9107, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 829/4921 [28:39<2:18:05,  2.02s/it]

tensor(7007, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 830/4921 [28:41<2:18:04,  2.02s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 831/4921 [28:43<2:19:25,  2.05s/it]

tensor(8407, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 832/4921 [28:45<2:21:50,  2.08s/it]

tensor(9107, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 833/4921 [28:47<2:20:04,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 834/4921 [28:49<2:18:19,  2.03s/it]

tensor(10507, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 835/4921 [28:51<2:18:24,  2.03s/it]

tensor(7007, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 836/4921 [28:53<2:17:51,  2.02s/it]

tensor(7007, device='cuda:0') tensor(84, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 837/4921 [28:55<2:18:18,  2.03s/it]

tensor(7007, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 838/4921 [28:57<2:18:27,  2.03s/it]

tensor(5607, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 839/4921 [28:59<2:18:24,  2.03s/it]

tensor(7007, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 840/4921 [29:01<2:20:48,  2.07s/it]

tensor(5607, device='cuda:0') tensor(169, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 841/4921 [29:03<2:19:48,  2.06s/it]

tensor(6307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 842/4921 [29:05<2:20:13,  2.06s/it]

tensor(7007, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 843/4921 [29:08<2:20:13,  2.06s/it]

tensor(10507, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 844/4921 [29:10<2:21:18,  2.08s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 845/4921 [29:12<2:20:12,  2.06s/it]

tensor(5607, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 846/4921 [29:14<2:18:50,  2.04s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 847/4921 [29:16<2:17:32,  2.03s/it]

tensor(9107, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 848/4921 [29:18<2:20:14,  2.07s/it]

tensor(6307, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 849/4921 [29:20<2:19:17,  2.05s/it]

tensor(5607, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 850/4921 [29:22<2:19:08,  2.05s/it]

tensor(9107, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 851/4921 [29:24<2:18:31,  2.04s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 852/4921 [29:26<2:18:10,  2.04s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 853/4921 [29:28<2:18:58,  2.05s/it]

tensor(7007, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 854/4921 [29:30<2:19:56,  2.06s/it]

tensor(7007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 855/4921 [29:32<2:20:04,  2.07s/it]

tensor(6307, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 856/4921 [29:34<2:22:06,  2.10s/it]

tensor(9107, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 857/4921 [29:36<2:21:14,  2.09s/it]

tensor(9107, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 858/4921 [29:38<2:19:48,  2.06s/it]

tensor(7007, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 859/4921 [29:40<2:18:14,  2.04s/it]

tensor(12607, device='cuda:0') tensor(162, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 860/4921 [29:42<2:17:30,  2.03s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 17%|█▋        | 861/4921 [29:44<2:17:10,  2.03s/it]

tensor(8407, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 862/4921 [29:47<2:17:35,  2.03s/it]

tensor(7007, device='cuda:0') tensor(120, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 863/4921 [29:49<2:17:37,  2.03s/it]

tensor(6307, device='cuda:0') tensor(18, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 864/4921 [29:51<2:20:24,  2.08s/it]

tensor(6307, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 865/4921 [29:53<2:19:48,  2.07s/it]

tensor(5607, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 866/4921 [29:55<2:19:17,  2.06s/it]

tensor(7707, device='cuda:0') tensor(263, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 867/4921 [29:57<2:19:30,  2.06s/it]

tensor(7007, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 868/4921 [29:59<2:18:20,  2.05s/it]

tensor(6307, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 869/4921 [30:01<2:16:52,  2.03s/it]

tensor(10507, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 870/4921 [30:03<2:16:47,  2.03s/it]

tensor(6307, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 871/4921 [30:05<2:16:53,  2.03s/it]

tensor(7707, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 872/4921 [30:07<2:20:20,  2.08s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 873/4921 [30:09<2:18:54,  2.06s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 874/4921 [30:11<2:18:08,  2.05s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 875/4921 [30:13<2:16:48,  2.03s/it]

tensor(5607, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 876/4921 [30:15<2:15:38,  2.01s/it]

tensor(8407, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 877/4921 [30:17<2:16:51,  2.03s/it]

tensor(7007, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 878/4921 [30:19<2:17:59,  2.05s/it]

tensor(6307, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 879/4921 [30:21<2:19:10,  2.07s/it]

tensor(6307, device='cuda:0') tensor(200, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 880/4921 [30:24<2:22:28,  2.12s/it]

tensor(4907, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 881/4921 [30:26<2:20:51,  2.09s/it]

tensor(11907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 882/4921 [30:28<2:19:30,  2.07s/it]

tensor(7007, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 883/4921 [30:30<2:18:29,  2.06s/it]

tensor(6307, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 884/4921 [30:32<2:18:19,  2.06s/it]

tensor(6307, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 885/4921 [30:34<2:19:54,  2.08s/it]

tensor(5607, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 886/4921 [30:36<2:19:43,  2.08s/it]

tensor(7007, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 887/4921 [30:38<2:18:04,  2.05s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 888/4921 [30:40<2:20:00,  2.08s/it]

tensor(6307, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 889/4921 [30:42<2:18:20,  2.06s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 890/4921 [30:44<2:16:56,  2.04s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 891/4921 [30:46<2:17:19,  2.04s/it]

tensor(7007, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 892/4921 [30:48<2:17:55,  2.05s/it]

tensor(6307, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 893/4921 [30:50<2:17:08,  2.04s/it]

tensor(8407, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 894/4921 [30:52<2:16:59,  2.04s/it]

tensor(9107, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 895/4921 [30:54<2:16:28,  2.03s/it]

tensor(5607, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 896/4921 [30:57<2:19:36,  2.08s/it]

tensor(4907, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 897/4921 [30:59<2:19:08,  2.07s/it]

tensor(7707, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 898/4921 [31:01<2:20:09,  2.09s/it]

tensor(7007, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 899/4921 [31:03<2:19:55,  2.09s/it]

tensor(9107, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 900/4921 [31:05<2:19:04,  2.08s/it]

tensor(5607, device='cuda:0') tensor(133, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 901/4921 [31:07<2:18:45,  2.07s/it]

tensor(7707, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 902/4921 [31:09<2:18:36,  2.07s/it]

tensor(9807, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 903/4921 [31:11<2:19:39,  2.09s/it]

tensor(14707, device='cuda:0') tensor(67, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 904/4921 [31:13<2:22:39,  2.13s/it]

tensor(6307, device='cuda:0') tensor(126, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 905/4921 [31:15<2:22:12,  2.12s/it]

tensor(11907, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 906/4921 [31:17<2:20:25,  2.10s/it]

tensor(7007, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 907/4921 [31:19<2:19:04,  2.08s/it]

tensor(7007, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 908/4921 [31:22<2:18:18,  2.07s/it]

tensor(6307, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 909/4921 [31:24<2:17:24,  2.05s/it]

tensor(11207, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 18%|█▊        | 910/4921 [31:26<2:17:46,  2.06s/it]

tensor(6307, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 911/4921 [31:28<2:18:50,  2.08s/it]

tensor(5607, device='cuda:0') tensor(18, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 912/4921 [31:30<2:21:59,  2.13s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 913/4921 [31:32<2:19:22,  2.09s/it]

tensor(5607, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 914/4921 [31:34<2:17:55,  2.07s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 915/4921 [31:36<2:16:54,  2.05s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 916/4921 [31:38<2:17:46,  2.06s/it]

tensor(7007, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 917/4921 [31:40<2:17:24,  2.06s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 918/4921 [31:42<2:17:19,  2.06s/it]

tensor(5607, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 919/4921 [31:44<2:16:46,  2.05s/it]

tensor(7707, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 920/4921 [31:46<2:19:18,  2.09s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 921/4921 [31:48<2:17:50,  2.07s/it]

tensor(9107, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▊        | 922/4921 [31:50<2:17:39,  2.07s/it]

tensor(6307, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 923/4921 [31:53<2:17:39,  2.07s/it]

tensor(9807, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 924/4921 [31:55<2:17:20,  2.06s/it]

tensor(6307, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 925/4921 [31:57<2:18:31,  2.08s/it]

tensor(10507, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 926/4921 [31:59<2:17:23,  2.06s/it]

tensor(6307, device='cuda:0') tensor(180, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 927/4921 [32:01<2:16:43,  2.05s/it]

tensor(5607, device='cuda:0') tensor(65, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 928/4921 [32:03<2:20:23,  2.11s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 929/4921 [32:05<2:19:56,  2.10s/it]

tensor(10507, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 930/4921 [32:07<2:18:41,  2.09s/it]

tensor(6307, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 931/4921 [32:09<2:19:08,  2.09s/it]

tensor(8407, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 932/4921 [32:11<2:19:08,  2.09s/it]

tensor(6307, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 933/4921 [32:13<2:18:11,  2.08s/it]

tensor(7707, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 934/4921 [32:15<2:17:42,  2.07s/it]

tensor(7707, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 935/4921 [32:18<2:17:15,  2.07s/it]

tensor(4907, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 936/4921 [32:20<2:19:56,  2.11s/it]

tensor(5607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 937/4921 [32:22<2:19:31,  2.10s/it]

tensor(8407, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 938/4921 [32:24<2:19:34,  2.10s/it]

tensor(6307, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 939/4921 [32:26<2:20:07,  2.11s/it]

tensor(4907, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 940/4921 [32:28<2:19:42,  2.11s/it]

tensor(5607, device='cuda:0') tensor(223, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 941/4921 [32:30<2:19:21,  2.10s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 942/4921 [32:32<2:18:17,  2.09s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 943/4921 [32:34<2:17:48,  2.08s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 944/4921 [32:37<2:21:18,  2.13s/it]

tensor(7007, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 945/4921 [32:39<2:20:16,  2.12s/it]

tensor(10507, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 946/4921 [32:41<2:19:59,  2.11s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 947/4921 [32:43<2:19:41,  2.11s/it]

tensor(6307, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 948/4921 [32:45<2:19:02,  2.10s/it]

tensor(5607, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 949/4921 [32:47<2:18:17,  2.09s/it]

tensor(5607, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 950/4921 [32:49<2:17:23,  2.08s/it]

tensor(8407, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 951/4921 [32:51<2:17:44,  2.08s/it]

tensor(6307, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 952/4921 [32:53<2:21:32,  2.14s/it]

tensor(7007, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 953/4921 [32:56<2:20:25,  2.12s/it]

tensor(5607, device='cuda:0') tensor(67, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 954/4921 [32:58<2:19:23,  2.11s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 955/4921 [33:00<2:17:55,  2.09s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 956/4921 [33:02<2:17:19,  2.08s/it]

tensor(4907, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 957/4921 [33:04<2:17:42,  2.08s/it]

tensor(4907, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 958/4921 [33:06<2:18:44,  2.10s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 19%|█▉        | 959/4921 [33:08<2:19:26,  2.11s/it]

tensor(5607, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 960/4921 [33:10<2:22:19,  2.16s/it]

tensor(9807, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 961/4921 [33:12<2:21:21,  2.14s/it]

tensor(10507, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 962/4921 [33:15<2:20:31,  2.13s/it]

tensor(5607, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 963/4921 [33:17<2:19:30,  2.11s/it]

tensor(7707, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 964/4921 [33:19<2:18:57,  2.11s/it]

tensor(4907, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 965/4921 [33:21<2:20:04,  2.12s/it]

tensor(6307, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 966/4921 [33:23<2:19:27,  2.12s/it]

tensor(5607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 967/4921 [33:25<2:18:04,  2.10s/it]

tensor(6307, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 968/4921 [33:27<2:20:19,  2.13s/it]

tensor(5607, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 969/4921 [33:29<2:19:02,  2.11s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 970/4921 [33:31<2:18:39,  2.11s/it]

tensor(6307, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 971/4921 [33:34<2:18:39,  2.11s/it]

tensor(5607, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 972/4921 [33:36<2:19:01,  2.11s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 973/4921 [33:38<2:18:16,  2.10s/it]

tensor(5607, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 974/4921 [33:40<2:17:21,  2.09s/it]

tensor(5607, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 975/4921 [33:42<2:16:07,  2.07s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 976/4921 [33:44<2:18:12,  2.10s/it]

tensor(6307, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 977/4921 [33:46<2:18:19,  2.10s/it]

tensor(4907, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 978/4921 [33:48<2:18:40,  2.11s/it]

tensor(7007, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 979/4921 [33:50<2:17:53,  2.10s/it]

tensor(9107, device='cuda:0') tensor(211, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 980/4921 [33:52<2:16:25,  2.08s/it]

tensor(6307, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 981/4921 [33:54<2:15:25,  2.06s/it]

tensor(6307, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 982/4921 [33:56<2:15:13,  2.06s/it]

tensor(6307, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 983/4921 [33:58<2:16:17,  2.08s/it]

tensor(9107, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|█▉        | 984/4921 [34:01<2:20:08,  2.14s/it]

tensor(5607, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 985/4921 [34:03<2:18:51,  2.12s/it]

tensor(5607, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 986/4921 [34:05<2:17:30,  2.10s/it]

tensor(6307, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 987/4921 [34:07<2:15:18,  2.06s/it]

tensor(6307, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 988/4921 [34:09<2:14:17,  2.05s/it]

tensor(6307, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 989/4921 [34:11<2:14:40,  2.06s/it]

tensor(7007, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 990/4921 [34:13<2:14:35,  2.05s/it]

tensor(6307, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 991/4921 [34:15<2:15:19,  2.07s/it]

tensor(7707, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 992/4921 [34:17<2:19:09,  2.13s/it]

tensor(5607, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 993/4921 [34:19<2:16:44,  2.09s/it]

tensor(4907, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 994/4921 [34:21<2:15:54,  2.08s/it]

tensor(5607, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 995/4921 [34:23<2:15:18,  2.07s/it]

tensor(7007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 996/4921 [34:26<2:15:10,  2.07s/it]

tensor(5607, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 997/4921 [34:28<2:14:57,  2.06s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 998/4921 [34:30<2:14:57,  2.06s/it]

tensor(7007, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 999/4921 [34:32<2:15:35,  2.07s/it]

tensor(5607, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1000/4921 [34:34<2:17:31,  2.10s/it]

tensor(7007, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1001/4921 [34:36<2:16:04,  2.08s/it]

tensor(7007, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1002/4921 [34:38<2:15:22,  2.07s/it]

tensor(7007, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1003/4921 [34:40<2:14:20,  2.06s/it]

tensor(7007, device='cuda:0') tensor(28, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1004/4921 [34:42<2:14:28,  2.06s/it]

tensor(4907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1005/4921 [34:44<2:14:59,  2.07s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1006/4921 [34:46<2:15:19,  2.07s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1007/4921 [34:48<2:16:03,  2.09s/it]

tensor(9107, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 20%|██        | 1008/4921 [34:51<2:18:11,  2.12s/it]

tensor(6307, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1009/4921 [34:53<2:16:01,  2.09s/it]

tensor(7707, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1010/4921 [34:55<2:14:36,  2.07s/it]

tensor(7707, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1011/4921 [34:57<2:15:04,  2.07s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1012/4921 [34:59<2:14:49,  2.07s/it]

tensor(5607, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1013/4921 [35:01<2:14:25,  2.06s/it]

tensor(9807, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1014/4921 [35:03<2:14:15,  2.06s/it]

tensor(9807, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1015/4921 [35:05<2:13:55,  2.06s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1016/4921 [35:07<2:15:48,  2.09s/it]

tensor(6307, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1017/4921 [35:09<2:15:12,  2.08s/it]

tensor(7007, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1018/4921 [35:11<2:15:12,  2.08s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1019/4921 [35:13<2:15:37,  2.09s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1020/4921 [35:15<2:14:19,  2.07s/it]

tensor(4907, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1021/4921 [35:17<2:13:48,  2.06s/it]

tensor(8407, device='cuda:0') tensor(153, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1022/4921 [35:19<2:12:48,  2.04s/it]

tensor(5607, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1023/4921 [35:21<2:12:57,  2.05s/it]

tensor(7707, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1024/4921 [35:24<2:15:58,  2.09s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1025/4921 [35:26<2:15:49,  2.09s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1026/4921 [35:28<2:15:19,  2.08s/it]

tensor(7707, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1027/4921 [35:30<2:14:28,  2.07s/it]

tensor(7007, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1028/4921 [35:32<2:13:25,  2.06s/it]

tensor(4907, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1029/4921 [35:34<2:12:49,  2.05s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1030/4921 [35:36<2:11:57,  2.03s/it]

tensor(6307, device='cuda:0') tensor(164, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1031/4921 [35:38<2:12:01,  2.04s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1032/4921 [35:40<2:15:45,  2.09s/it]

tensor(5607, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1033/4921 [35:42<2:14:14,  2.07s/it]

tensor(8407, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1034/4921 [35:44<2:13:12,  2.06s/it]

tensor(4907, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1035/4921 [35:46<2:12:48,  2.05s/it]

tensor(7007, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1036/4921 [35:48<2:13:06,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1037/4921 [35:50<2:12:41,  2.05s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1038/4921 [35:52<2:14:23,  2.08s/it]

tensor(4907, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1039/4921 [35:55<2:14:10,  2.07s/it]

tensor(14007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1040/4921 [35:57<2:16:26,  2.11s/it]

tensor(4907, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1041/4921 [35:59<2:14:51,  2.09s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1042/4921 [36:01<2:13:35,  2.07s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1043/4921 [36:03<2:12:50,  2.06s/it]

tensor(8407, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1044/4921 [36:05<2:13:34,  2.07s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██        | 1045/4921 [36:07<2:14:01,  2.07s/it]

tensor(8407, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1046/4921 [36:09<2:14:24,  2.08s/it]

tensor(7707, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1047/4921 [36:11<2:13:13,  2.06s/it]

tensor(6307, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1048/4921 [36:13<2:16:43,  2.12s/it]

tensor(7007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1049/4921 [36:15<2:15:12,  2.10s/it]

tensor(6307, device='cuda:0') tensor(256, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1050/4921 [36:17<2:14:21,  2.08s/it]

tensor(4907, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1051/4921 [36:20<2:14:32,  2.09s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1052/4921 [36:22<2:15:19,  2.10s/it]

tensor(11907, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1053/4921 [36:24<2:15:07,  2.10s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1054/4921 [36:26<2:14:11,  2.08s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1055/4921 [36:28<2:13:40,  2.07s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1056/4921 [36:30<2:17:30,  2.13s/it]

tensor(4907, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1057/4921 [36:32<2:15:58,  2.11s/it]

tensor(7007, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 21%|██▏       | 1058/4921 [36:34<2:15:43,  2.11s/it]

tensor(12607, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1059/4921 [36:36<2:15:00,  2.10s/it]

tensor(7707, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1060/4921 [36:38<2:13:46,  2.08s/it]

tensor(5607, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1061/4921 [36:40<2:12:44,  2.06s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1062/4921 [36:43<2:12:35,  2.06s/it]

tensor(6307, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1063/4921 [36:45<2:12:12,  2.06s/it]

tensor(6307, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1064/4921 [36:47<2:15:25,  2.11s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1065/4921 [36:49<2:14:54,  2.10s/it]

tensor(5607, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1066/4921 [36:51<2:14:01,  2.09s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1067/4921 [36:53<2:12:42,  2.07s/it]

tensor(5607, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1068/4921 [36:55<2:12:17,  2.06s/it]

tensor(5607, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1069/4921 [36:57<2:12:54,  2.07s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1070/4921 [36:59<2:12:25,  2.06s/it]

tensor(6307, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1071/4921 [37:01<2:12:06,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1072/4921 [37:03<2:15:10,  2.11s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1073/4921 [37:05<2:13:46,  2.09s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1074/4921 [37:07<2:13:07,  2.08s/it]

tensor(6307, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1075/4921 [37:09<2:12:04,  2.06s/it]

tensor(5607, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1076/4921 [37:11<2:10:51,  2.04s/it]

tensor(4907, device='cuda:0') tensor(589, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1077/4921 [37:13<2:09:35,  2.02s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1078/4921 [37:16<2:10:23,  2.04s/it]

tensor(4907, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1079/4921 [37:18<2:11:11,  2.05s/it]

tensor(5607, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1080/4921 [37:20<2:12:46,  2.07s/it]

tensor(7007, device='cuda:0') tensor(94, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1081/4921 [37:22<2:11:39,  2.06s/it]

tensor(7007, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1082/4921 [37:24<2:10:58,  2.05s/it]

tensor(6307, device='cuda:0') tensor(94, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1083/4921 [37:26<2:09:58,  2.03s/it]

tensor(7007, device='cuda:0') tensor(165, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1084/4921 [37:28<2:10:19,  2.04s/it]

tensor(4907, device='cuda:0') tensor(99, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1085/4921 [37:30<2:10:22,  2.04s/it]

tensor(11207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1086/4921 [37:32<2:10:10,  2.04s/it]

tensor(4907, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1087/4921 [37:34<2:11:17,  2.05s/it]

tensor(16807, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1088/4921 [37:36<2:12:54,  2.08s/it]

tensor(7007, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1089/4921 [37:38<2:11:38,  2.06s/it]

tensor(7707, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1090/4921 [37:40<2:09:52,  2.03s/it]

tensor(12607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1091/4921 [37:42<2:09:09,  2.02s/it]

tensor(5607, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1092/4921 [37:44<2:09:19,  2.03s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1093/4921 [37:46<2:09:22,  2.03s/it]

tensor(4907, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1094/4921 [37:48<2:09:34,  2.03s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1095/4921 [37:50<2:09:08,  2.03s/it]

tensor(11907, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1096/4921 [37:52<2:11:07,  2.06s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1097/4921 [37:54<2:09:53,  2.04s/it]

tensor(4907, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1098/4921 [37:56<2:09:51,  2.04s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1099/4921 [37:59<2:11:12,  2.06s/it]

tensor(7007, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1100/4921 [38:01<2:10:51,  2.05s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1101/4921 [38:03<2:10:00,  2.04s/it]

tensor(6307, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1102/4921 [38:05<2:09:21,  2.03s/it]

tensor(7007, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1103/4921 [38:07<2:08:24,  2.02s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1104/4921 [38:09<2:09:44,  2.04s/it]

tensor(4907, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1105/4921 [38:11<2:11:39,  2.07s/it]

tensor(4907, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1106/4921 [38:13<2:11:49,  2.07s/it]

tensor(7707, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 22%|██▏       | 1107/4921 [38:15<2:10:54,  2.06s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1108/4921 [38:17<2:09:49,  2.04s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1109/4921 [38:19<2:08:21,  2.02s/it]

tensor(11207, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1110/4921 [38:21<2:08:48,  2.03s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1111/4921 [38:23<2:08:15,  2.02s/it]

tensor(5607, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1112/4921 [38:25<2:11:18,  2.07s/it]

tensor(5607, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1113/4921 [38:27<2:10:40,  2.06s/it]

tensor(4907, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1114/4921 [38:29<2:09:10,  2.04s/it]

tensor(5607, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1115/4921 [38:31<2:07:42,  2.01s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1116/4921 [38:33<2:07:35,  2.01s/it]

tensor(4907, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1117/4921 [38:35<2:07:34,  2.01s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1118/4921 [38:37<2:07:34,  2.01s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1119/4921 [38:39<2:08:06,  2.02s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1120/4921 [38:41<2:11:16,  2.07s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1121/4921 [38:43<2:10:12,  2.06s/it]

tensor(7007, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1122/4921 [38:45<2:08:26,  2.03s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1123/4921 [38:47<2:08:33,  2.03s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1124/4921 [38:49<2:08:49,  2.04s/it]

tensor(11207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1125/4921 [38:52<2:09:32,  2.05s/it]

tensor(4907, device='cuda:0') tensor(132, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1126/4921 [38:54<2:10:52,  2.07s/it]

tensor(11907, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1127/4921 [38:56<2:11:01,  2.07s/it]

tensor(4907, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1128/4921 [38:58<2:11:51,  2.09s/it]

tensor(4907, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1129/4921 [39:00<2:10:52,  2.07s/it]

tensor(9807, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1130/4921 [39:02<2:09:59,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1131/4921 [39:04<2:09:25,  2.05s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1132/4921 [39:06<2:10:33,  2.07s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1133/4921 [39:08<2:09:50,  2.06s/it]

tensor(9107, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1134/4921 [39:10<2:08:46,  2.04s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1135/4921 [39:12<2:07:48,  2.03s/it]

tensor(9807, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1136/4921 [39:14<2:10:24,  2.07s/it]

tensor(5607, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1137/4921 [39:16<2:09:57,  2.06s/it]

tensor(8407, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1138/4921 [39:18<2:09:46,  2.06s/it]

tensor(4907, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1139/4921 [39:20<2:10:42,  2.07s/it]

tensor(5607, device='cuda:0') tensor(198, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1140/4921 [39:22<2:09:44,  2.06s/it]

tensor(5607, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1141/4921 [39:24<2:09:25,  2.05s/it]

tensor(6307, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1142/4921 [39:27<2:08:55,  2.05s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1143/4921 [39:29<2:07:57,  2.03s/it]

tensor(4907, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1144/4921 [39:31<2:11:01,  2.08s/it]

tensor(5607, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1145/4921 [39:33<2:10:23,  2.07s/it]

tensor(7707, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1146/4921 [39:35<2:10:30,  2.07s/it]

tensor(6307, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1147/4921 [39:37<2:10:26,  2.07s/it]

tensor(9107, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1148/4921 [39:39<2:09:24,  2.06s/it]

tensor(16107, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1149/4921 [39:41<2:08:28,  2.04s/it]

tensor(4907, device='cuda:0') tensor(21, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1150/4921 [39:43<2:07:31,  2.03s/it]

tensor(6307, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1151/4921 [39:45<2:06:32,  2.01s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1152/4921 [39:47<2:09:02,  2.05s/it]

tensor(6307, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1153/4921 [39:49<2:08:55,  2.05s/it]

tensor(5607, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1154/4921 [39:51<2:08:22,  2.04s/it]

tensor(4907, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1155/4921 [39:53<2:07:32,  2.03s/it]

tensor(5607, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 23%|██▎       | 1156/4921 [39:55<2:06:57,  2.02s/it]

tensor(4907, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1157/4921 [39:57<2:06:15,  2.01s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1158/4921 [39:59<2:05:25,  2.00s/it]

tensor(8407, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1159/4921 [40:01<2:06:04,  2.01s/it]

tensor(6307, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1160/4921 [40:03<2:09:45,  2.07s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1161/4921 [40:05<2:08:42,  2.05s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1162/4921 [40:07<2:07:37,  2.04s/it]

tensor(9107, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1163/4921 [40:09<2:07:45,  2.04s/it]

tensor(7007, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1164/4921 [40:11<2:06:46,  2.02s/it]

tensor(7007, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1165/4921 [40:13<2:06:29,  2.02s/it]

tensor(7007, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1166/4921 [40:15<2:07:17,  2.03s/it]

tensor(7707, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1167/4921 [40:18<2:07:52,  2.04s/it]

tensor(5607, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▎       | 1168/4921 [40:20<2:09:59,  2.08s/it]

tensor(7707, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1169/4921 [40:22<2:08:43,  2.06s/it]

tensor(5607, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1170/4921 [40:24<2:07:56,  2.05s/it]

tensor(9807, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1171/4921 [40:26<2:07:32,  2.04s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1172/4921 [40:28<2:07:45,  2.04s/it]

tensor(6307, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1173/4921 [40:30<2:09:00,  2.07s/it]

tensor(7707, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1174/4921 [40:32<2:08:43,  2.06s/it]

tensor(5607, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1175/4921 [40:34<2:08:12,  2.05s/it]

tensor(5607, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1176/4921 [40:36<2:10:06,  2.08s/it]

tensor(7707, device='cuda:0') tensor(99, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1177/4921 [40:38<2:08:40,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1178/4921 [40:40<2:07:53,  2.05s/it]

tensor(7007, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1179/4921 [40:42<2:08:26,  2.06s/it]

tensor(4907, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1180/4921 [40:44<2:08:48,  2.07s/it]

tensor(4907, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1181/4921 [40:46<2:08:58,  2.07s/it]

tensor(9107, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1182/4921 [40:48<2:08:12,  2.06s/it]

tensor(6307, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1183/4921 [40:51<2:07:39,  2.05s/it]

tensor(11907, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1184/4921 [40:53<2:09:31,  2.08s/it]

tensor(6307, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1185/4921 [40:55<2:08:27,  2.06s/it]

tensor(6307, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1186/4921 [40:57<2:08:53,  2.07s/it]

tensor(5607, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1187/4921 [40:59<2:08:06,  2.06s/it]

tensor(9807, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1188/4921 [41:01<2:07:26,  2.05s/it]

tensor(8407, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1189/4921 [41:03<2:06:37,  2.04s/it]

tensor(4907, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1190/4921 [41:05<2:06:34,  2.04s/it]

tensor(9107, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1191/4921 [41:07<2:06:45,  2.04s/it]

tensor(13307, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1192/4921 [41:09<2:10:02,  2.09s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1193/4921 [41:11<2:09:51,  2.09s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1194/4921 [41:13<2:09:28,  2.08s/it]

tensor(5607, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1195/4921 [41:15<2:08:46,  2.07s/it]

tensor(4907, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1196/4921 [41:17<2:07:39,  2.06s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1197/4921 [41:19<2:06:48,  2.04s/it]

tensor(6307, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1198/4921 [41:21<2:06:43,  2.04s/it]

tensor(14007, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1199/4921 [41:23<2:06:56,  2.05s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1200/4921 [41:26<2:11:11,  2.12s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1201/4921 [41:28<2:09:35,  2.09s/it]

tensor(4907, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1202/4921 [41:30<2:08:30,  2.07s/it]

tensor(5607, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1203/4921 [41:32<2:07:35,  2.06s/it]

tensor(5607, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1204/4921 [41:34<2:07:09,  2.05s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 24%|██▍       | 1205/4921 [41:36<2:06:06,  2.04s/it]

tensor(4907, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1206/4921 [41:38<2:07:36,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1207/4921 [41:40<2:07:10,  2.05s/it]

tensor(8407, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1208/4921 [41:42<2:08:12,  2.07s/it]

tensor(6307, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1209/4921 [41:44<2:07:25,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1210/4921 [41:46<2:06:35,  2.05s/it]

tensor(5607, device='cuda:0') tensor(84, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1211/4921 [41:48<2:05:31,  2.03s/it]

tensor(7007, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1212/4921 [41:50<2:05:57,  2.04s/it]

tensor(5607, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1213/4921 [41:52<2:06:13,  2.04s/it]

tensor(4907, device='cuda:0') tensor(25, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1214/4921 [41:54<2:06:11,  2.04s/it]

tensor(4907, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1215/4921 [41:56<2:05:02,  2.02s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1216/4921 [41:58<2:08:12,  2.08s/it]

tensor(4907, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1217/4921 [42:01<2:07:23,  2.06s/it]

tensor(21007, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1218/4921 [42:03<2:06:29,  2.05s/it]

tensor(4907, device='cuda:0') tensor(236, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1219/4921 [42:05<2:06:16,  2.05s/it]

tensor(13307, device='cuda:0') tensor(120, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1220/4921 [42:07<2:06:17,  2.05s/it]

tensor(4907, device='cuda:0') tensor(173, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1221/4921 [42:09<2:05:59,  2.04s/it]

tensor(6307, device='cuda:0') tensor(125, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1222/4921 [42:11<2:05:53,  2.04s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1223/4921 [42:13<2:05:26,  2.04s/it]

tensor(7707, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1224/4921 [42:15<2:07:19,  2.07s/it]

tensor(4907, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1225/4921 [42:17<2:06:54,  2.06s/it]

tensor(5607, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1226/4921 [42:19<2:06:30,  2.05s/it]

tensor(7007, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1227/4921 [42:21<2:07:20,  2.07s/it]

tensor(5607, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1228/4921 [42:23<2:06:34,  2.06s/it]

tensor(6307, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1229/4921 [42:25<2:05:40,  2.04s/it]

tensor(4907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▍       | 1230/4921 [42:27<2:04:32,  2.02s/it]

tensor(4907, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1231/4921 [42:29<2:03:52,  2.01s/it]

tensor(6307, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1232/4921 [42:31<2:06:13,  2.05s/it]

tensor(4907, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1233/4921 [42:33<2:06:36,  2.06s/it]

tensor(6307, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1234/4921 [42:35<2:08:14,  2.09s/it]

tensor(11207, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1235/4921 [42:37<2:07:13,  2.07s/it]

tensor(7707, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1236/4921 [42:39<2:05:52,  2.05s/it]

tensor(5607, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1237/4921 [42:41<2:04:43,  2.03s/it]

tensor(5607, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1238/4921 [42:43<2:03:37,  2.01s/it]

tensor(5607, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1239/4921 [42:45<2:02:56,  2.00s/it]

tensor(6307, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1240/4921 [42:48<2:07:14,  2.07s/it]

tensor(6307, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1241/4921 [42:50<2:07:59,  2.09s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1242/4921 [42:52<2:05:56,  2.05s/it]

tensor(5607, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1243/4921 [42:54<2:04:44,  2.03s/it]

tensor(4207, device='cuda:0') tensor(589, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1244/4921 [42:56<2:03:51,  2.02s/it]

tensor(6307, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1245/4921 [42:58<2:02:40,  2.00s/it]

tensor(7007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1246/4921 [43:00<2:02:56,  2.01s/it]

tensor(5607, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1247/4921 [43:02<2:04:41,  2.04s/it]

tensor(5607, device='cuda:0') tensor(153, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1248/4921 [43:04<2:07:38,  2.09s/it]

tensor(6307, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1249/4921 [43:06<2:06:37,  2.07s/it]

tensor(7007, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1250/4921 [43:08<2:05:21,  2.05s/it]

tensor(13307, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1251/4921 [43:10<2:04:15,  2.03s/it]

tensor(5607, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1252/4921 [43:12<2:03:12,  2.01s/it]

tensor(4907, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1253/4921 [43:14<2:03:48,  2.03s/it]

tensor(4907, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 25%|██▌       | 1254/4921 [43:16<2:04:12,  2.03s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1255/4921 [43:18<2:03:44,  2.03s/it]

tensor(7007, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1256/4921 [43:20<2:06:45,  2.08s/it]

tensor(14707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1257/4921 [43:22<2:05:46,  2.06s/it]

tensor(6307, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1258/4921 [43:24<2:04:19,  2.04s/it]

tensor(7007, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1259/4921 [43:26<2:03:53,  2.03s/it]

tensor(5607, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1260/4921 [43:28<2:04:07,  2.03s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1261/4921 [43:30<2:03:57,  2.03s/it]

tensor(8407, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1262/4921 [43:32<2:03:30,  2.03s/it]

tensor(6307, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1263/4921 [43:34<2:03:51,  2.03s/it]

tensor(7007, device='cuda:0') tensor(134, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1264/4921 [43:37<2:05:30,  2.06s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1265/4921 [43:39<2:04:31,  2.04s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1266/4921 [43:41<2:03:54,  2.03s/it]

tensor(6307, device='cuda:0') tensor(18, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1267/4921 [43:43<2:04:04,  2.04s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1268/4921 [43:45<2:04:17,  2.04s/it]

tensor(4907, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1269/4921 [43:47<2:03:48,  2.03s/it]

tensor(15407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1270/4921 [43:49<2:03:05,  2.02s/it]

tensor(4907, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1271/4921 [43:51<2:02:38,  2.02s/it]

tensor(4907, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1272/4921 [43:53<2:05:40,  2.07s/it]

tensor(4207, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1273/4921 [43:55<2:04:36,  2.05s/it]

tensor(4907, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1274/4921 [43:57<2:04:28,  2.05s/it]

tensor(6307, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1275/4921 [43:59<2:04:31,  2.05s/it]

tensor(8407, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1276/4921 [44:01<2:03:47,  2.04s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1277/4921 [44:03<2:02:50,  2.02s/it]

tensor(5607, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1278/4921 [44:05<2:03:27,  2.03s/it]

tensor(5607, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1279/4921 [44:07<2:03:04,  2.03s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1280/4921 [44:09<2:06:05,  2.08s/it]

tensor(4907, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1281/4921 [44:11<2:05:23,  2.07s/it]

tensor(4907, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1282/4921 [44:13<2:04:34,  2.05s/it]

tensor(7707, device='cuda:0') tensor(174, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1283/4921 [44:15<2:04:25,  2.05s/it]

tensor(6307, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1284/4921 [44:17<2:03:42,  2.04s/it]

tensor(4907, device='cuda:0') tensor(262, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1285/4921 [44:19<2:02:57,  2.03s/it]

tensor(9807, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1286/4921 [44:21<2:02:16,  2.02s/it]

tensor(6307, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1287/4921 [44:23<2:02:50,  2.03s/it]

tensor(5607, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1288/4921 [44:26<2:07:03,  2.10s/it]

tensor(9807, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1289/4921 [44:28<2:05:33,  2.07s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1290/4921 [44:30<2:04:39,  2.06s/it]

tensor(4907, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▌       | 1291/4921 [44:32<2:03:56,  2.05s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1292/4921 [44:34<2:03:13,  2.04s/it]

tensor(5607, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1293/4921 [44:36<2:02:24,  2.02s/it]

tensor(4907, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1294/4921 [44:38<2:01:48,  2.02s/it]

tensor(4907, device='cuda:0') tensor(578, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1295/4921 [44:40<2:02:54,  2.03s/it]

tensor(5607, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1296/4921 [44:42<2:05:11,  2.07s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1297/4921 [44:44<2:04:49,  2.07s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1298/4921 [44:46<2:04:01,  2.05s/it]

tensor(8407, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1299/4921 [44:48<2:02:43,  2.03s/it]

tensor(5607, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1300/4921 [44:50<2:02:48,  2.03s/it]

tensor(7707, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1301/4921 [44:52<2:03:15,  2.04s/it]

tensor(5607, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1302/4921 [44:54<2:03:31,  2.05s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1303/4921 [44:56<2:03:51,  2.05s/it]

tensor(5607, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 26%|██▋       | 1304/4921 [44:58<2:05:34,  2.08s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1305/4921 [45:00<2:04:34,  2.07s/it]

tensor(4907, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1306/4921 [45:03<2:03:57,  2.06s/it]

tensor(5607, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1307/4921 [45:05<2:04:03,  2.06s/it]

tensor(4207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1308/4921 [45:07<2:04:18,  2.06s/it]

tensor(7007, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1309/4921 [45:09<2:05:07,  2.08s/it]

tensor(7707, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1310/4921 [45:11<2:04:03,  2.06s/it]

tensor(4907, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1311/4921 [45:13<2:03:04,  2.05s/it]

tensor(4907, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1312/4921 [45:15<2:05:02,  2.08s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1313/4921 [45:17<2:03:45,  2.06s/it]

tensor(8407, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1314/4921 [45:19<2:04:03,  2.06s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1315/4921 [45:21<2:05:46,  2.09s/it]

tensor(7707, device='cuda:0') tensor(94, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1316/4921 [45:23<2:04:10,  2.07s/it]

tensor(5607, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1317/4921 [45:25<2:03:24,  2.05s/it]

tensor(7007, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1318/4921 [45:27<2:02:53,  2.05s/it]

tensor(4907, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1319/4921 [45:29<2:02:59,  2.05s/it]

tensor(12607, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1320/4921 [45:32<2:05:42,  2.09s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1321/4921 [45:34<2:06:29,  2.11s/it]

tensor(9807, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1322/4921 [45:36<2:05:44,  2.10s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1323/4921 [45:38<2:04:09,  2.07s/it]

tensor(7007, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1324/4921 [45:40<2:03:40,  2.06s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1325/4921 [45:42<2:03:23,  2.06s/it]

tensor(7707, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1326/4921 [45:44<2:02:15,  2.04s/it]

tensor(6307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1327/4921 [45:46<2:02:36,  2.05s/it]

tensor(4907, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1328/4921 [45:48<2:05:31,  2.10s/it]

tensor(4907, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1329/4921 [45:50<2:04:53,  2.09s/it]

tensor(4907, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1330/4921 [45:52<2:03:26,  2.06s/it]

tensor(4907, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1331/4921 [45:54<2:03:37,  2.07s/it]

tensor(4907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1332/4921 [45:56<2:02:59,  2.06s/it]

tensor(7007, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1333/4921 [45:58<2:02:31,  2.05s/it]

tensor(5607, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1334/4921 [46:00<2:03:53,  2.07s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1335/4921 [46:03<2:04:40,  2.09s/it]

tensor(5607, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1336/4921 [46:05<2:06:31,  2.12s/it]

tensor(4907, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1337/4921 [46:07<2:04:23,  2.08s/it]

tensor(8407, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1338/4921 [46:09<2:02:46,  2.06s/it]

tensor(7007, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1339/4921 [46:11<2:01:47,  2.04s/it]

tensor(5607, device='cuda:0') tensor(45, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1340/4921 [46:13<2:01:18,  2.03s/it]

tensor(4207, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1341/4921 [46:15<2:01:22,  2.03s/it]

tensor(7707, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1342/4921 [46:17<2:02:22,  2.05s/it]

tensor(6307, device='cuda:0') tensor(144, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1343/4921 [46:19<2:01:31,  2.04s/it]

tensor(7007, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1344/4921 [46:21<2:03:15,  2.07s/it]

tensor(5607, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1345/4921 [46:23<2:01:39,  2.04s/it]

tensor(7707, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1346/4921 [46:25<2:00:46,  2.03s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1347/4921 [46:27<2:00:09,  2.02s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1348/4921 [46:29<2:00:51,  2.03s/it]

tensor(6307, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1349/4921 [46:31<2:01:30,  2.04s/it]

tensor(5607, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1350/4921 [46:33<2:01:06,  2.03s/it]

tensor(5607, device='cuda:0') tensor(195, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1351/4921 [46:35<2:00:28,  2.02s/it]

tensor(5607, device='cuda:0') tensor(45, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1352/4921 [46:37<2:03:06,  2.07s/it]

tensor(4207, device='cuda:0') tensor(247, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 27%|██▋       | 1353/4921 [46:39<2:01:20,  2.04s/it]

tensor(4907, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1354/4921 [46:41<2:00:20,  2.02s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1355/4921 [46:43<2:00:39,  2.03s/it]

tensor(4907, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1356/4921 [46:45<2:01:06,  2.04s/it]

tensor(7707, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1357/4921 [46:47<2:00:52,  2.03s/it]

tensor(4907, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1358/4921 [46:49<2:00:50,  2.03s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1359/4921 [46:51<1:59:49,  2.02s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1360/4921 [46:54<2:01:36,  2.05s/it]

tensor(4907, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1361/4921 [46:56<2:00:51,  2.04s/it]

tensor(12607, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1362/4921 [46:58<2:01:16,  2.04s/it]

tensor(4207, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1363/4921 [47:00<2:00:11,  2.03s/it]

tensor(7707, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1364/4921 [47:02<1:59:44,  2.02s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1365/4921 [47:04<1:59:39,  2.02s/it]

tensor(6307, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1366/4921 [47:06<1:59:06,  2.01s/it]

tensor(5607, device='cuda:0') tensor(18, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1367/4921 [47:08<1:59:07,  2.01s/it]

tensor(4207, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1368/4921 [47:10<2:02:20,  2.07s/it]

tensor(5607, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1369/4921 [47:12<2:01:44,  2.06s/it]

tensor(4907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1370/4921 [47:14<2:00:46,  2.04s/it]

tensor(5607, device='cuda:0') tensor(18, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1371/4921 [47:16<2:00:27,  2.04s/it]

tensor(6307, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1372/4921 [47:18<1:59:39,  2.02s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1373/4921 [47:20<1:59:41,  2.02s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1374/4921 [47:22<1:59:33,  2.02s/it]

tensor(4907, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1375/4921 [47:24<2:00:19,  2.04s/it]

tensor(4207, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1376/4921 [47:26<2:03:27,  2.09s/it]

tensor(7707, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1377/4921 [47:28<2:02:14,  2.07s/it]

tensor(7707, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1378/4921 [47:30<2:00:54,  2.05s/it]

tensor(7007, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1379/4921 [47:32<2:00:26,  2.04s/it]

tensor(4907, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1380/4921 [47:34<1:59:53,  2.03s/it]

tensor(11907, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1381/4921 [47:36<2:00:39,  2.05s/it]

tensor(7007, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1382/4921 [47:38<2:00:54,  2.05s/it]

tensor(5607, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1383/4921 [47:40<2:01:31,  2.06s/it]

tensor(6307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1384/4921 [47:43<2:02:45,  2.08s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1385/4921 [47:45<2:00:39,  2.05s/it]

tensor(4907, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1386/4921 [47:47<2:00:28,  2.04s/it]

tensor(5607, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1387/4921 [47:49<1:59:51,  2.03s/it]

tensor(4907, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1388/4921 [47:51<2:00:18,  2.04s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1389/4921 [47:53<2:00:57,  2.05s/it]

tensor(4207, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1390/4921 [47:55<2:00:35,  2.05s/it]

tensor(7707, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1391/4921 [47:57<1:59:27,  2.03s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1392/4921 [47:59<2:00:57,  2.06s/it]

tensor(5607, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1393/4921 [48:01<1:59:51,  2.04s/it]

tensor(8407, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1394/4921 [48:03<1:58:25,  2.01s/it]

tensor(6307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1395/4921 [48:05<1:59:06,  2.03s/it]

tensor(7707, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1396/4921 [48:07<1:59:30,  2.03s/it]

tensor(5607, device='cuda:0') tensor(120, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1397/4921 [48:09<1:58:45,  2.02s/it]

tensor(5607, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1398/4921 [48:11<1:58:27,  2.02s/it]

tensor(7007, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1399/4921 [48:13<1:58:25,  2.02s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1400/4921 [48:15<2:00:18,  2.05s/it]

tensor(8407, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1401/4921 [48:17<1:59:48,  2.04s/it]

tensor(8407, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 28%|██▊       | 1402/4921 [48:19<2:00:13,  2.05s/it]

tensor(6307, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1403/4921 [48:21<2:00:38,  2.06s/it]

tensor(4907, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1404/4921 [48:23<2:01:28,  2.07s/it]

tensor(4907, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1405/4921 [48:25<1:59:59,  2.05s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1406/4921 [48:27<1:59:02,  2.03s/it]

tensor(7707, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1407/4921 [48:29<1:58:21,  2.02s/it]

tensor(4907, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1408/4921 [48:32<2:01:56,  2.08s/it]

tensor(5607, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1409/4921 [48:34<2:00:56,  2.07s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1410/4921 [48:36<2:00:15,  2.06s/it]

tensor(4207, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1411/4921 [48:38<1:59:18,  2.04s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1412/4921 [48:40<1:58:04,  2.02s/it]

tensor(15407, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1413/4921 [48:42<1:57:51,  2.02s/it]

tensor(6307, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▊       | 1414/4921 [48:44<1:58:01,  2.02s/it]

tensor(6307, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1415/4921 [48:46<1:58:36,  2.03s/it]

tensor(7007, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1416/4921 [48:48<2:01:19,  2.08s/it]

tensor(8407, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1417/4921 [48:50<2:01:04,  2.07s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1418/4921 [48:52<1:59:07,  2.04s/it]

tensor(4907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1419/4921 [48:54<1:58:20,  2.03s/it]

tensor(7007, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1420/4921 [48:56<1:58:12,  2.03s/it]

tensor(4907, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1421/4921 [48:58<1:58:05,  2.02s/it]

tensor(6307, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1422/4921 [49:00<1:57:59,  2.02s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1423/4921 [49:02<1:58:11,  2.03s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1424/4921 [49:04<2:00:49,  2.07s/it]

tensor(4207, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1425/4921 [49:06<1:59:31,  2.05s/it]

tensor(4207, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1426/4921 [49:08<1:59:31,  2.05s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1427/4921 [49:10<1:59:04,  2.04s/it]

tensor(5607, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1428/4921 [49:12<1:58:37,  2.04s/it]

tensor(10507, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1429/4921 [49:14<1:59:08,  2.05s/it]

tensor(4907, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1430/4921 [49:16<2:00:04,  2.06s/it]

tensor(5607, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1431/4921 [49:18<1:58:45,  2.04s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1432/4921 [49:21<2:00:50,  2.08s/it]

tensor(8407, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1433/4921 [49:23<1:59:17,  2.05s/it]

tensor(7007, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1434/4921 [49:25<1:58:29,  2.04s/it]

tensor(7007, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1435/4921 [49:27<1:58:13,  2.03s/it]

tensor(6307, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1436/4921 [49:29<1:58:28,  2.04s/it]

tensor(5607, device='cuda:0') tensor(230, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1437/4921 [49:31<1:58:51,  2.05s/it]

tensor(7007, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1438/4921 [49:33<1:59:08,  2.05s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1439/4921 [49:35<1:58:54,  2.05s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1440/4921 [49:37<2:00:38,  2.08s/it]

tensor(11907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1441/4921 [49:39<2:00:12,  2.07s/it]

tensor(11207, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1442/4921 [49:41<1:59:58,  2.07s/it]

tensor(4907, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1443/4921 [49:43<2:01:22,  2.09s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1444/4921 [49:45<2:00:45,  2.08s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1445/4921 [49:47<2:00:54,  2.09s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1446/4921 [49:49<1:59:34,  2.06s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1447/4921 [49:51<1:58:31,  2.05s/it]

tensor(5607, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1448/4921 [49:54<2:00:24,  2.08s/it]

tensor(4907, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1449/4921 [49:56<1:59:52,  2.07s/it]

tensor(4907, device='cuda:0') tensor(148, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1450/4921 [49:58<2:00:14,  2.08s/it]

tensor(4907, device='cuda:0') tensor(162, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 29%|██▉       | 1451/4921 [50:00<1:59:32,  2.07s/it]

tensor(9807, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1452/4921 [50:02<1:59:20,  2.06s/it]

tensor(11907, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1453/4921 [50:04<1:59:12,  2.06s/it]

tensor(7707, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1454/4921 [50:06<1:58:24,  2.05s/it]

tensor(11207, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1455/4921 [50:08<1:58:26,  2.05s/it]

tensor(5607, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1456/4921 [50:10<2:01:43,  2.11s/it]

tensor(4907, device='cuda:0') tensor(94, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1457/4921 [50:12<2:01:10,  2.10s/it]

tensor(4907, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1458/4921 [50:14<1:59:11,  2.07s/it]

tensor(4907, device='cuda:0') tensor(129, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1459/4921 [50:16<1:58:56,  2.06s/it]

tensor(4907, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1460/4921 [50:18<1:57:34,  2.04s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1461/4921 [50:20<1:57:16,  2.03s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1462/4921 [50:22<1:58:08,  2.05s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1463/4921 [50:25<1:58:25,  2.05s/it]

tensor(4207, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1464/4921 [50:27<2:00:07,  2.09s/it]

tensor(7007, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1465/4921 [50:29<1:59:27,  2.07s/it]

tensor(7007, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1466/4921 [50:31<1:59:09,  2.07s/it]

tensor(9107, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1467/4921 [50:33<1:58:24,  2.06s/it]

tensor(7007, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1468/4921 [50:35<1:58:06,  2.05s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1469/4921 [50:37<1:58:08,  2.05s/it]

tensor(5607, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1470/4921 [50:39<1:59:12,  2.07s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1471/4921 [50:41<1:57:58,  2.05s/it]

tensor(8407, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1472/4921 [50:43<1:59:14,  2.07s/it]

tensor(9807, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1473/4921 [50:45<1:58:03,  2.05s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1474/4921 [50:47<1:57:01,  2.04s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1475/4921 [50:49<1:56:47,  2.03s/it]

tensor(4207, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|██▉       | 1476/4921 [50:51<1:57:00,  2.04s/it]

tensor(6307, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1477/4921 [50:53<1:57:11,  2.04s/it]

tensor(15407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1478/4921 [50:55<1:55:55,  2.02s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1479/4921 [50:57<1:55:45,  2.02s/it]

tensor(5607, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1480/4921 [50:59<1:57:53,  2.06s/it]

tensor(5607, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1481/4921 [51:01<1:56:51,  2.04s/it]

tensor(7007, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1482/4921 [51:03<1:56:10,  2.03s/it]

tensor(4907, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1483/4921 [51:05<1:56:24,  2.03s/it]

tensor(11207, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1484/4921 [51:07<1:56:36,  2.04s/it]

tensor(11207, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1485/4921 [51:09<1:55:41,  2.02s/it]

tensor(4907, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1486/4921 [51:11<1:55:34,  2.02s/it]

tensor(5607, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1487/4921 [51:14<1:55:31,  2.02s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1488/4921 [51:16<1:58:48,  2.08s/it]

tensor(4907, device='cuda:0') tensor(5, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1489/4921 [51:18<1:57:45,  2.06s/it]

tensor(4207, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1490/4921 [51:20<1:57:30,  2.05s/it]

tensor(7707, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1491/4921 [51:22<1:56:54,  2.04s/it]

tensor(5607, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1492/4921 [51:24<1:56:38,  2.04s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1493/4921 [51:26<1:56:36,  2.04s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1494/4921 [51:28<1:57:04,  2.05s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1495/4921 [51:30<1:55:56,  2.03s/it]

tensor(5607, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1496/4921 [51:32<1:58:29,  2.08s/it]

tensor(4907, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1497/4921 [51:34<1:58:24,  2.07s/it]

tensor(4907, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1498/4921 [51:36<1:58:44,  2.08s/it]

tensor(4907, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1499/4921 [51:38<1:58:05,  2.07s/it]

tensor(5607, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 30%|███       | 1500/4921 [51:40<1:57:12,  2.06s/it]

tensor(6307, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1501/4921 [51:42<1:56:15,  2.04s/it]

tensor(4907, device='cuda:0') tensor(200, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1502/4921 [51:44<1:55:38,  2.03s/it]

tensor(4907, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1503/4921 [51:46<1:55:38,  2.03s/it]

tensor(4207, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1504/4921 [51:49<1:59:12,  2.09s/it]

tensor(9107, device='cuda:0') tensor(125, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1505/4921 [51:51<1:58:35,  2.08s/it]

tensor(6307, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1506/4921 [51:53<1:57:37,  2.07s/it]

tensor(8407, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1507/4921 [51:55<1:56:30,  2.05s/it]

tensor(8407, device='cuda:0') tensor(144, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1508/4921 [51:57<1:55:23,  2.03s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1509/4921 [51:59<1:54:21,  2.01s/it]

tensor(4907, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1510/4921 [52:01<1:54:30,  2.01s/it]

tensor(8407, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1511/4921 [52:03<1:54:35,  2.02s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1512/4921 [52:05<1:57:27,  2.07s/it]

tensor(4907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1513/4921 [52:07<1:56:21,  2.05s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1514/4921 [52:09<1:55:50,  2.04s/it]

tensor(5607, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1515/4921 [52:11<1:55:54,  2.04s/it]

tensor(4907, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1516/4921 [52:13<1:56:12,  2.05s/it]

tensor(4907, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1517/4921 [52:15<1:56:34,  2.05s/it]

tensor(4907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1518/4921 [52:17<1:57:07,  2.07s/it]

tensor(4907, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1519/4921 [52:19<1:56:43,  2.06s/it]

tensor(5607, device='cuda:0') tensor(21, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1520/4921 [52:21<1:58:49,  2.10s/it]

tensor(7007, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1521/4921 [52:23<1:57:27,  2.07s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1522/4921 [52:25<1:56:37,  2.06s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1523/4921 [52:28<1:57:31,  2.08s/it]

tensor(4907, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1524/4921 [52:30<1:57:51,  2.08s/it]

tensor(5607, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1525/4921 [52:32<1:57:42,  2.08s/it]

tensor(4207, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1526/4921 [52:34<1:56:36,  2.06s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1527/4921 [52:36<1:56:06,  2.05s/it]

tensor(7707, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1528/4921 [52:38<1:57:57,  2.09s/it]

tensor(5607, device='cuda:0') tensor(162, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1529/4921 [52:40<1:57:05,  2.07s/it]

tensor(4907, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1530/4921 [52:42<1:57:22,  2.08s/it]

tensor(4907, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1531/4921 [52:44<1:57:38,  2.08s/it]

tensor(7707, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1532/4921 [52:46<1:56:28,  2.06s/it]

tensor(7007, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1533/4921 [52:48<1:56:11,  2.06s/it]

tensor(8407, device='cuda:0') tensor(45, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1534/4921 [52:50<1:55:35,  2.05s/it]

tensor(9107, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1535/4921 [52:52<1:55:01,  2.04s/it]

tensor(4907, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1536/4921 [52:54<1:57:43,  2.09s/it]

tensor(6307, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███       | 1537/4921 [52:57<1:58:23,  2.10s/it]

tensor(4907, device='cuda:0') tensor(169, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1538/4921 [52:59<1:57:30,  2.08s/it]

tensor(4907, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1539/4921 [53:01<1:56:03,  2.06s/it]

tensor(4907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1540/4921 [53:03<1:55:24,  2.05s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1541/4921 [53:05<1:55:52,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1542/4921 [53:07<1:55:38,  2.05s/it]

tensor(6307, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1543/4921 [53:09<1:55:50,  2.06s/it]

tensor(7007, device='cuda:0') tensor(133, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1544/4921 [53:11<1:59:28,  2.12s/it]

tensor(5607, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1545/4921 [53:13<1:58:18,  2.10s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1546/4921 [53:15<1:57:04,  2.08s/it]

tensor(4907, device='cuda:0') tensor(223, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1547/4921 [53:17<1:56:34,  2.07s/it]

tensor(4907, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1548/4921 [53:19<1:56:25,  2.07s/it]

tensor(5607, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1549/4921 [53:21<1:56:40,  2.08s/it]

tensor(4207, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 31%|███▏      | 1550/4921 [53:24<1:56:08,  2.07s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1551/4921 [53:26<1:56:14,  2.07s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1552/4921 [53:28<1:59:09,  2.12s/it]

tensor(5607, device='cuda:0') tensor(34, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1553/4921 [53:30<1:58:08,  2.10s/it]

tensor(10507, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1554/4921 [53:32<1:56:59,  2.08s/it]

tensor(7007, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1555/4921 [53:34<1:56:19,  2.07s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1556/4921 [53:36<1:55:27,  2.06s/it]

tensor(7007, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1557/4921 [53:38<1:55:33,  2.06s/it]

tensor(5607, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1558/4921 [53:40<1:55:38,  2.06s/it]

tensor(4907, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1559/4921 [53:42<1:54:56,  2.05s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1560/4921 [53:44<1:56:46,  2.08s/it]

tensor(6307, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1561/4921 [53:46<1:55:57,  2.07s/it]

tensor(4907, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1562/4921 [53:48<1:55:24,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1563/4921 [53:50<1:54:54,  2.05s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1564/4921 [53:53<1:55:15,  2.06s/it]

tensor(6307, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1565/4921 [53:55<1:55:09,  2.06s/it]

tensor(4207, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1566/4921 [53:57<1:54:49,  2.05s/it]

tensor(4907, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1567/4921 [53:59<1:54:37,  2.05s/it]

tensor(4207, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1568/4921 [54:01<1:57:46,  2.11s/it]

tensor(7007, device='cuda:0') tensor(41, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1569/4921 [54:03<1:56:36,  2.09s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1570/4921 [54:05<1:56:28,  2.09s/it]

tensor(5607, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1571/4921 [54:07<1:56:36,  2.09s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1572/4921 [54:09<1:55:52,  2.08s/it]

tensor(6307, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1573/4921 [54:11<1:55:23,  2.07s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1574/4921 [54:13<1:55:30,  2.07s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1575/4921 [54:15<1:55:32,  2.07s/it]

tensor(4907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1576/4921 [54:18<1:58:32,  2.13s/it]

tensor(4907, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1577/4921 [54:20<1:57:51,  2.11s/it]

tensor(5607, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1578/4921 [54:22<1:56:57,  2.10s/it]

tensor(4907, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1579/4921 [54:24<1:56:05,  2.08s/it]

tensor(7007, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1580/4921 [54:26<1:55:50,  2.08s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1581/4921 [54:28<1:55:28,  2.07s/it]

tensor(7007, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1582/4921 [54:30<1:55:13,  2.07s/it]

tensor(5607, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1583/4921 [54:32<1:55:38,  2.08s/it]

tensor(4907, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1584/4921 [54:34<1:59:28,  2.15s/it]

tensor(4907, device='cuda:0') tensor(174, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1585/4921 [54:37<1:59:22,  2.15s/it]

tensor(4907, device='cuda:0') tensor(602, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1586/4921 [54:39<1:57:57,  2.12s/it]

tensor(11907, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1587/4921 [54:41<1:56:17,  2.09s/it]

tensor(5607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1588/4921 [54:43<1:55:09,  2.07s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1589/4921 [54:45<1:54:31,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1590/4921 [54:47<1:54:49,  2.07s/it]

tensor(4207, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1591/4921 [54:49<1:56:04,  2.09s/it]

tensor(4907, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1592/4921 [54:51<1:58:35,  2.14s/it]

tensor(5607, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1593/4921 [54:53<1:58:04,  2.13s/it]

tensor(5607, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1594/4921 [54:55<1:56:31,  2.10s/it]

tensor(7707, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1595/4921 [54:57<1:55:04,  2.08s/it]

tensor(6307, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1596/4921 [54:59<1:55:04,  2.08s/it]

tensor(4907, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1597/4921 [55:01<1:55:05,  2.08s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1598/4921 [55:04<1:55:09,  2.08s/it]

tensor(4907, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 32%|███▏      | 1599/4921 [55:06<1:55:02,  2.08s/it]

tensor(4207, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1600/4921 [55:08<1:57:49,  2.13s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1601/4921 [55:10<1:55:32,  2.09s/it]

tensor(4907, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1602/4921 [55:12<1:54:37,  2.07s/it]

tensor(4907, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1603/4921 [55:14<1:54:20,  2.07s/it]

tensor(4907, device='cuda:0') tensor(441, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1604/4921 [55:16<1:55:35,  2.09s/it]

tensor(14707, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1605/4921 [55:18<1:54:45,  2.08s/it]

tensor(5607, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1606/4921 [55:20<1:54:06,  2.07s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1607/4921 [55:22<1:53:15,  2.05s/it]

tensor(5607, device='cuda:0') tensor(262, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1608/4921 [55:24<1:55:21,  2.09s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1609/4921 [55:26<1:54:30,  2.07s/it]

tensor(5607, device='cuda:0') tensor(141, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1610/4921 [55:29<1:54:59,  2.08s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1611/4921 [55:31<1:55:22,  2.09s/it]

tensor(6307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1612/4921 [55:33<1:54:12,  2.07s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1613/4921 [55:35<1:54:02,  2.07s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1614/4921 [55:37<1:53:37,  2.06s/it]

tensor(4907, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1615/4921 [55:39<1:52:50,  2.05s/it]

tensor(4907, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1616/4921 [55:41<1:55:13,  2.09s/it]

tensor(4907, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1617/4921 [55:43<1:55:09,  2.09s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1618/4921 [55:45<1:54:07,  2.07s/it]

tensor(10507, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1619/4921 [55:47<1:53:31,  2.06s/it]

tensor(5607, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1620/4921 [55:49<1:52:41,  2.05s/it]

tensor(4907, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1621/4921 [55:51<1:52:07,  2.04s/it]

tensor(5607, device='cuda:0') tensor(159, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1622/4921 [55:53<1:52:01,  2.04s/it]

tensor(7007, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1623/4921 [55:55<1:51:47,  2.03s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1624/4921 [55:57<1:54:58,  2.09s/it]

tensor(5607, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1625/4921 [56:00<1:53:52,  2.07s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1626/4921 [56:02<1:52:58,  2.06s/it]

tensor(7007, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1627/4921 [56:04<1:52:37,  2.05s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1628/4921 [56:06<1:52:27,  2.05s/it]

tensor(5607, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1629/4921 [56:08<1:52:17,  2.05s/it]

tensor(4907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1630/4921 [56:10<1:51:28,  2.03s/it]

tensor(4907, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1631/4921 [56:12<1:52:27,  2.05s/it]

tensor(4207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1632/4921 [56:14<1:54:19,  2.09s/it]

tensor(4207, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1633/4921 [56:16<1:53:04,  2.06s/it]

tensor(7007, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1634/4921 [56:18<1:52:31,  2.05s/it]

tensor(5607, device='cuda:0') tensor(81, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1635/4921 [56:20<1:52:22,  2.05s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1636/4921 [56:22<1:52:18,  2.05s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1637/4921 [56:24<1:52:42,  2.06s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1638/4921 [56:26<1:53:48,  2.08s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1639/4921 [56:28<1:53:10,  2.07s/it]

tensor(4207, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1640/4921 [56:31<1:55:57,  2.12s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1641/4921 [56:33<1:54:40,  2.10s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1642/4921 [56:35<1:54:06,  2.09s/it]

tensor(4907, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1643/4921 [56:37<1:52:57,  2.07s/it]

tensor(6307, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1644/4921 [56:39<1:53:42,  2.08s/it]

tensor(4207, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1645/4921 [56:41<1:53:32,  2.08s/it]

tensor(4907, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1646/4921 [56:43<1:52:38,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1647/4921 [56:45<1:52:08,  2.06s/it]

tensor(4207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 33%|███▎      | 1648/4921 [56:47<1:54:01,  2.09s/it]

tensor(4207, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1649/4921 [56:49<1:53:07,  2.07s/it]

tensor(4207, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1650/4921 [56:51<1:53:19,  2.08s/it]

tensor(8407, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1651/4921 [56:53<1:53:32,  2.08s/it]

tensor(5607, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1652/4921 [56:55<1:53:30,  2.08s/it]

tensor(5607, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1653/4921 [56:57<1:52:19,  2.06s/it]

tensor(5607, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1654/4921 [56:59<1:51:30,  2.05s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1655/4921 [57:01<1:50:52,  2.04s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1656/4921 [57:04<1:52:36,  2.07s/it]

tensor(6307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1657/4921 [57:06<1:52:30,  2.07s/it]

tensor(4907, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1658/4921 [57:08<1:52:57,  2.08s/it]

tensor(7707, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1659/4921 [57:10<1:53:22,  2.09s/it]

tensor(5607, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▎      | 1660/4921 [57:12<1:52:16,  2.07s/it]

tensor(7007, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1661/4921 [57:14<1:51:49,  2.06s/it]

tensor(5607, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1662/4921 [57:16<1:51:25,  2.05s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1663/4921 [57:18<1:51:01,  2.04s/it]

tensor(4907, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1664/4921 [57:20<1:54:18,  2.11s/it]

tensor(4207, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1665/4921 [57:22<1:54:29,  2.11s/it]

tensor(4207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1666/4921 [57:24<1:53:44,  2.10s/it]

tensor(4207, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1667/4921 [57:26<1:53:05,  2.09s/it]

tensor(4207, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1668/4921 [57:29<1:53:10,  2.09s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1669/4921 [57:31<1:52:33,  2.08s/it]

tensor(8407, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1670/4921 [57:33<1:52:11,  2.07s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1671/4921 [57:35<1:52:58,  2.09s/it]

tensor(6307, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1672/4921 [57:37<1:55:29,  2.13s/it]

tensor(8407, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1673/4921 [57:39<1:53:55,  2.10s/it]

tensor(8407, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1674/4921 [57:41<1:52:55,  2.09s/it]

tensor(5607, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1675/4921 [57:43<1:53:09,  2.09s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1676/4921 [57:45<1:52:43,  2.08s/it]

tensor(5607, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1677/4921 [57:47<1:53:10,  2.09s/it]

tensor(9107, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1678/4921 [57:49<1:52:54,  2.09s/it]

tensor(6307, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1679/4921 [57:52<1:53:23,  2.10s/it]

tensor(6307, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1680/4921 [57:54<1:55:08,  2.13s/it]

tensor(5607, device='cuda:0') tensor(11, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1681/4921 [57:56<1:52:55,  2.09s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1682/4921 [57:58<1:52:14,  2.08s/it]

tensor(4907, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1683/4921 [58:00<1:52:26,  2.08s/it]

tensor(4207, device='cuda:0') tensor(18, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1684/4921 [58:02<1:52:46,  2.09s/it]

tensor(4207, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1685/4921 [58:04<1:52:29,  2.09s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1686/4921 [58:06<1:51:28,  2.07s/it]

tensor(4907, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1687/4921 [58:08<1:50:46,  2.06s/it]

tensor(4207, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1688/4921 [58:10<1:53:33,  2.11s/it]

tensor(6307, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1689/4921 [58:13<1:53:11,  2.10s/it]

tensor(6307, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1690/4921 [58:15<1:52:50,  2.10s/it]

tensor(5607, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1691/4921 [58:17<1:52:40,  2.09s/it]

tensor(6307, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1692/4921 [58:19<1:51:50,  2.08s/it]

tensor(4907, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1693/4921 [58:21<1:51:43,  2.08s/it]

tensor(7007, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1694/4921 [58:23<1:50:42,  2.06s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1695/4921 [58:25<1:49:51,  2.04s/it]

tensor(4907, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1696/4921 [58:27<1:52:07,  2.09s/it]

tensor(4907, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 34%|███▍      | 1697/4921 [58:29<1:51:59,  2.08s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1698/4921 [58:31<1:51:58,  2.08s/it]

tensor(7707, device='cuda:0') tensor(169, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1699/4921 [58:33<1:50:32,  2.06s/it]

tensor(7007, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1700/4921 [58:35<1:49:56,  2.05s/it]

tensor(7007, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1701/4921 [58:37<1:49:24,  2.04s/it]

tensor(5607, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1702/4921 [58:39<1:49:36,  2.04s/it]

tensor(4907, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1703/4921 [58:41<1:49:05,  2.03s/it]

tensor(4207, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1704/4921 [58:43<1:51:41,  2.08s/it]

tensor(4207, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1705/4921 [58:46<1:51:21,  2.08s/it]

tensor(4907, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1706/4921 [58:48<1:51:29,  2.08s/it]

tensor(6307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1707/4921 [58:50<1:51:16,  2.08s/it]

tensor(6307, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1708/4921 [58:52<1:51:08,  2.08s/it]

tensor(5607, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1709/4921 [58:54<1:50:22,  2.06s/it]

tensor(4907, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1710/4921 [58:56<1:50:17,  2.06s/it]

tensor(11207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1711/4921 [58:58<1:50:29,  2.07s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1712/4921 [59:00<1:53:27,  2.12s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1713/4921 [59:02<1:52:12,  2.10s/it]

tensor(4907, device='cuda:0') tensor(539, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1714/4921 [59:04<1:51:47,  2.09s/it]

tensor(4907, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1715/4921 [59:06<1:50:40,  2.07s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1716/4921 [59:08<1:50:09,  2.06s/it]

tensor(5607, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1717/4921 [59:10<1:49:58,  2.06s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1718/4921 [59:13<1:50:51,  2.08s/it]

tensor(4907, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1719/4921 [59:15<1:50:50,  2.08s/it]

tensor(4907, device='cuda:0') tensor(178, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1720/4921 [59:17<1:52:56,  2.12s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1721/4921 [59:19<1:52:20,  2.11s/it]

tensor(4207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▍      | 1722/4921 [59:21<1:51:53,  2.10s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1723/4921 [59:23<1:51:05,  2.08s/it]

tensor(4207, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1724/4921 [59:25<1:51:04,  2.08s/it]

tensor(4207, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1725/4921 [59:27<1:51:32,  2.09s/it]

tensor(7707, device='cuda:0') tensor(177, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1726/4921 [59:29<1:50:49,  2.08s/it]

tensor(5607, device='cuda:0') tensor(263, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1727/4921 [59:31<1:51:12,  2.09s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1728/4921 [59:34<1:53:30,  2.13s/it]

tensor(5607, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1729/4921 [59:36<1:52:30,  2.11s/it]

tensor(4907, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1730/4921 [59:38<1:51:40,  2.10s/it]

tensor(5607, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1731/4921 [59:40<1:52:12,  2.11s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1732/4921 [59:42<1:51:55,  2.11s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1733/4921 [59:44<1:51:54,  2.11s/it]

tensor(7007, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1734/4921 [59:46<1:51:11,  2.09s/it]

tensor(6307, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1735/4921 [59:48<1:50:35,  2.08s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1736/4921 [59:50<1:53:02,  2.13s/it]

tensor(4907, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1737/4921 [59:53<1:51:40,  2.10s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1738/4921 [59:55<1:51:16,  2.10s/it]

tensor(5607, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1739/4921 [59:57<1:50:01,  2.07s/it]

tensor(4907, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1740/4921 [59:59<1:49:05,  2.06s/it]

tensor(4207, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1741/4921 [1:00:01<1:48:19,  2.04s/it]

tensor(4207, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1742/4921 [1:00:03<1:48:18,  2.04s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1743/4921 [1:00:05<1:47:46,  2.03s/it]

tensor(7707, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1744/4921 [1:00:07<1:50:33,  2.09s/it]

tensor(6307, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1745/4921 [1:00:09<1:50:18,  2.08s/it]

tensor(4907, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 35%|███▌      | 1746/4921 [1:00:11<1:49:12,  2.06s/it]

tensor(6307, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1747/4921 [1:00:13<1:48:06,  2.04s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1748/4921 [1:00:15<1:47:33,  2.03s/it]

tensor(5607, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1749/4921 [1:00:17<1:48:08,  2.05s/it]

tensor(4207, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1750/4921 [1:00:19<1:48:08,  2.05s/it]

tensor(5607, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1751/4921 [1:00:21<1:49:34,  2.07s/it]

tensor(5607, device='cuda:0') tensor(142, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1752/4921 [1:00:24<1:52:58,  2.14s/it]

tensor(4907, device='cuda:0') tensor(15, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1753/4921 [1:00:26<1:51:22,  2.11s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1754/4921 [1:00:28<1:50:03,  2.09s/it]

tensor(4907, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1755/4921 [1:00:30<1:48:44,  2.06s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1756/4921 [1:00:32<1:48:36,  2.06s/it]

tensor(4907, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1757/4921 [1:00:34<1:48:18,  2.05s/it]

tensor(7707, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1758/4921 [1:00:36<1:49:07,  2.07s/it]

tensor(8407, device='cuda:0') tensor(69, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1759/4921 [1:00:38<1:48:06,  2.05s/it]

tensor(9107, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1760/4921 [1:00:40<1:49:11,  2.07s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1761/4921 [1:00:42<1:47:37,  2.04s/it]

tensor(7007, device='cuda:0') tensor(26, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1762/4921 [1:00:44<1:46:50,  2.03s/it]

tensor(8407, device='cuda:0') tensor(173, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1763/4921 [1:00:46<1:46:42,  2.03s/it]

tensor(7007, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1764/4921 [1:00:48<1:47:28,  2.04s/it]

tensor(5607, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1765/4921 [1:00:50<1:47:47,  2.05s/it]

tensor(7007, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1766/4921 [1:00:52<1:47:20,  2.04s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1767/4921 [1:00:54<1:46:54,  2.03s/it]

tensor(4207, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1768/4921 [1:00:56<1:48:09,  2.06s/it]

tensor(5607, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1769/4921 [1:00:58<1:47:09,  2.04s/it]

tensor(6307, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1770/4921 [1:01:00<1:46:28,  2.03s/it]

tensor(6307, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1771/4921 [1:01:02<1:47:33,  2.05s/it]

tensor(5607, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1772/4921 [1:01:04<1:47:25,  2.05s/it]

tensor(4907, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1773/4921 [1:01:06<1:46:11,  2.02s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1774/4921 [1:01:08<1:45:35,  2.01s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1775/4921 [1:01:10<1:44:58,  2.00s/it]

tensor(4907, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1776/4921 [1:01:12<1:47:18,  2.05s/it]

tensor(4207, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1777/4921 [1:01:14<1:46:38,  2.04s/it]

tensor(4207, device='cuda:0') tensor(137, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1778/4921 [1:01:17<1:47:00,  2.04s/it]

tensor(4207, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1779/4921 [1:01:19<1:46:43,  2.04s/it]

tensor(4207, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1780/4921 [1:01:21<1:46:26,  2.03s/it]

tensor(9107, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1781/4921 [1:01:23<1:46:08,  2.03s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1782/4921 [1:01:25<1:45:57,  2.03s/it]

tensor(4907, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▌      | 1783/4921 [1:01:27<1:46:15,  2.03s/it]

tensor(6307, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1784/4921 [1:01:29<1:47:59,  2.07s/it]

tensor(6307, device='cuda:0') tensor(137, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1785/4921 [1:01:31<1:47:55,  2.06s/it]

tensor(6307, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1786/4921 [1:01:33<1:47:05,  2.05s/it]

tensor(6307, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1787/4921 [1:01:35<1:47:22,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1788/4921 [1:01:37<1:47:13,  2.05s/it]

tensor(7007, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1789/4921 [1:01:39<1:47:42,  2.06s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1790/4921 [1:01:41<1:47:11,  2.05s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1791/4921 [1:01:43<1:46:58,  2.05s/it]

tensor(11907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1792/4921 [1:01:45<1:49:33,  2.10s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1793/4921 [1:01:47<1:48:28,  2.08s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1794/4921 [1:01:49<1:47:30,  2.06s/it]

tensor(4907, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1795/4921 [1:01:51<1:47:11,  2.06s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 36%|███▋      | 1796/4921 [1:01:54<1:47:07,  2.06s/it]

tensor(4207, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1797/4921 [1:01:56<1:46:07,  2.04s/it]

tensor(4207, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1798/4921 [1:01:58<1:47:08,  2.06s/it]

tensor(4207, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1799/4921 [1:02:00<1:47:06,  2.06s/it]

tensor(4207, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1800/4921 [1:02:02<1:49:07,  2.10s/it]

tensor(4207, device='cuda:0') tensor(201, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1801/4921 [1:02:04<1:48:47,  2.09s/it]

tensor(6307, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1802/4921 [1:02:06<1:48:05,  2.08s/it]

tensor(8407, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1803/4921 [1:02:08<1:47:31,  2.07s/it]

tensor(4907, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1804/4921 [1:02:10<1:47:22,  2.07s/it]

tensor(5607, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1805/4921 [1:02:12<1:47:29,  2.07s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1806/4921 [1:02:14<1:46:57,  2.06s/it]

tensor(7707, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1807/4921 [1:02:16<1:46:40,  2.06s/it]

tensor(4907, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1808/4921 [1:02:18<1:48:13,  2.09s/it]

tensor(4907, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1809/4921 [1:02:20<1:47:19,  2.07s/it]

tensor(4907, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1810/4921 [1:02:23<1:46:51,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1811/4921 [1:02:25<1:47:07,  2.07s/it]

tensor(4907, device='cuda:0') tensor(37, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1812/4921 [1:02:27<1:47:32,  2.08s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1813/4921 [1:02:29<1:47:47,  2.08s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1814/4921 [1:02:31<1:47:37,  2.08s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1815/4921 [1:02:33<1:47:18,  2.07s/it]

tensor(4907, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1816/4921 [1:02:35<1:50:05,  2.13s/it]

tensor(4207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1817/4921 [1:02:37<1:49:54,  2.12s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1818/4921 [1:02:39<1:49:27,  2.12s/it]

tensor(4907, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1819/4921 [1:02:42<1:49:33,  2.12s/it]

tensor(6307, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1820/4921 [1:02:44<1:48:15,  2.09s/it]

tensor(6307, device='cuda:0') tensor(178, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1821/4921 [1:02:46<1:46:57,  2.07s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1822/4921 [1:02:48<1:46:12,  2.06s/it]

tensor(4907, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1823/4921 [1:02:50<1:45:10,  2.04s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1824/4921 [1:02:52<1:48:21,  2.10s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1825/4921 [1:02:54<1:48:56,  2.11s/it]

tensor(7007, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1826/4921 [1:02:56<1:48:01,  2.09s/it]

tensor(4907, device='cuda:0') tensor(137, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1827/4921 [1:02:58<1:46:43,  2.07s/it]

tensor(4207, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1828/4921 [1:03:00<1:45:57,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1829/4921 [1:03:02<1:45:39,  2.05s/it]

tensor(6307, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1830/4921 [1:03:04<1:45:20,  2.04s/it]

tensor(6307, device='cuda:0') tensor(84, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1831/4921 [1:03:06<1:45:40,  2.05s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1832/4921 [1:03:08<1:48:30,  2.11s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1833/4921 [1:03:10<1:47:18,  2.08s/it]

tensor(4907, device='cuda:0') tensor(118, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1834/4921 [1:03:13<1:46:32,  2.07s/it]

tensor(4207, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1835/4921 [1:03:15<1:47:44,  2.09s/it]

tensor(4907, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1836/4921 [1:03:17<1:46:50,  2.08s/it]

tensor(5607, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1837/4921 [1:03:19<1:46:37,  2.07s/it]

tensor(4207, device='cuda:0') tensor(350, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1838/4921 [1:03:21<1:47:47,  2.10s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1839/4921 [1:03:23<1:49:20,  2.13s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1840/4921 [1:03:25<1:50:39,  2.15s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1841/4921 [1:03:27<1:48:53,  2.12s/it]

tensor(4207, device='cuda:0') tensor(125, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1842/4921 [1:03:29<1:47:41,  2.10s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1843/4921 [1:03:31<1:47:06,  2.09s/it]

tensor(4207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1844/4921 [1:03:34<1:47:21,  2.09s/it]

tensor(4907, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 37%|███▋      | 1845/4921 [1:03:36<1:47:40,  2.10s/it]

tensor(4207, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1846/4921 [1:03:38<1:47:09,  2.09s/it]

tensor(4907, device='cuda:0') tensor(28, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1847/4921 [1:03:40<1:45:58,  2.07s/it]

tensor(7707, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1848/4921 [1:03:42<1:47:56,  2.11s/it]

tensor(4207, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1849/4921 [1:03:44<1:47:18,  2.10s/it]

tensor(5607, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1850/4921 [1:03:46<1:46:59,  2.09s/it]

tensor(5607, device='cuda:0') tensor(91, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1851/4921 [1:03:48<1:47:21,  2.10s/it]

tensor(11907, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1852/4921 [1:03:50<1:46:58,  2.09s/it]

tensor(5607, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1853/4921 [1:03:52<1:46:27,  2.08s/it]

tensor(5607, device='cuda:0') tensor(247, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1854/4921 [1:03:54<1:45:47,  2.07s/it]

tensor(7707, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1855/4921 [1:03:56<1:45:36,  2.07s/it]

tensor(5607, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1856/4921 [1:03:59<1:47:35,  2.11s/it]

tensor(7707, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1857/4921 [1:04:01<1:46:22,  2.08s/it]

tensor(6307, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1858/4921 [1:04:03<1:46:22,  2.08s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1859/4921 [1:04:05<1:47:02,  2.10s/it]

tensor(12607, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1860/4921 [1:04:07<1:46:36,  2.09s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1861/4921 [1:04:09<1:45:43,  2.07s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1862/4921 [1:04:11<1:45:35,  2.07s/it]

tensor(4907, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1863/4921 [1:04:13<1:46:09,  2.08s/it]

tensor(4207, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1864/4921 [1:04:15<1:48:04,  2.12s/it]

tensor(4207, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1865/4921 [1:04:18<1:48:21,  2.13s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1866/4921 [1:04:20<1:47:42,  2.12s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1867/4921 [1:04:22<1:46:46,  2.10s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1868/4921 [1:04:24<1:46:19,  2.09s/it]

tensor(4207, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1869/4921 [1:04:26<1:45:03,  2.07s/it]

tensor(4207, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1870/4921 [1:04:28<1:44:33,  2.06s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1871/4921 [1:04:30<1:44:09,  2.05s/it]

tensor(7007, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1872/4921 [1:04:32<1:47:18,  2.11s/it]

tensor(4207, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1873/4921 [1:04:34<1:47:02,  2.11s/it]

tensor(4907, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1874/4921 [1:04:36<1:47:18,  2.11s/it]

tensor(4907, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1875/4921 [1:04:38<1:46:28,  2.10s/it]

tensor(4907, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1876/4921 [1:04:40<1:45:29,  2.08s/it]

tensor(5607, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1877/4921 [1:04:42<1:44:41,  2.06s/it]

tensor(5607, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1878/4921 [1:04:44<1:44:04,  2.05s/it]

tensor(5607, device='cuda:0') tensor(25, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1879/4921 [1:04:47<1:44:29,  2.06s/it]

tensor(6307, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1880/4921 [1:04:49<1:46:36,  2.10s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1881/4921 [1:04:51<1:45:55,  2.09s/it]

tensor(4907, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1882/4921 [1:04:53<1:45:06,  2.08s/it]

tensor(5607, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1883/4921 [1:04:55<1:44:12,  2.06s/it]

tensor(4907, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1884/4921 [1:04:57<1:43:39,  2.05s/it]

tensor(4907, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1885/4921 [1:04:59<1:43:36,  2.05s/it]

tensor(7007, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1886/4921 [1:05:01<1:43:26,  2.04s/it]

tensor(5607, device='cuda:0') tensor(90, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1887/4921 [1:05:03<1:42:16,  2.02s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1888/4921 [1:05:05<1:44:02,  2.06s/it]

tensor(4907, device='cuda:0') tensor(110, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1889/4921 [1:05:07<1:42:53,  2.04s/it]

tensor(4907, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1890/4921 [1:05:09<1:42:17,  2.03s/it]

tensor(4207, device='cuda:0') tensor(11, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1891/4921 [1:05:11<1:41:46,  2.02s/it]

tensor(8407, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1892/4921 [1:05:13<1:42:06,  2.02s/it]

tensor(7007, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1893/4921 [1:05:15<1:42:06,  2.02s/it]

tensor(6307, device='cuda:0') tensor(35, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 38%|███▊      | 1894/4921 [1:05:17<1:41:35,  2.01s/it]

tensor(4907, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1895/4921 [1:05:19<1:40:57,  2.00s/it]

tensor(5607, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1896/4921 [1:05:21<1:43:10,  2.05s/it]

tensor(5607, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1897/4921 [1:05:23<1:42:29,  2.03s/it]

tensor(4907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1898/4921 [1:05:25<1:43:20,  2.05s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1899/4921 [1:05:27<1:43:58,  2.06s/it]

tensor(4907, device='cuda:0') tensor(204, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1900/4921 [1:05:30<1:43:35,  2.06s/it]

tensor(4907, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1901/4921 [1:05:32<1:43:15,  2.05s/it]

tensor(4207, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1902/4921 [1:05:34<1:42:18,  2.03s/it]

tensor(8407, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1903/4921 [1:05:36<1:42:06,  2.03s/it]

tensor(4207, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1904/4921 [1:05:38<1:44:14,  2.07s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1905/4921 [1:05:40<1:44:01,  2.07s/it]

tensor(4907, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▊      | 1906/4921 [1:05:42<1:44:26,  2.08s/it]

tensor(4907, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1907/4921 [1:05:44<1:43:07,  2.05s/it]

tensor(4207, device='cuda:0') tensor(164, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1908/4921 [1:05:46<1:42:24,  2.04s/it]

tensor(4207, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1909/4921 [1:05:48<1:41:23,  2.02s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1910/4921 [1:05:50<1:41:09,  2.02s/it]

tensor(4207, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1911/4921 [1:05:52<1:41:24,  2.02s/it]

tensor(4207, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1912/4921 [1:05:54<1:44:41,  2.09s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1913/4921 [1:05:56<1:44:49,  2.09s/it]

tensor(5607, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1914/4921 [1:05:58<1:43:21,  2.06s/it]

tensor(4907, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1915/4921 [1:06:00<1:43:11,  2.06s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1916/4921 [1:06:02<1:42:57,  2.06s/it]

tensor(7707, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1917/4921 [1:06:04<1:43:05,  2.06s/it]

tensor(6307, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1918/4921 [1:06:06<1:42:47,  2.05s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1919/4921 [1:06:08<1:42:41,  2.05s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1920/4921 [1:06:11<1:45:02,  2.10s/it]

tensor(4907, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1921/4921 [1:06:13<1:43:31,  2.07s/it]

tensor(9107, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1922/4921 [1:06:15<1:43:25,  2.07s/it]

tensor(4207, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1923/4921 [1:06:17<1:43:15,  2.07s/it]

tensor(9107, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1924/4921 [1:06:19<1:43:03,  2.06s/it]

tensor(4907, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1925/4921 [1:06:21<1:43:54,  2.08s/it]

tensor(4907, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1926/4921 [1:06:23<1:44:06,  2.09s/it]

tensor(5607, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1927/4921 [1:06:25<1:43:35,  2.08s/it]

tensor(4207, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1928/4921 [1:06:27<1:45:28,  2.11s/it]

tensor(4207, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1929/4921 [1:06:29<1:44:53,  2.10s/it]

tensor(4207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1930/4921 [1:06:31<1:43:47,  2.08s/it]

tensor(4207, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1931/4921 [1:06:34<1:43:17,  2.07s/it]

tensor(4907, device='cuda:0') tensor(116, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1932/4921 [1:06:36<1:43:27,  2.08s/it]

tensor(4207, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1933/4921 [1:06:38<1:43:01,  2.07s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1934/4921 [1:06:40<1:42:10,  2.05s/it]

tensor(4207, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1935/4921 [1:06:42<1:42:35,  2.06s/it]

tensor(7007, device='cuda:0') tensor(145, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1936/4921 [1:06:44<1:44:37,  2.10s/it]

tensor(4207, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1937/4921 [1:06:46<1:43:41,  2.09s/it]

tensor(4907, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1938/4921 [1:06:48<1:43:06,  2.07s/it]

tensor(4907, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1939/4921 [1:06:50<1:43:03,  2.07s/it]

tensor(7007, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1940/4921 [1:06:52<1:43:05,  2.07s/it]

tensor(6307, device='cuda:0') tensor(204, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1941/4921 [1:06:54<1:42:34,  2.07s/it]

tensor(5607, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1942/4921 [1:06:56<1:42:03,  2.06s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 39%|███▉      | 1943/4921 [1:06:58<1:41:02,  2.04s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1944/4921 [1:07:00<1:42:38,  2.07s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1945/4921 [1:07:02<1:41:56,  2.06s/it]

tensor(4207, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1946/4921 [1:07:05<1:42:14,  2.06s/it]

tensor(4207, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1947/4921 [1:07:07<1:41:53,  2.06s/it]

tensor(11907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1948/4921 [1:07:09<1:41:33,  2.05s/it]

tensor(7007, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1949/4921 [1:07:11<1:40:46,  2.03s/it]

tensor(5607, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1950/4921 [1:07:13<1:40:07,  2.02s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1951/4921 [1:07:15<1:39:33,  2.01s/it]

tensor(7007, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1952/4921 [1:07:17<1:42:28,  2.07s/it]

tensor(5607, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1953/4921 [1:07:19<1:42:19,  2.07s/it]

tensor(5607, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1954/4921 [1:07:21<1:41:52,  2.06s/it]

tensor(4907, device='cuda:0') tensor(71, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1955/4921 [1:07:23<1:41:12,  2.05s/it]

tensor(4907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1956/4921 [1:07:25<1:40:38,  2.04s/it]

tensor(4907, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1957/4921 [1:07:27<1:40:04,  2.03s/it]

tensor(4207, device='cuda:0') tensor(132, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1958/4921 [1:07:29<1:40:41,  2.04s/it]

tensor(4207, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1959/4921 [1:07:31<1:41:28,  2.06s/it]

tensor(4207, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1960/4921 [1:07:33<1:44:21,  2.11s/it]

tensor(4207, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1961/4921 [1:07:35<1:43:35,  2.10s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1962/4921 [1:07:37<1:42:59,  2.09s/it]

tensor(6307, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1963/4921 [1:07:39<1:42:12,  2.07s/it]

tensor(4907, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1964/4921 [1:07:42<1:41:28,  2.06s/it]

tensor(7707, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1965/4921 [1:07:44<1:41:02,  2.05s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1966/4921 [1:07:46<1:41:38,  2.06s/it]

tensor(4907, device='cuda:0') tensor(591, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1967/4921 [1:07:48<1:42:11,  2.08s/it]

tensor(6307, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|███▉      | 1968/4921 [1:07:50<1:44:08,  2.12s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1969/4921 [1:07:52<1:42:49,  2.09s/it]

tensor(4207, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1970/4921 [1:07:54<1:42:09,  2.08s/it]

tensor(4207, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1971/4921 [1:07:56<1:41:48,  2.07s/it]

tensor(11907, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1972/4921 [1:07:58<1:42:32,  2.09s/it]

tensor(5607, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1973/4921 [1:08:00<1:43:08,  2.10s/it]

tensor(4907, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1974/4921 [1:08:02<1:42:43,  2.09s/it]

tensor(4907, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1975/4921 [1:08:04<1:42:20,  2.08s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1976/4921 [1:08:07<1:43:51,  2.12s/it]

tensor(4207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1977/4921 [1:08:09<1:42:30,  2.09s/it]

tensor(6307, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1978/4921 [1:08:11<1:41:56,  2.08s/it]

tensor(5607, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1979/4921 [1:08:13<1:43:12,  2.10s/it]

tensor(5607, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1980/4921 [1:08:15<1:43:11,  2.11s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1981/4921 [1:08:17<1:42:13,  2.09s/it]

tensor(4207, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1982/4921 [1:08:19<1:41:14,  2.07s/it]

tensor(4207, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1983/4921 [1:08:21<1:41:32,  2.07s/it]

tensor(5607, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1984/4921 [1:08:23<1:44:25,  2.13s/it]

tensor(7707, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1985/4921 [1:08:26<1:43:48,  2.12s/it]

tensor(7007, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1986/4921 [1:08:28<1:44:09,  2.13s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1987/4921 [1:08:30<1:43:06,  2.11s/it]

tensor(5607, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1988/4921 [1:08:32<1:42:08,  2.09s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1989/4921 [1:08:34<1:41:17,  2.07s/it]

tensor(4907, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1990/4921 [1:08:36<1:41:09,  2.07s/it]

tensor(5607, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1991/4921 [1:08:38<1:41:51,  2.09s/it]

tensor(5607, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1992/4921 [1:08:40<1:43:38,  2.12s/it]

tensor(4207, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 40%|████      | 1993/4921 [1:08:42<1:43:30,  2.12s/it]

tensor(7707, device='cuda:0') tensor(266, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 1994/4921 [1:08:44<1:42:25,  2.10s/it]

tensor(4907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 1995/4921 [1:08:46<1:42:17,  2.10s/it]

tensor(6307, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 1996/4921 [1:08:49<1:41:45,  2.09s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 1997/4921 [1:08:51<1:41:37,  2.09s/it]

tensor(4907, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 1998/4921 [1:08:53<1:41:19,  2.08s/it]

tensor(4907, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 1999/4921 [1:08:55<1:41:53,  2.09s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2000/4921 [1:08:57<1:43:49,  2.13s/it]

tensor(4207, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2001/4921 [1:08:59<1:42:12,  2.10s/it]

tensor(4207, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2002/4921 [1:09:01<1:41:28,  2.09s/it]

tensor(4207, device='cuda:0') tensor(61, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2003/4921 [1:09:03<1:40:38,  2.07s/it]

tensor(4207, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2004/4921 [1:09:05<1:40:07,  2.06s/it]

tensor(4207, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2005/4921 [1:09:07<1:39:32,  2.05s/it]

tensor(4207, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2006/4921 [1:09:09<1:39:42,  2.05s/it]

tensor(4207, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2007/4921 [1:09:11<1:39:50,  2.06s/it]

tensor(4207, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2008/4921 [1:09:14<1:41:34,  2.09s/it]

tensor(4207, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2009/4921 [1:09:16<1:41:13,  2.09s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2010/4921 [1:09:18<1:40:01,  2.06s/it]

tensor(7007, device='cuda:0') tensor(28, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2011/4921 [1:09:20<1:39:22,  2.05s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2012/4921 [1:09:22<1:39:18,  2.05s/it]

tensor(7707, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2013/4921 [1:09:24<1:39:29,  2.05s/it]

tensor(5607, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2014/4921 [1:09:26<1:40:13,  2.07s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2015/4921 [1:09:28<1:40:49,  2.08s/it]

tensor(4907, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2016/4921 [1:09:30<1:42:13,  2.11s/it]

tensor(4907, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2017/4921 [1:09:32<1:41:22,  2.09s/it]

tensor(4207, device='cuda:0') tensor(154, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2018/4921 [1:09:34<1:40:26,  2.08s/it]

tensor(4907, device='cuda:0') tensor(64, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2019/4921 [1:09:36<1:40:30,  2.08s/it]

tensor(4907, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2020/4921 [1:09:38<1:41:11,  2.09s/it]

tensor(4907, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2021/4921 [1:09:40<1:40:45,  2.08s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2022/4921 [1:09:43<1:39:50,  2.07s/it]

tensor(6307, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2023/4921 [1:09:45<1:38:51,  2.05s/it]

tensor(7707, device='cuda:0') tensor(76, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2024/4921 [1:09:47<1:40:34,  2.08s/it]

tensor(6307, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2025/4921 [1:09:49<1:39:42,  2.07s/it]

tensor(5607, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2026/4921 [1:09:51<1:40:11,  2.08s/it]

tensor(4907, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2027/4921 [1:09:53<1:40:55,  2.09s/it]

tensor(4907, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2028/4921 [1:09:55<1:39:47,  2.07s/it]

tensor(4907, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████      | 2029/4921 [1:09:57<1:39:23,  2.06s/it]

tensor(4207, device='cuda:0') tensor(147, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2030/4921 [1:09:59<1:38:44,  2.05s/it]

tensor(4207, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2031/4921 [1:10:01<1:38:00,  2.03s/it]

tensor(4207, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2032/4921 [1:10:03<1:40:41,  2.09s/it]

tensor(4207, device='cuda:0') tensor(143, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2033/4921 [1:10:05<1:40:51,  2.10s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2034/4921 [1:10:07<1:40:20,  2.09s/it]

tensor(8407, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2035/4921 [1:10:09<1:39:14,  2.06s/it]

tensor(6307, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2036/4921 [1:10:11<1:38:26,  2.05s/it]

tensor(5607, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2037/4921 [1:10:13<1:37:37,  2.03s/it]

tensor(4907, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2038/4921 [1:10:15<1:37:54,  2.04s/it]

tensor(4907, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2039/4921 [1:10:18<1:38:30,  2.05s/it]

tensor(4907, device='cuda:0') tensor(161, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2040/4921 [1:10:20<1:41:47,  2.12s/it]

tensor(6307, device='cuda:0') tensor(20, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2041/4921 [1:10:22<1:40:32,  2.09s/it]

tensor(7007, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 41%|████▏     | 2042/4921 [1:10:24<1:39:41,  2.08s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2043/4921 [1:10:26<1:39:29,  2.07s/it]

tensor(5607, device='cuda:0') tensor(66, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2044/4921 [1:10:28<1:39:05,  2.07s/it]

tensor(4907, device='cuda:0') tensor(124, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2045/4921 [1:10:30<1:38:48,  2.06s/it]

tensor(4907, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2046/4921 [1:10:32<1:39:48,  2.08s/it]

tensor(4907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2047/4921 [1:10:34<1:39:34,  2.08s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2048/4921 [1:10:36<1:40:56,  2.11s/it]

tensor(4207, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2049/4921 [1:10:38<1:39:51,  2.09s/it]

tensor(4207, device='cuda:0') tensor(109, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2050/4921 [1:10:41<1:39:36,  2.08s/it]

tensor(4207, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2051/4921 [1:10:43<1:39:26,  2.08s/it]

tensor(7007, device='cuda:0') tensor(132, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2052/4921 [1:10:45<1:40:07,  2.09s/it]

tensor(5607, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2053/4921 [1:10:47<1:40:22,  2.10s/it]

tensor(5607, device='cuda:0') tensor(6, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2054/4921 [1:10:49<1:39:55,  2.09s/it]

tensor(4907, device='cuda:0') tensor(122, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2055/4921 [1:10:51<1:38:50,  2.07s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2056/4921 [1:10:53<1:40:32,  2.11s/it]

tensor(4907, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2057/4921 [1:10:55<1:39:34,  2.09s/it]

tensor(4207, device='cuda:0') tensor(42, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2058/4921 [1:10:57<1:39:10,  2.08s/it]

tensor(4207, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2059/4921 [1:10:59<1:39:26,  2.08s/it]

tensor(4207, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2060/4921 [1:11:01<1:39:30,  2.09s/it]

tensor(4207, device='cuda:0') tensor(120, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2061/4921 [1:11:03<1:38:14,  2.06s/it]

tensor(4207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2062/4921 [1:11:05<1:37:33,  2.05s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2063/4921 [1:11:07<1:36:56,  2.04s/it]

tensor(4207, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2064/4921 [1:11:10<1:39:21,  2.09s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2065/4921 [1:11:12<1:38:19,  2.07s/it]

tensor(5607, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2066/4921 [1:11:14<1:38:02,  2.06s/it]

tensor(5607, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2067/4921 [1:11:16<1:37:45,  2.06s/it]

tensor(8407, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2068/4921 [1:11:18<1:37:24,  2.05s/it]

tensor(6307, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2069/4921 [1:11:20<1:36:35,  2.03s/it]

tensor(7707, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2070/4921 [1:11:22<1:36:38,  2.03s/it]

tensor(5607, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2071/4921 [1:11:24<1:36:48,  2.04s/it]

tensor(4907, device='cuda:0') tensor(78, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2072/4921 [1:11:26<1:38:59,  2.08s/it]

tensor(4907, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2073/4921 [1:11:28<1:38:49,  2.08s/it]

tensor(4207, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2074/4921 [1:11:30<1:38:25,  2.07s/it]

tensor(4207, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2075/4921 [1:11:32<1:37:41,  2.06s/it]

tensor(4207, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2076/4921 [1:11:34<1:38:19,  2.07s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2077/4921 [1:11:36<1:37:58,  2.07s/it]

tensor(5607, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2078/4921 [1:11:38<1:37:18,  2.05s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2079/4921 [1:11:40<1:37:00,  2.05s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2080/4921 [1:11:43<1:39:44,  2.11s/it]

tensor(4907, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2081/4921 [1:11:45<1:38:20,  2.08s/it]

tensor(4207, device='cuda:0') tensor(92, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2082/4921 [1:11:47<1:38:13,  2.08s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2083/4921 [1:11:49<1:38:03,  2.07s/it]

tensor(4207, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2084/4921 [1:11:51<1:36:51,  2.05s/it]

tensor(4207, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2085/4921 [1:11:53<1:36:15,  2.04s/it]

tensor(4207, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2086/4921 [1:11:55<1:36:08,  2.03s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2087/4921 [1:11:57<1:36:12,  2.04s/it]

tensor(12607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2088/4921 [1:11:59<1:37:48,  2.07s/it]

tensor(5607, device='cuda:0') tensor(157, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2089/4921 [1:12:01<1:37:22,  2.06s/it]

tensor(4207, device='cuda:0') tensor(95, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2090/4921 [1:12:03<1:36:41,  2.05s/it]

tensor(10507, device='cuda:0') tensor(49, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 42%|████▏     | 2091/4921 [1:12:05<1:36:40,  2.05s/it]

tensor(8407, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2092/4921 [1:12:07<1:36:54,  2.06s/it]

tensor(4207, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2093/4921 [1:12:09<1:37:29,  2.07s/it]

tensor(7007, device='cuda:0') tensor(160, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2094/4921 [1:12:11<1:37:18,  2.07s/it]

tensor(5607, device='cuda:0') tensor(200, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2095/4921 [1:12:13<1:37:02,  2.06s/it]

tensor(7007, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2096/4921 [1:12:16<1:38:31,  2.09s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2097/4921 [1:12:18<1:37:24,  2.07s/it]

tensor(4907, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2098/4921 [1:12:20<1:37:08,  2.06s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2099/4921 [1:12:22<1:37:19,  2.07s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2100/4921 [1:12:24<1:37:33,  2.08s/it]

tensor(4907, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2101/4921 [1:12:26<1:37:53,  2.08s/it]

tensor(4207, device='cuda:0') tensor(13, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2102/4921 [1:12:28<1:37:41,  2.08s/it]

tensor(4907, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2103/4921 [1:12:30<1:37:17,  2.07s/it]

tensor(4207, device='cuda:0') tensor(174, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2104/4921 [1:12:32<1:39:02,  2.11s/it]

tensor(4207, device='cuda:0') tensor(107, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2105/4921 [1:12:34<1:38:37,  2.10s/it]

tensor(7007, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2106/4921 [1:12:36<1:38:51,  2.11s/it]

tensor(5607, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2107/4921 [1:12:39<1:39:23,  2.12s/it]

tensor(5607, device='cuda:0') tensor(241, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2108/4921 [1:12:41<1:39:21,  2.12s/it]

tensor(5607, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2109/4921 [1:12:43<1:37:57,  2.09s/it]

tensor(4907, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2110/4921 [1:12:45<1:37:31,  2.08s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2111/4921 [1:12:47<1:36:42,  2.06s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2112/4921 [1:12:49<1:39:22,  2.12s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2113/4921 [1:12:51<1:39:16,  2.12s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2114/4921 [1:12:53<1:39:08,  2.12s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2115/4921 [1:12:55<1:37:49,  2.09s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2116/4921 [1:12:57<1:36:59,  2.07s/it]

tensor(4207, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2117/4921 [1:12:59<1:36:10,  2.06s/it]

tensor(7707, device='cuda:0') tensor(34, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2118/4921 [1:13:01<1:35:54,  2.05s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2119/4921 [1:13:04<1:36:15,  2.06s/it]

tensor(5607, device='cuda:0') tensor(32, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2120/4921 [1:13:06<1:39:38,  2.13s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2121/4921 [1:13:08<1:38:45,  2.12s/it]

tensor(4907, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2122/4921 [1:13:10<1:38:05,  2.10s/it]

tensor(5607, device='cuda:0') tensor(191, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2123/4921 [1:13:12<1:36:52,  2.08s/it]

tensor(4907, device='cuda:0') tensor(117, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2124/4921 [1:13:14<1:36:15,  2.06s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2125/4921 [1:13:16<1:36:49,  2.08s/it]

tensor(4207, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2126/4921 [1:13:18<1:37:28,  2.09s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2127/4921 [1:13:20<1:38:09,  2.11s/it]

tensor(4207, device='cuda:0') tensor(189, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2128/4921 [1:13:23<1:39:11,  2.13s/it]

tensor(4207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2129/4921 [1:13:25<1:38:04,  2.11s/it]

tensor(7007, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2130/4921 [1:13:27<1:37:10,  2.09s/it]

tensor(6307, device='cuda:0') tensor(172, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2131/4921 [1:13:29<1:36:44,  2.08s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2132/4921 [1:13:31<1:37:28,  2.10s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2133/4921 [1:13:33<1:37:46,  2.10s/it]

tensor(4907, device='cuda:0') tensor(185, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2134/4921 [1:13:35<1:38:17,  2.12s/it]

tensor(4907, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2135/4921 [1:13:37<1:37:32,  2.10s/it]

tensor(4207, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2136/4921 [1:13:39<1:38:54,  2.13s/it]

tensor(4207, device='cuda:0') tensor(94, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2137/4921 [1:13:42<1:38:23,  2.12s/it]

tensor(4207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2138/4921 [1:13:44<1:37:50,  2.11s/it]

tensor(4207, device='cuda:0') tensor(356, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2139/4921 [1:13:46<1:37:02,  2.09s/it]

tensor(4207, device='cuda:0') tensor(88, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 43%|████▎     | 2140/4921 [1:13:48<1:36:58,  2.09s/it]

tensor(4207, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2141/4921 [1:13:50<1:36:37,  2.09s/it]

tensor(7707, device='cuda:0') tensor(184, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2142/4921 [1:13:52<1:35:29,  2.06s/it]

tensor(9107, device='cuda:0') tensor(219, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2143/4921 [1:13:54<1:35:13,  2.06s/it]

tensor(6307, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2144/4921 [1:13:56<1:38:06,  2.12s/it]

tensor(5607, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2145/4921 [1:13:58<1:36:39,  2.09s/it]

tensor(4907, device='cuda:0') tensor(28, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2146/4921 [1:14:00<1:36:50,  2.09s/it]

tensor(6307, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2147/4921 [1:14:02<1:37:29,  2.11s/it]

tensor(6307, device='cuda:0') tensor(120, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2148/4921 [1:14:05<1:36:50,  2.10s/it]

tensor(4907, device='cuda:0') tensor(175, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2149/4921 [1:14:07<1:36:16,  2.08s/it]

tensor(4207, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2150/4921 [1:14:09<1:36:08,  2.08s/it]

tensor(4907, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2151/4921 [1:14:11<1:35:35,  2.07s/it]

tensor(4907, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▎     | 2152/4921 [1:14:13<1:37:53,  2.12s/it]

tensor(4907, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2153/4921 [1:14:15<1:37:41,  2.12s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2154/4921 [1:14:17<1:37:05,  2.11s/it]

tensor(4207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2155/4921 [1:14:19<1:36:22,  2.09s/it]

tensor(4207, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2156/4921 [1:14:21<1:36:10,  2.09s/it]

tensor(4207, device='cuda:0') tensor(130, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2157/4921 [1:14:23<1:35:10,  2.07s/it]

tensor(4207, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2158/4921 [1:14:25<1:34:50,  2.06s/it]

tensor(4207, device='cuda:0') tensor(201, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2159/4921 [1:14:27<1:35:04,  2.07s/it]

tensor(4207, device='cuda:0') tensor(75, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2160/4921 [1:14:30<1:37:43,  2.12s/it]

tensor(4207, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2161/4921 [1:14:32<1:37:07,  2.11s/it]

tensor(6307, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2162/4921 [1:14:34<1:36:38,  2.10s/it]

tensor(5607, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2163/4921 [1:14:36<1:36:06,  2.09s/it]

tensor(4907, device='cuda:0') tensor(263, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2164/4921 [1:14:38<1:35:59,  2.09s/it]

tensor(6307, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2165/4921 [1:14:40<1:35:41,  2.08s/it]

tensor(4207, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2166/4921 [1:14:42<1:35:50,  2.09s/it]

tensor(4907, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2167/4921 [1:14:44<1:36:12,  2.10s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2168/4921 [1:14:47<1:39:09,  2.16s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2169/4921 [1:14:49<1:37:52,  2.13s/it]

tensor(4207, device='cuda:0') tensor(39, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2170/4921 [1:14:51<1:36:19,  2.10s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2171/4921 [1:14:53<1:35:18,  2.08s/it]

tensor(4207, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2172/4921 [1:14:55<1:35:02,  2.07s/it]

tensor(4207, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2173/4921 [1:14:57<1:34:43,  2.07s/it]

tensor(4207, device='cuda:0') tensor(38, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2174/4921 [1:14:59<1:35:10,  2.08s/it]

tensor(4207, device='cuda:0') tensor(56, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2175/4921 [1:15:01<1:34:29,  2.06s/it]

tensor(4207, device='cuda:0') tensor(151, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2176/4921 [1:15:03<1:35:47,  2.09s/it]

tensor(9807, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2177/4921 [1:15:05<1:34:46,  2.07s/it]

tensor(6307, device='cuda:0') tensor(108, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2178/4921 [1:15:07<1:34:07,  2.06s/it]

tensor(5607, device='cuda:0') tensor(4, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2179/4921 [1:15:09<1:33:52,  2.05s/it]

tensor(5607, device='cuda:0') tensor(77, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2180/4921 [1:15:11<1:34:23,  2.07s/it]

tensor(4907, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2181/4921 [1:15:13<1:35:00,  2.08s/it]

tensor(6307, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2182/4921 [1:15:15<1:34:13,  2.06s/it]

tensor(5607, device='cuda:0') tensor(139, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2183/4921 [1:15:17<1:33:37,  2.05s/it]

tensor(5607, device='cuda:0') tensor(155, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2184/4921 [1:15:20<1:34:47,  2.08s/it]

tensor(4907, device='cuda:0') tensor(47, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2185/4921 [1:15:22<1:34:04,  2.06s/it]

tensor(6307, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2186/4921 [1:15:24<1:34:52,  2.08s/it]

tensor(4907, device='cuda:0') tensor(19, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2187/4921 [1:15:26<1:35:53,  2.10s/it]

tensor(4207, device='cuda:0') tensor(20, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2188/4921 [1:15:28<1:34:35,  2.08s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 44%|████▍     | 2189/4921 [1:15:30<1:33:28,  2.05s/it]

tensor(4907, device='cuda:0') tensor(12, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2190/4921 [1:15:32<1:32:48,  2.04s/it]

tensor(8407, device='cuda:0') tensor(94, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2191/4921 [1:15:34<1:32:52,  2.04s/it]

tensor(6307, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2192/4921 [1:15:36<1:34:40,  2.08s/it]

tensor(8407, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2193/4921 [1:15:38<1:34:59,  2.09s/it]

tensor(4907, device='cuda:0') tensor(131, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2194/4921 [1:15:40<1:34:12,  2.07s/it]

tensor(7707, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2195/4921 [1:15:42<1:33:22,  2.06s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2196/4921 [1:15:44<1:32:41,  2.04s/it]

tensor(5607, device='cuda:0') tensor(106, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2197/4921 [1:15:46<1:32:07,  2.03s/it]

tensor(4207, device='cuda:0') tensor(589, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2198/4921 [1:15:48<1:32:15,  2.03s/it]

tensor(4907, device='cuda:0') tensor(87, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2199/4921 [1:15:50<1:32:56,  2.05s/it]

tensor(4207, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2200/4921 [1:15:53<1:36:00,  2.12s/it]

tensor(4207, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2201/4921 [1:15:55<1:35:06,  2.10s/it]

tensor(4207, device='cuda:0') tensor(46, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2202/4921 [1:15:57<1:33:56,  2.07s/it]

tensor(4207, device='cuda:0') tensor(89, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2203/4921 [1:15:59<1:32:54,  2.05s/it]

tensor(4207, device='cuda:0') tensor(128, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2204/4921 [1:16:01<1:32:06,  2.03s/it]

tensor(4207, device='cuda:0') tensor(158, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2205/4921 [1:16:03<1:32:19,  2.04s/it]

tensor(4207, device='cuda:0') tensor(112, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2206/4921 [1:16:05<1:32:24,  2.04s/it]

tensor(4207, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2207/4921 [1:16:07<1:32:46,  2.05s/it]

tensor(3507, device='cuda:0') tensor(57, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2208/4921 [1:16:09<1:34:49,  2.10s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2209/4921 [1:16:11<1:33:37,  2.07s/it]

tensor(4207, device='cuda:0') tensor(79, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2210/4921 [1:16:13<1:32:34,  2.05s/it]

tensor(5607, device='cuda:0') tensor(53, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2211/4921 [1:16:15<1:32:35,  2.05s/it]

tensor(5607, device='cuda:0') tensor(96, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2212/4921 [1:16:17<1:32:36,  2.05s/it]

tensor(7007, device='cuda:0') tensor(29, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2213/4921 [1:16:19<1:32:34,  2.05s/it]

tensor(5607, device='cuda:0') tensor(63, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▍     | 2214/4921 [1:16:21<1:33:03,  2.06s/it]

tensor(5607, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2215/4921 [1:16:23<1:32:39,  2.05s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2216/4921 [1:16:26<1:33:51,  2.08s/it]

tensor(4907, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5498, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2217/4921 [1:16:28<1:33:25,  2.07s/it]

tensor(4207, device='cuda:0') tensor(68, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2218/4921 [1:16:30<1:32:56,  2.06s/it]

tensor(4907, device='cuda:0') tensor(86, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2219/4921 [1:16:32<1:32:19,  2.05s/it]

tensor(4207, device='cuda:0') tensor(103, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2220/4921 [1:16:34<1:32:16,  2.05s/it]

tensor(4907, device='cuda:0') tensor(62, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2221/4921 [1:16:36<1:32:07,  2.05s/it]

tensor(4207, device='cuda:0') tensor(173, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2222/4921 [1:16:38<1:31:22,  2.03s/it]

tensor(4207, device='cuda:0') tensor(51, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2223/4921 [1:16:40<1:31:04,  2.03s/it]

tensor(4207, device='cuda:0') tensor(31, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2224/4921 [1:16:42<1:33:39,  2.08s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2225/4921 [1:16:44<1:33:44,  2.09s/it]

tensor(4207, device='cuda:0') tensor(22, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2226/4921 [1:16:46<1:32:52,  2.07s/it]

tensor(4207, device='cuda:0') tensor(166, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2227/4921 [1:16:48<1:32:19,  2.06s/it]

tensor(6307, device='cuda:0') tensor(2, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2228/4921 [1:16:50<1:32:02,  2.05s/it]

tensor(7707, device='cuda:0') tensor(43, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2229/4921 [1:16:52<1:32:09,  2.05s/it]

tensor(5607, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2230/4921 [1:16:54<1:32:31,  2.06s/it]

tensor(7007, device='cuda:0') tensor(102, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2231/4921 [1:16:56<1:31:49,  2.05s/it]

tensor(5607, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2232/4921 [1:16:59<1:33:30,  2.09s/it]

tensor(4907, device='cuda:0') tensor(100, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2233/4921 [1:17:01<1:32:48,  2.07s/it]

tensor(4907, device='cuda:0') tensor(33, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2234/4921 [1:17:03<1:32:29,  2.07s/it]

tensor(4907, device='cuda:0') tensor(10, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2235/4921 [1:17:05<1:32:38,  2.07s/it]

tensor(5607, device='cuda:0') tensor(115, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2236/4921 [1:17:07<1:32:27,  2.07s/it]

tensor(4207, device='cuda:0') tensor(14, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2237/4921 [1:17:09<1:31:49,  2.05s/it]

tensor(4207, device='cuda:0') tensor(23, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2238/4921 [1:17:11<1:31:14,  2.04s/it]

tensor(7707, device='cuda:0') tensor(445, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 45%|████▌     | 2239/4921 [1:17:13<1:30:52,  2.03s/it]

tensor(5607, device='cuda:0') tensor(85, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2240/4921 [1:17:15<1:33:24,  2.09s/it]

tensor(4907, device='cuda:0') tensor(80, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2241/4921 [1:17:17<1:33:09,  2.09s/it]

tensor(4907, device='cuda:0') tensor(54, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2242/4921 [1:17:19<1:32:38,  2.07s/it]

tensor(4907, device='cuda:0') tensor(48, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2243/4921 [1:17:21<1:32:41,  2.08s/it]

tensor(4907, device='cuda:0') tensor(17, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2244/4921 [1:17:23<1:31:50,  2.06s/it]

tensor(5607, device='cuda:0') tensor(93, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2245/4921 [1:17:25<1:31:49,  2.06s/it]

tensor(5607, device='cuda:0') tensor(176, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2246/4921 [1:17:27<1:31:34,  2.05s/it]

tensor(4907, device='cuda:0') tensor(136, device='cuda:0')
torch.Size([32, 700, 700]) torch.Size([32, 700, 700])
tensor(6.5497, device='cuda:0', grad_fn=<NllLoss2DBackward0>)


 46%|████▌     | 2247/4921 [1:17:29<1:32:02,  2.07s/it]