In [1]:
import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio

In [2]:
class AudioUtil():
  # ----------------------------
  # Load an audio file. Return the signal as a tensor and the sample rate
  # ----------------------------
  @staticmethod
  def open(audio_file):
    sig, sr = torchaudio.load(audio_file)
    return (sig, sr)

  @staticmethod
  def rechannel(aud, new_channel):
    sig, sr = aud

    if (sig.shape[0] == new_channel):
      # Nothing to do
      return aud

    if (new_channel == 1):
      # Convert from stereo to mono by selecting only the first channel
      resig = sig[:1, :]
    else:
      # Convert from mono to stereo by duplicating the first channel
      resig = torch.cat([sig, sig])

    return ((resig, sr))

  @staticmethod
  def resample(aud, newsr):
    sig, sr = aud

    if (sr == newsr):
      # Nothing to do
      return aud

    num_channels = sig.shape[0]
    # Resample first channel
    resig = torchaudio.transforms.Resample(sr, newsr)(sig[:1,:])
    if (num_channels > 1):
      # Resample the second channel and merge both channels
      retwo = torchaudio.transforms.Resample(sr, newsr)(sig[1:,:])
      resig = torch.cat([resig, retwo])

    return ((resig, newsr))

  @staticmethod
  def pad_trunc(aud, max_ms):
    sig, sr = aud
    num_rows, sig_len = sig.shape
    max_len = sr//1000 * max_ms

    if (sig_len > max_len):
      # Truncate the signal to the given length
      sig = sig[:,:max_len]

    elif (sig_len < max_len):
      # Length of padding to add at the beginning and end of the signal
      pad_begin_len = random.randint(0, max_len - sig_len)
      pad_end_len = max_len - sig_len - pad_begin_len

      # Pad with 0s
      pad_begin = torch.zeros((num_rows, pad_begin_len))
      pad_end = torch.zeros((num_rows, pad_end_len))

      sig = torch.cat((pad_begin, sig, pad_end), 1)
      
    return (sig, sr)

  @staticmethod
  def time_shift(aud, shift_limit):
    sig,sr = aud
    _, sig_len = sig.shape
    shift_amt = int(random.random() * shift_limit * sig_len)
    return (sig.roll(shift_amt), sr)

  @staticmethod
  def spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None):
    sig,sr = aud
    top_db = 80

    # spec has shape [channel, n_mels, time], where channel is mono, stereo etc
    spec = torchaudio.transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)

    # Convert to decibels
    spec = torchaudio.transforms.AmplitudeToDB(top_db=top_db)(spec)
    return (spec)

  @staticmethod
  def spectro_augment(spec, max_mask_pct=0.1, n_freq_masks=1, n_time_masks=1):
    _, n_mels, n_steps = spec.shape
    mask_value = spec.mean()
    aug_spec = spec

    freq_mask_param = max_mask_pct * n_mels
    for _ in range(n_freq_masks):
      aug_spec = torchaudio.transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)

    time_mask_param = max_mask_pct * n_steps
    for _ in range(n_time_masks):
      aug_spec = torchaudio.transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)

    return aug_spec

In [3]:
from torch.utils.data import DataLoader, Dataset, random_split
import torchaudio

# ----------------------------
# Sound Dataset
# ----------------------------
class SoundDS(Dataset):
  def __init__(self, df, data_path):
    self.df = df
    self.data_path = str(data_path)
    self.duration = 20000
    self.sr = 44100
    self.channel = 2
    self.shift_pct = 0.4
            
  # ----------------------------
  # Number of items in dataset
  # ----------------------------
  def __len__(self):
    return len(self.df)    
    
  # ----------------------------
  # Get i'th item in dataset
  # ----------------------------
  def __getitem__(self, idx):
    # Absolute file path of the audio file - concatenate the audio directory with
    # the relative path
    audio_file = self.data_path + "/smalldata" + "/wavs" + self.df.loc[idx, 'folder'] + "/"+ self.df.loc[idx, 'fileName']
    # Get the Class ID
    labels = {
        "acousticness" : self.df.loc[idx, 'acousticness'],
        "danceability" : self.df.loc[idx, 'danceability'],
        "energy" : self.df.loc[idx, 'energy'],
        "instrumentalness" : self.df.loc[idx, 'instrumentalness'],
        "liveness" : self.df.loc[idx, 'liveness'],
        "speechiness" : self.df.loc[idx, 'speechiness'],
        "tempo" : self.df.loc[idx, 'tempo'],
        "valence" : self.df.loc[idx, 'valence']
    }


    aud = AudioUtil.open(audio_file)
    # Some sounds have a higher sample rate, or fewer channels compared to the
    # majority. So make all sounds have the same number of channels and same 
    # sample rate. Unless the sample rate is the same, the pad_trunc will still
    # result in arrays of different lengths, even though the sound duration is
    # the same.
    reaud = AudioUtil.resample(aud, self.sr)
    rechan = AudioUtil.rechannel(reaud, self.channel)

    dur_aud = AudioUtil.pad_trunc(rechan, self.duration)
    shift_aud = AudioUtil.time_shift(dur_aud, self.shift_pct)
    sgram = AudioUtil.spectro_gram(shift_aud, n_mels=64, n_fft=1024, hop_len=None)
    aug_sgram = AudioUtil.spectro_augment(sgram, max_mask_pct=0.1, n_freq_masks=2, n_time_masks=2)

    return [aug_sgram, labels]

  def getSpectro(self,idx):
    audio_file = self.data_path + "/smalldata" + "/wavs" + self.df.loc[idx, 'folder'] + "/"+ self.df.loc[idx, 'fileName']
    aud = AudioUtil.open(audio_file)
    # Some sounds have a higher sample rate, or fewer channels compared to the
    # majority. So make all sounds have the same number of channels and same 
    # sample rate. Unless the sample rate is the same, the pad_trunc will still
    # result in arrays of different lengths, even though the sound duration is
    # the same.
    reaud = AudioUtil.resample(aud, self.sr)
    rechan = AudioUtil.rechannel(reaud, self.channel)

    dur_aud = AudioUtil.pad_trunc(rechan, self.duration)
    shift_aud = AudioUtil.time_shift(dur_aud, self.shift_pct)
    sgram = AudioUtil.spectro_gram(shift_aud, n_mels=64, n_fft=1024, hop_len=None)
    aug_sgram = AudioUtil.spectro_augment(sgram, max_mask_pct=0.1, n_freq_masks=2, n_time_masks=2)
    return aug_sgram

In [4]:
from torch.utils.data import DataLoader, Dataset, random_split
import torchaudio

# ----------------------------
# Sound Dataset
# ----------------------------
class SoundDS_NewData(Dataset):
  def __init__(self, df, data_path):
    self.df = df
    self.data_path = str(data_path)
    self.duration = 10000
    self.sr = 44100
    self.channel = 2
    self.shift_pct = 0.4
            
  # ----------------------------
  # Number of items in dataset
  # ----------------------------
  def __len__(self):
    return len(self.df)    
    
  def __getitem__(self, idx):
    audio_file = self.data_path + "/smalldata" + "/wavs" + self.df.loc[idx, 'folder'] + "/"+ self.df.loc[idx, 'fileName']
    aud = ''
    try:
        aud = AudioUtil.open(audio_file)
    except:
        audio_file = self.data_path + "/smalldata" + "/wavs" + "/000" + "/"+ "000002.wav"
        aud = AudioUtil.open(audio_file)
        
    aud = AudioUtil.open(audio_file)
    # Some sounds have a higher sample rate, or fewer channels compared to the
    # majority. So make all sounds have the same number of channels and same 
    # sample rate. Unless the sample rate is the same, the pad_trunc will still
    # result in arrays of different lengths, even though the sound duration is
    # the same.
    reaud = AudioUtil.resample(aud, self.sr)
    rechan = AudioUtil.rechannel(reaud, self.channel)

    dur_aud = AudioUtil.pad_trunc(rechan, self.duration)
    shift_aud = AudioUtil.time_shift(dur_aud, self.shift_pct)
    sgram = AudioUtil.spectro_gram(shift_aud, n_mels=64, n_fft=1024, hop_len=None)
    aug_sgram = AudioUtil.spectro_augment(sgram, max_mask_pct=0.1, n_freq_masks=2, n_time_masks=2)
    return aug_sgram, f"{self.df.loc[idx,'song_name']} by {self.df.loc[idx,'artist']}"

  def getName(self,idx):
        return self.df.loc[idx,"song_name"], self.df.loc[idx,"artist"]

In [7]:
import pandas as pd
datafile = pd.read_csv("./spotify_data3.csv")
# datafile = datafile.loc[datafile["folder"] == "/000"]
datafile.head()
len(datafile)

13123

In [8]:
from torch.utils.data import random_split

myds = SoundDS(datafile, ".")

In [9]:
num_items = len(myds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(myds, [num_train, num_val])
num_items
# myds.__getitem__(13122)

13123

In [11]:
len(val_ds)

2625

In [263]:
batch_size = 64

In [264]:
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=batch_size, shuffle=False)

In [228]:
acousticDict = {"0": 0.0, "1" : 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
danceabilityDict = {"0": 0.0, "1": 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
energyDict = {"0": 0.0, "1": 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
instrumentalnessDict = {"0": 0.0, "1": 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
livenessDict = {"0": 0.0, "1": 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
speechinessDict = {"0": 0.0, "1": 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
valenceDict = {"0": 0.0, "1": 0.1, "2": 0.2, "3": 0.3, "4": 0.4, "5": 0.5, "6": 0.6, "7": 0.7, "8": 0.8, "9": 0.9, "10": 1.0}
tempo = {}
for i in range(250):
    tempo[f"{i}"] = i

In [690]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
from torchvision import datasets, transforms, models
from torch.nn import init

In [299]:
class MultilabelClassifier(nn.Module):
    def __init__(self):
        super().__init__()
    
        conv_layers = []

        # First Convolution Block with Relu and Batch Norm. Use Kaiming Initialization
        self.conv1 = nn.Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        init.kaiming_normal_(self.conv1.weight, a=0.1)
        self.conv1.bias.data.zero_()
        conv_layers += [self.conv1, self.relu1, self.bn1]

        # Second Convolution Block
        self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(16)
        init.kaiming_normal_(self.conv2.weight, a=0.1)
        self.conv2.bias.data.zero_()
        conv_layers += [self.conv2, self.relu2, self.bn2]

        # Second Convolution Block
        self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(32)
        init.kaiming_normal_(self.conv3.weight, a=0.1)
        self.conv3.bias.data.zero_()
        conv_layers += [self.conv3, self.relu3, self.bn3]

        # Second Convolution Block
        self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu4 = nn.ReLU()
        self.bn4 = nn.BatchNorm2d(64)
        init.kaiming_normal_(self.conv4.weight, a=0.1)
        self.conv4.bias.data.zero_()
        conv_layers += [self.conv4, self.relu4, self.bn4]
        
        self.conv = nn.Sequential(*conv_layers)
        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.sig = nn.Sigmoid()
        self.lin = nn.Linear(in_features=64, out_features=7)

        
    def forward(self, x):

        x = self.conv(x)
        x = self.ap(x)
        x = x.view(x.shape[0], -1)
        x = self.lin(x)
#         x = self.lin2(x)
#         x = self.lin3(x)
        x = self.sig(x)
#         print(x.shape)
        
        return x

In [300]:
labelList = ["acousticness", "danceability", "energy",
             "instrumentalness", "liveness", "speechiness", "valence"]
def extractLabels(labels, index):
    newLabels = []
    for label in labelList:
        newLabels.append(labels[label][index])
    return torch.Tensor(newLabels)

In [398]:

def criterion(loss_func,outputs,labels):
  losses = 0
  lossList = []
  for i, key in enumerate(outputs):
#     print("len labels = ", len(extractLabels(labels,i)) )
#     print("len otput[i] = ", len( outputs[i]) )
#     print("labels: ",extractLabels(labels,i))
#     print("output: ", outputs[i])
    newLoss = loss_func(outputs[i], (extractLabels(labels,i)).to(device))
#     print("loss: ",newLoss)
    losses += newLoss
    lossList.append(newLoss)
#     losses += loss_func(outputs[i], 
#               ((labels[labelList[i]]).type(torch.LongTensor)).to(device))
  return [losses,lossList]

In [399]:
def training(model,device,lr_rate,epochs,train_loader):
  num_epochs = epochs
  losses = []
  checkpoint_losses = []

  optimizer = torch.optim.Adam(model.parameters(), lr=lr_rate)
  n_total_steps = len(train_loader)

  loss_func = nn.CrossEntropyLoss()
#   loss_func = nn.MSELoss

  for epoch in range(num_epochs):
     for i, data in enumerate(train_loader):
        inputs = data[0].to(device)
        labels = data[1]

        # Normalize the inputs
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s
        
        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(loss_func, outputs, labels)[0]
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % (int(n_total_steps/1)) == 0:
            checkpoint_loss = torch.tensor(losses).mean().item()
            checkpoint_losses.append(checkpoint_loss)
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {checkpoint_loss:.4f}')
  return checkpoint_losses

In [407]:
def checkAccuracy(model, testloader, criterion, loss_func,batch_size):
    totalLosses = [0,0,0,0,0,0,0]
    testLen = len(testloader)
    with torch.no_grad():
        for data in testloader:
            inputs = data[0].to(device)
            labels = data[1]
            
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s
            
            outputs = model(inputs)
            lossList = criterion(loss_func, outputs, labels)[1]
            
            
            for i in range(len(lossList)):
                totalLosses[i%7] += lossList[i] 
    totalLoss = 0
    print("End of epoch validation losses...:")
    for index, loss in enumerate(totalLosses):
        print(f"{labelList[index]} loss: {(loss/testLen):.2f}")
        totalLoss += loss
    
    print(f"Average total loss: {totalLoss/testLen:.2f}")
    return totalLosses        

In [424]:
def training2(model, train_dl, num_epochs, batch_size):
  # Loss Function, Optimizer and Scheduler
#   criterion = nn.CrossEntropyLoss()
#   loss_func = nn.CrossEntropyLoss()
  lossList = []
  labelLosses = [[],[],[],[],[],[],[]]
  loss_func = nn.MSELoss() 
#   optimizer = torch.optim.Adam(model.parameters(),lr=0.005)
  optimizer = torch.optim.SGD(model.parameters(),lr=0.005)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.005,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')

  # Repeat for each epoch
  for epoch in range(num_epochs):
    print(f"Epoch {epoch}")
    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0
    count = 1

    # Repeat for each batch in the training set
    for i, data in enumerate(train_dl):
        # Get the input features and target labels, and put them on the GPU
        inputs = data[0].to(device)
        labels = data[1]

        # Normalize the inputs
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s

        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
#         print(outputs)
        loss = criterion(loss_func, outputs, labels)[0]
        
#         loss = loss_func(outputs,labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Keep stats for Loss and Accuracy
        running_loss += loss.item()
    
        if count % 30 == 0:
            print(f"average loss: {running_loss / count}")
        count += 1

    
    # Print stats at the end of the epoch
    num_batches = len(train_dl)
    avg_loss = running_loss / num_batches
    lossList.append(avg_loss)
    separateLosses = checkAccuracy(model, val_dl, criterion, loss_func, batch_size)
    for ind,loss in enumerate(separateLosses):
        labelLosses[ind].append(loss)
#     acc = correct_prediction/total_prediction
#     print(f'Epoch: {epoch}, Loss: {avg_loss:.2f}, Accuracy: {acc:.2f}')

  print('Finished Training')
  return lossList, separateLosses

In [425]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"

device 

device(type='cuda')

In [427]:
modelFinale2 = MultilabelClassifier()
modelFinale2.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [430]:
for param in modelFinale2.parameters():
    if len(param.shape) > 1: # Initialize only the weight tensors
        init.normal_(param, mean=0, std=0.01)

In [431]:
start = time.time()
epochs = 5
averageLosses, separateLosses = training2(modelFinale2, train_dl, 3, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 6.523805634180705
average loss: 5.489678764343262
average loss: 4.833121795124478
average loss: 4.468333411216736
average loss: 4.237885862986246
End of epoch validation losses...:
acousticness loss: 0.50
danceability loss: 0.43
energy loss: 0.44
instrumentalness loss: 0.43
liveness loss: 0.44
speechiness loss: 0.43
valence loss: 0.40
Average total loss: 3.07
Epoch 1
average loss: 3.141201543807983
average loss: 3.1188626170158384
average loss: 3.0855477200614083
average loss: 3.0652604679266613
average loss: 3.0476166645685834
End of epoch validation losses...:
acousticness loss: 0.45
danceability loss: 0.38
energy loss: 0.40
instrumentalness loss: 0.40
liveness loss: 0.42
speechiness loss: 0.40
valence loss: 0.37
Average total loss: 2.82
Epoch 2
average loss: 2.9233943859736127
average loss: 2.890669310092926
average loss: 2.862364739841885
average loss: 2.8683715124924976
average loss: 2.8457012669245403
End of epoch validation losses...:
acousticness loss: 0.4

In [414]:
modelFinale = MultilabelClassifier()
modelFinale.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [415]:
start = time.time()
epochs = 5
averageLosses, separateLosses = training2(modelFinale, train_dl, 5, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 6.411480458577474
average loss: 5.932156125704448
average loss: 5.4403713358773125
average loss: 4.956944215297699
average loss: 4.571863066355387
End of epoch validation losses...:
acousticness loss: 0.47
danceability loss: 0.42
energy loss: 0.42
instrumentalness loss: 0.41
liveness loss: 0.43
speechiness loss: 0.40
valence loss: 0.39
Average total loss: 2.95
Epoch 1
average loss: 2.998306703567505
average loss: 2.9782835364341738
average loss: 2.936781716346741
average loss: 2.8901377856731414
average loss: 2.869297563234965
End of epoch validation losses...:
acousticness loss: 0.41
danceability loss: 0.36
energy loss: 0.38
instrumentalness loss: 0.37
liveness loss: 0.37
speechiness loss: 0.37
valence loss: 0.36
Average total loss: 2.63
Epoch 2
average loss: 2.710949985186259
average loss: 2.6827095627784727
average loss: 2.6767201821009317
average loss: 2.6477932105461757
average loss: 2.6436170077323915
End of epoch validation losses...:
acousticness loss: 0.4

In [416]:
torch.save(modelFinale.state_dict(), "./final.pt")

In [417]:
batch_size = 32# choose an appropriate batch size
new_loader = DataLoader(toBeLabeledData, batch_size=batch_size)

# Evaluate the model on the new set of spectrograms
modelFinale.eval()
with torch.no_grad():
    for batch in val_dl:
        inputs = batch[0].to(device)
        outputs = modelFinale(inputs)
        for i in range(batch_size):
            print("guess:",outputs[i])
            print("label:",extractLabels(batch[1],i))
#         print(outputs)
#         print(batch[1])

guess: tensor([1.7908e-16, 9.9462e-01, 1.0000e+00, 1.0000e+00, 5.8061e-01, 9.9921e-01,
        5.7743e-09], device='cuda:0')
label: tensor([0.7451, 0.6919, 0.4925, 0.8498, 0.1258, 0.0856, 0.8678])
guess: tensor([7.1825e-09, 3.5001e-01, 1.0000e+00, 1.0000e+00, 9.9946e-01, 9.9990e-01,
        2.4636e-10], device='cuda:0')
label: tensor([0.0021, 0.4327, 0.8246, 0.1568, 0.7232, 0.0538, 0.2460])
guess: tensor([1.0000e+00, 4.1300e-03, 9.9982e-01, 1.0000e+00, 5.0836e-01, 6.9262e-02,
        4.2028e-08], device='cuda:0')
label: tensor([0.7301, 0.4057, 0.3299, 0.9117, 0.2025, 0.0616, 0.0422])
guess: tensor([1.7805e-07, 7.7593e-01, 1.0000e+00, 1.0000e+00, 1.2630e-01, 3.4860e-02,
        2.2203e-03], device='cuda:0')
label: tensor([0.0118, 0.3882, 0.9073, 0.9266, 0.2879, 0.0380, 0.9061])
guess: tensor([1.5667e-02, 9.7962e-01, 1.0000e+00, 1.0000e+00, 7.0395e-01, 9.6169e-01,
        1.5341e-05], device='cuda:0')
label: tensor([0.0145, 0.6382, 0.7145, 0.3847, 0.1367, 0.0292, 0.5496])
guess: tensor([

KeyboardInterrupt: 

In [366]:
modelSGDLR2 = MultilabelClassifier()
modelSGDLR2.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [367]:
start = time.time()
epochs = 5
training2(modelSGDLR2, train_dl, 1, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
len labels =  7
len otput[i] =  7
labels:  tensor([0.3132, 0.4223, 0.6460, 0.8445, 0.1646, 0.0280, 0.9194])
output:  tensor([0.5163, 0.5191, 0.4576, 0.4416, 0.5274, 0.5026, 0.4271],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1211, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4824, 0.0830, 0.5390, 0.9215, 0.7600, 0.0453, 0.3913])
output:  tensor([0.4155, 0.5175, 0.4045, 0.5023, 0.4730, 0.5327, 0.4630],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1017, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2397, 0.5904, 0.6970, 0.9353, 0.1113, 0.0293, 0.3429])
output:  tensor([0.5046, 0.4999, 0.5195, 0.5213, 0.5409, 0.4972, 0.4920],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1010, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9580e-01, 2.9346e-01, 1.1847e-01, 2.598

output:  tensor([0.4368, 0.5464, 0.3907, 0.4612, 0.4814, 0.5425, 0.4709],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1730, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0572, 0.4606, 0.0799, 0.9096, 0.1138, 0.0549, 0.0374])
output:  tensor([0.4899, 0.5192, 0.5488, 0.5385, 0.4467, 0.5476, 0.4446],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1525, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9771, 0.4655, 0.4215, 0.9169, 0.0864, 0.0310, 0.8278])
output:  tensor([0.4718, 0.5430, 0.4659, 0.4679, 0.5571, 0.5298, 0.4497],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1540, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([7.9585e-01, 3.3334e-01, 3.1172e-01, 6.8400e-08, 5.9705e-02, 3.6649e-02,
        1.3334e-01])
output:  tensor([0.4120, 0.4961, 0.4319, 0.4378, 0.5355, 0.4633,

loss:  tensor(0.1394, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9936, 0.2611, 0.0354, 0.8659, 0.6835, 0.0474, 0.1491])
output:  tensor([0.6570, 0.5856, 0.4946, 0.5123, 0.5002, 0.4486, 0.4530],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1202, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0195, 0.6015, 0.8618, 0.0119, 0.6076, 0.0558, 0.8424])
output:  tensor([0.4350, 0.5286, 0.5654, 0.5189, 0.5136, 0.5290, 0.4240],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1329, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0721, 0.6047, 0.7915, 0.5922, 0.1128, 0.0446, 0.6917])
output:  tensor([0.4655, 0.5177, 0.4172, 0.4781, 0.5123, 0.5229, 0.4912],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1063, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.1192, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2271, 0.4960, 0.3286, 0.5443, 0.0889, 0.0299, 0.0832])
output:  tensor([0.4852, 0.5371, 0.5269, 0.4861, 0.5201, 0.5110, 0.4585],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0956, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7420, 0.1431, 0.9456, 0.7152, 0.1320, 0.0583, 0.1232])
output:  tensor([0.4680, 0.5518, 0.3977, 0.4371, 0.5191, 0.6117, 0.4974],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1737, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5173, 0.5540, 0.8971, 0.0021, 0.0647, 0.0420, 0.7263])
output:  tensor([0.4664, 0.4875, 0.4537, 0.4723, 0.4977, 0.5324, 0.4967],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1294, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

len labels =  7
len otput[i] =  7
labels:  tensor([0.0023, 0.8492, 0.4651, 0.9323, 0.0762, 0.0446, 0.8614])
output:  tensor([0.4811, 0.4895, 0.4949, 0.4915, 0.5632, 0.4957, 0.4517],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1661, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0502, 0.6753, 0.5491, 0.8695, 0.0724, 0.1228, 0.3990])
output:  tensor([0.4619, 0.5114, 0.4388, 0.4794, 0.5362, 0.5489, 0.4884],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1093, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1236, 0.6187, 0.7535, 0.0015, 0.5139, 0.1292, 0.5696])
output:  tensor([0.4686, 0.5260, 0.4685, 0.5070, 0.5507, 0.5178, 0.4814],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0892, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3205, 0.7357, 0.6871, 0.1819, 0.0714, 0.0353, 0

labels:  tensor([0.8036, 0.3003, 0.4896, 0.6492, 0.1062, 0.0315, 0.6207])
output:  tensor([0.4546, 0.5323, 0.4792, 0.4650, 0.5294, 0.5319, 0.4449],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0957, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9571e-01, 5.5386e-01, 1.2762e-01, 5.1155e-05, 7.2664e-01, 6.9491e-02,
        5.2712e-01])
output:  tensor([0.5136, 0.5326, 0.5306, 0.4665, 0.3938, 0.5272, 0.3942],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1358, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.5489, 0.0608, 0.9679, 0.2668, 0.0376, 0.4155])
output:  tensor([0.5997, 0.5062, 0.5033, 0.4709, 0.3401, 0.5105, 0.6150],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1243, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0699, 0.5970, 0.8489, 0.4534, 0.0640, 0.0298,

loss:  tensor(0.0761, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9837, 0.3733, 0.4370, 0.2052, 0.7895, 0.3351, 0.6416])
output:  tensor([0.5514, 0.5351, 0.4994, 0.4837, 0.4819, 0.4610, 0.4700],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0621, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.2097e-01, 9.4280e-01, 6.0609e-01, 1.9900e-07, 1.7000e-01, 2.8460e-01,
        3.9691e-01])
output:  tensor([0.5340, 0.4981, 0.5482, 0.5264, 0.5521, 0.4696, 0.4945],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1198, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6083, 0.6241, 0.3002, 0.7680, 0.3571, 0.1249, 0.6918])
output:  tensor([0.5037, 0.5260, 0.5183, 0.5425, 0.5273, 0.5031, 0.4839],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0477, device='cuda:0', grad_fn=<MseLossBackward0

loss:  tensor(0.0972, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.4900e-04, 4.2778e-01, 8.9731e-01, 1.7576e-03, 1.1063e-01, 5.0075e-02,
        5.8152e-01])
output:  tensor([0.4522, 0.5155, 0.4628, 0.4886, 0.4561, 0.5296, 0.4439],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1437, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0172, 0.5717, 0.7650, 0.8276, 0.1027, 0.2342, 0.3013])
output:  tensor([0.4366, 0.5219, 0.4593, 0.4822, 0.5545, 0.4902, 0.4308],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0968, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9401, 0.4407, 0.6710, 0.9689, 0.0803, 0.0678, 0.6474])
output:  tensor([0.4351, 0.5362, 0.4496, 0.4726, 0.4835, 0.5773, 0.4392],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0

output:  tensor([0.5014, 0.5234, 0.4931, 0.5014, 0.5921, 0.4832, 0.4522],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1073, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5854, 0.2975, 0.9816, 0.5232, 0.0992, 0.1305, 0.0380])
output:  tensor([0.4901, 0.5351, 0.4157, 0.4357, 0.5194, 0.5811, 0.5187],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.2089e-02, 5.3651e-01, 8.4085e-01, 5.2612e-04, 1.2610e-01, 2.6799e-02,
        5.8399e-01])
output:  tensor([0.4044, 0.5332, 0.3870, 0.4520, 0.4812, 0.6152, 0.4729],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1498, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8912, 0.4376, 0.5505, 0.9266, 0.1239, 0.0345, 0.6208])
output:  tensor([0.4444, 0.5200, 0.4456, 0.4953, 0.4938, 0.5201,

loss:  tensor(0.1820, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8579, 0.5550, 0.6495, 0.2905, 0.0864, 0.1723, 0.9045])
output:  tensor([0.4912, 0.4913, 0.5100, 0.4839, 0.5551, 0.4876, 0.4560],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1022, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9206, 0.7882, 0.9474, 0.9079, 0.1105, 0.1114, 0.9578])
output:  tensor([0.5588, 0.5192, 0.5043, 0.4853, 0.5432, 0.5025, 0.5160],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9069, 0.6331, 0.1028, 0.3818, 0.0979, 0.1603, 0.5734])
output:  tensor([0.4896, 0.5087, 0.4165, 0.5183, 0.4508, 0.5294, 0.5271],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0814, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

len labels =  7
len otput[i] =  7
labels:  tensor([0.7543, 0.6601, 0.6495, 0.7380, 0.1909, 0.1673, 0.1976])
output:  tensor([0.4657, 0.5195, 0.4984, 0.4972, 0.5784, 0.4989, 0.4641],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0736, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6984, 0.7331, 0.8320, 0.8827, 0.1346, 0.0329, 0.9000])
output:  tensor([0.5084, 0.4977, 0.4459, 0.4465, 0.6047, 0.5404, 0.5349],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1490, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.2279e-05, 3.2007e-01, 7.5132e-01, 8.3070e-01, 2.3459e-01, 4.2849e-01,
        4.9749e-02])
output:  tensor([0.4233, 0.5346, 0.4616, 0.5445, 0.4605, 0.5303, 0.4571],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0883, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8807, 0.421

len labels =  7
len otput[i] =  7
labels:  tensor([0.8663, 0.7195, 0.2838, 0.0681, 0.1806, 0.8023, 0.8781])
output:  tensor([0.4928, 0.5112, 0.4880, 0.4936, 0.4578, 0.4767, 0.4371],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1119, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0865, 0.4285, 0.2864, 0.8154, 0.0997, 0.0736, 0.1751])
output:  tensor([0.4723, 0.5311, 0.5042, 0.5214, 0.4881, 0.5246, 0.4410],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1026, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3779, 0.3629, 0.7550, 0.8091, 0.0339, 0.0427, 0.6055])
output:  tensor([0.4475, 0.4983, 0.4832, 0.4977, 0.4972, 0.4773, 0.4864],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0874, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3303, 0.3985, 0.6588, 0.8817, 0.0970, 0.0425, 0

output:  tensor([0.4468, 0.5276, 0.4441, 0.4462, 0.5165, 0.5303, 0.4700],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1088, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.9478e-01, 7.2714e-01, 6.3731e-01, 1.1086e-05, 4.1149e-01, 1.2427e-01,
        4.1102e-01])
output:  tensor([0.4837, 0.5033, 0.4958, 0.4561, 0.5479, 0.5181, 0.4541],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0767, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9469, 0.5418, 0.2848, 0.7385, 0.1028, 0.0274, 0.1230])
output:  tensor([0.4550, 0.5165, 0.4872, 0.5036, 0.4938, 0.5087, 0.4479],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1184, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0111, 0.5362, 0.8525, 0.2511, 0.1298, 0.0379, 0.4831])
output:  tensor([0.4511, 0.5104, 0.4804, 0.4974, 0.5311, 0.5033,

len labels =  7
len otput[i] =  7
labels:  tensor([0.1529, 0.6380, 0.8987, 0.8674, 0.2889, 0.0659, 0.1700])
output:  tensor([0.5954, 0.5419, 0.5117, 0.5025, 0.5380, 0.4843, 0.4679],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1163, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8617, 0.5518, 0.4012, 0.9374, 0.1137, 0.3516, 0.4642])
output:  tensor([0.5010, 0.4870, 0.4382, 0.4705, 0.5466, 0.4635, 0.4734],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0791, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7735, 0.6711, 0.6157, 0.8330, 0.1266, 0.0350, 0.9121])
output:  tensor([0.4490, 0.5143, 0.4924, 0.4805, 0.5062, 0.5124, 0.4465],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1226, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0247, 0.5987, 0.8713, 0.6131, 0.1861, 0.0296, 0

output:  tensor([0.4974, 0.5173, 0.4852, 0.4635, 0.4776, 0.5026, 0.4931],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1205, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9332, 0.6939, 0.7193, 0.7917, 0.1300, 0.0525, 0.9501])
output:  tensor([0.4491, 0.5129, 0.4710, 0.4709, 0.4938, 0.5410, 0.4757],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1468, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9905, 0.4526, 0.0984, 0.5763, 0.1165, 0.0646, 0.6097])
output:  tensor([0.5534, 0.5402, 0.4645, 0.4813, 0.4489, 0.4693, 0.4889],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0901, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5417, 0.2582, 0.9985, 0.8890, 0.0959, 0.2820, 0.0377])
output:  tensor([0.4064, 0.5050, 0.4436, 0.4501, 0.5220, 0.6375, 0.5118],
       device='cuda:0', gr

loss:  tensor(0.0702, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1622, 0.7402, 0.5189, 0.8761, 0.1134, 0.0710, 0.8498])
output:  tensor([0.4314, 0.5065, 0.4675, 0.4618, 0.5306, 0.5221, 0.4415],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1208, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9623, 0.1865, 0.1417, 0.5344, 0.0723, 0.0382, 0.0715])
output:  tensor([0.4851, 0.5087, 0.4928, 0.4801, 0.4802, 0.4735, 0.4352],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1351, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8613, 0.4703, 0.6467, 0.9725, 0.1113, 0.1472, 0.7966])
output:  tensor([0.5071, 0.5429, 0.3835, 0.5215, 0.5149, 0.4726, 0.5333],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1059, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

len labels =  7
len otput[i] =  7
labels:  tensor([0.0445, 0.4112, 0.9520, 0.4811, 0.1081, 0.0328, 0.7979])
output:  tensor([0.3483, 0.4952, 0.4739, 0.4404, 0.4990, 0.6446, 0.5180],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1336, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0062, 0.6066, 0.1780, 0.9565, 0.1122, 0.0763, 0.5312])
output:  tensor([0.5358, 0.5323, 0.4501, 0.4860, 0.4199, 0.5190, 0.4108],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1266, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.6614e-04, 5.6810e-01, 8.3239e-01, 4.2555e-05, 4.0739e-01, 6.5958e-02,
        5.3596e-01])
output:  tensor([0.4579, 0.4939, 0.5064, 0.4931, 0.5102, 0.5006, 0.4723],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1097, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0077, 0.362

output:  tensor([0.5101, 0.5051, 0.5026, 0.5133, 0.4968, 0.4839, 0.4798],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0959, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4230, 0.4503, 0.8817, 0.8119, 0.1776, 0.1344, 0.4131])
output:  tensor([0.5511, 0.5471, 0.4976, 0.5421, 0.5677, 0.4521, 0.4630],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0717, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2780, 0.7336, 0.6407, 0.9099, 0.1108, 0.1261, 0.4414])
output:  tensor([0.5615, 0.5376, 0.4850, 0.5105, 0.5357, 0.5008, 0.5249],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0901, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2984, 0.1769, 0.9488, 0.9591, 0.2433, 0.0550, 0.3110])
output:  tensor([0.3701, 0.4897, 0.4845, 0.4331, 0.5041, 0.5885, 0.4906],
       device='cuda:0', gr

labels:  tensor([0.0195, 0.2812, 0.7707, 0.0043, 0.1055, 0.1738, 0.4672])
output:  tensor([0.3933, 0.4927, 0.5153, 0.4609, 0.5080, 0.5599, 0.5208],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1103, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.2273e-01, 3.5274e-01, 2.8079e-01, 5.0687e-04, 1.0861e-01, 2.9838e-02,
        5.3673e-02])
output:  tensor([0.4844, 0.5349, 0.4429, 0.5057, 0.4827, 0.4843, 0.4748],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1472, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9659, 0.4651, 0.0924, 0.3841, 0.1102, 0.0340, 0.3435])
output:  tensor([0.5509, 0.5712, 0.4177, 0.4839, 0.4870, 0.4596, 0.4391],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0902, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0133, 0.6338, 0.4142, 0.8064, 0.0924, 0.0641,

labels:  tensor([0.5798, 0.2454, 0.6370, 0.7742, 0.1122, 0.0604, 0.1834])
output:  tensor([0.4705, 0.5142, 0.5514, 0.4939, 0.5039, 0.4765, 0.4514],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0812, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.2257e-04, 6.8397e-01, 6.9860e-01, 7.9032e-01, 1.0984e-01, 5.0081e-02,
        3.3144e-01])
output:  tensor([0.4242, 0.5065, 0.5094, 0.4753, 0.4835, 0.4986, 0.4672],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1008, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0372, 0.5439, 0.7485, 0.1104, 0.0880, 0.0242, 0.3224])
output:  tensor([0.3590, 0.4861, 0.5440, 0.4677, 0.4156, 0.5708, 0.4925],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1016, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0236, 0.3695, 0.9896, 0.8758, 0.0824, 0.0434,

loss:  tensor(0.0718, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2377, 0.4326, 0.5478, 0.3622, 0.5068, 0.0285, 0.5028])
output:  tensor([0.4528, 0.5019, 0.5157, 0.4891, 0.4546, 0.4899, 0.4900],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0406, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.6754e-03, 4.1621e-01, 7.4358e-01, 1.9850e-07, 1.2100e-01, 9.2818e-02,
        6.8415e-01])
output:  tensor([0.3911, 0.4873, 0.5848, 0.4646, 0.4665, 0.5229, 0.4649],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1069, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0071, 0.4660, 0.9639, 0.9084, 0.1278, 0.0421, 0.5861])
output:  tensor([0.3722, 0.4943, 0.5879, 0.4492, 0.4545, 0.5635, 0.4868],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1250, device='cuda:0', grad_fn=<MseLossBackward0

loss:  tensor(0.1022, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0032, 0.5650, 0.7841, 0.8375, 0.0714, 0.0761, 0.3735])
output:  tensor([0.3963, 0.5021, 0.5580, 0.5122, 0.5125, 0.4858, 0.4433],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0975, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0437, 0.3965, 0.8998, 0.6071, 0.1408, 0.0363, 0.5688])
output:  tensor([0.4426, 0.5271, 0.5271, 0.5047, 0.4628, 0.4924, 0.4343],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0936, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6986, 0.5275, 0.9204, 0.9256, 0.1046, 0.0658, 0.3006])
output:  tensor([0.4058, 0.4821, 0.5726, 0.4678, 0.4858, 0.5353, 0.4640],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1158, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.4762, 0.4047, 0.9727, 0.1093, 0.0344, 0.8029])
output:  tensor([0.6495, 0.5707, 0.4420, 0.5865, 0.4821, 0.4580, 0.4890],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0995, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7029, 0.2503, 0.6580, 0.9603, 0.3080, 0.0417, 0.3465])
output:  tensor([0.3935, 0.5215, 0.5800, 0.4829, 0.4821, 0.4587, 0.4391],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0880, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8770, 0.6003, 0.4751, 0.7413, 0.1048, 0.0346, 0.6889])
output:  tensor([0.4378, 0.5078, 0.5043, 0.4712, 0.4357, 0.4661, 0.5166],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0858, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

labels:  tensor([0.5649, 0.5219, 0.7003, 0.8394, 0.0832, 0.1601, 0.7728])
output:  tensor([0.5205, 0.4804, 0.4828, 0.4425, 0.4123, 0.4091, 0.4863],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0658, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9052, 0.2948, 0.5473, 0.9338, 0.0869, 0.2592, 0.2979])
output:  tensor([0.5069, 0.5245, 0.5259, 0.5263, 0.4978, 0.4329, 0.4290],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0849, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9028, 0.3002, 0.9793, 0.9646, 0.1627, 0.0348, 0.4936])
output:  tensor([0.3648, 0.5037, 0.6151, 0.4692, 0.4675, 0.4764, 0.4103],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8637, 0.4018, 0.7200, 0.8881, 0.0901, 0.0434, 0.5965])
output:  tensor([0.5199, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.8517, 0.3748, 0.6830, 0.8233, 0.0920, 0.0271, 0.4244])
output:  tensor([0.3928, 0.5197, 0.5596, 0.5046, 0.4354, 0.4414, 0.5139],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0923, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0098, 0.5486, 0.9300, 0.8439, 0.0955, 0.0413, 0.7324])
output:  tensor([0.3458, 0.4835, 0.6657, 0.4975, 0.5000, 0.4294, 0.4062],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1039, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9901, 0.2523, 0.2315, 0.0130, 0.6411, 0.0814, 0.2721])
output:  tensor([0.5179, 0.5278, 0.4182, 0.5162, 0.4193, 0.4392, 0.4623],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1143, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7690, 0.2985, 0.1511, 0.0060, 0.1051, 0.0341, 0

output:  tensor([0.5336, 0.5423, 0.4738, 0.5327, 0.4619, 0.4403, 0.4770],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1101, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9921, 0.1800, 0.1228, 0.9223, 0.1319, 0.0397, 0.0210])
output:  tensor([0.6741, 0.4951, 0.4426, 0.4656, 0.4662, 0.3929, 0.4731],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1360, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7618, 0.6653, 0.6011, 0.9129, 0.0943, 0.9082, 0.0283])
output:  tensor([0.9221, 0.5644, 0.0920, 0.7075, 0.4933, 0.4252, 0.4132],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1254, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.1048e-06, 2.0607e-01, 2.5265e-01, 8.1644e-01, 1.5109e-01, 5.1029e-02,
        3.4419e-01])
output:  tensor([0.2580, 0.4355, 0.7034, 0.4585, 0.3894, 0.5134,

loss:  tensor(0.1328, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4513, 0.5215, 0.6766, 0.9161, 0.1186, 0.0395, 0.8437])
output:  tensor([0.5372, 0.5413, 0.4400, 0.5311, 0.4265, 0.3965, 0.4400],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0853, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0428, 0.7703, 0.9602, 0.9592, 0.0608, 0.0468, 0.5310])
output:  tensor([0.3810, 0.4699, 0.6525, 0.4700, 0.4761, 0.4269, 0.4946],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1224, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9928, 0.3904, 0.0673, 0.0907, 0.1067, 0.0415, 0.2585])
output:  tensor([0.6608, 0.5567, 0.3473, 0.5775, 0.4507, 0.4122, 0.3802],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1034, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0791, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.0032e-01, 6.7666e-01, 8.8247e-01, 8.4000e-08, 1.4848e-01, 4.2503e-02,
        9.4088e-01])
output:  tensor([0.5448, 0.4873, 0.4506, 0.5056, 0.3974, 0.3858, 0.4914],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1322, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4746, 0.5353, 0.9292, 0.0072, 0.2303, 0.0734, 0.8032])
output:  tensor([0.3395, 0.4688, 0.7275, 0.4933, 0.4208, 0.4368, 0.4890],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0809, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.1945, 0.0258, 0.9491, 0.1498, 0.0340, 0.3292])
output:  tensor([0.7499, 0.5091, 0.1631, 0.5390, 0.4268, 0.4115, 0.4192],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0820, device='cuda:0', grad_fn=<MseLossBackward0

loss:  tensor(0.1060, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9086, 0.3499, 0.8613, 0.0084, 0.2359, 0.0829, 0.6961])
output:  tensor([0.3219, 0.4685, 0.7098, 0.4852, 0.4013, 0.4161, 0.4670],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1142, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9857, 0.1776, 0.7201, 0.9613, 0.1197, 0.0455, 0.1492])
output:  tensor([0.4798, 0.4840, 0.5330, 0.4763, 0.3740, 0.3865, 0.4248],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1253, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6215, 0.3591, 0.9843, 0.8241, 0.1575, 0.0438, 0.6153])
output:  tensor([0.3147, 0.5119, 0.6552, 0.5460, 0.4075, 0.4103, 0.4234],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0767, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.3732, 0.5036, 0.6612, 0.5091, 0.4215, 0.3812, 0.4805],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0745, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9941, 0.3987, 0.4916, 0.1378, 0.4723, 0.0537, 0.4092])
output:  tensor([0.5462, 0.5097, 0.5509, 0.4958, 0.4551, 0.3583, 0.4811],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0633, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9613, 0.8467, 0.3747, 0.9677, 0.1102, 0.0513, 0.8665])
output:  tensor([0.5947, 0.5587, 0.4176, 0.5529, 0.4857, 0.4056, 0.4625],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1173, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5203, 0.6440, 0.6213, 0.5336, 0.1568, 0.0450, 0.9662])
output:  tensor([0.5053, 0.4978, 0.5500, 0.5359, 0.4203, 0.3892, 0.5013],
       device='cuda:0', gr

len otput[i] =  7
labels:  tensor([7.8482e-01, 6.2567e-01, 5.7657e-01, 5.9958e-04, 1.0727e-01, 4.1440e-02,
        7.2147e-01])
output:  tensor([0.3880, 0.4876, 0.6355, 0.5067, 0.3950, 0.3697, 0.4670],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0988, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0052, 0.7140, 0.5900, 0.1830, 0.6345, 0.0347, 0.7269])
output:  tensor([0.4795, 0.5361, 0.5386, 0.5130, 0.4258, 0.3498, 0.4988],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0804, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2720, 0.7670, 0.7267, 0.8632, 0.1234, 0.0740, 0.9167])
output:  tensor([0.7199, 0.5412, 0.3744, 0.5477, 0.4243, 0.3776, 0.4848],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1206, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0025, 0.6464, 0.8080, 0.934

loss:  tensor(0.0571, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0340, 0.7359, 0.5508, 0.9189, 0.0954, 0.0450, 0.1717])
output:  tensor([0.4849, 0.5270, 0.5123, 0.5349, 0.4028, 0.3626, 0.4741],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0975, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9643, 0.2516, 0.2509, 0.8122, 0.1212, 0.0311, 0.4499])
output:  tensor([0.5496, 0.5322, 0.4880, 0.5326, 0.4235, 0.3429, 0.4710],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0820, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1787, 0.2954, 0.9257, 0.0651, 0.3704, 0.0463, 0.7214])
output:  tensor([0.4495, 0.4992, 0.6303, 0.5262, 0.3762, 0.3701, 0.4330],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0861, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.2862, 0.4782, 0.7584, 0.4972, 0.3928, 0.3210, 0.4561],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0570, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.4594e-05, 7.2809e-01, 8.1518e-01, 8.1631e-01, 9.5464e-02, 2.4349e-01,
        9.1118e-01])
output:  tensor([0.4576, 0.5213, 0.5443, 0.5102, 0.4256, 0.3642, 0.5298],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0983, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0486, 0.7087, 0.8161, 0.0358, 0.1095, 0.0559, 0.9079])
output:  tensor([0.2818, 0.4624, 0.8228, 0.5237, 0.3688, 0.3250, 0.4666],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0982, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9950, 0.2646, 0.0623, 0.9560, 0.6839, 0.0403, 0.0621])
output:  tensor([0.9167, 0.5265, 0.0407, 0.6720, 0.4498, 0.4102,

loss:  tensor(0.0819, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4316, 0.4167, 0.6651, 0.1401, 0.1498, 0.0335, 0.2752])
output:  tensor([0.2613, 0.4432, 0.8449, 0.5153, 0.3475, 0.3189, 0.4450],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0503, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2924, 0.7243, 0.5691, 0.3041, 0.0962, 0.0794, 0.3081])
output:  tensor([0.7050, 0.4904, 0.2892, 0.5141, 0.3406, 0.3365, 0.4340],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0699, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9950, 0.2410, 0.1197, 0.0414, 0.8135, 0.0784, 0.1845])
output:  tensor([0.5240, 0.4991, 0.5300, 0.5507, 0.3238, 0.3112, 0.4757],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.3148, 0.4766, 0.7597, 0.5212, 0.3546, 0.3436, 0.4215],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0760, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.3978, 0.2355, 0.9502, 0.3848, 0.0356, 0.9079])
output:  tensor([0.8159, 0.4922, 0.1472, 0.5861, 0.4310, 0.3794, 0.3905],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0814, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1399, 0.8414, 0.3790, 0.8836, 0.2346, 0.0375, 0.3709])
output:  tensor([0.5529, 0.5615, 0.4002, 0.5587, 0.4190, 0.3528, 0.5141],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0727, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0541, 0.2694, 0.9970, 0.2465, 0.1336, 0.0335, 0.5548])
output:  tensor([0.2312, 0.4757, 0.7994, 0.5139, 0.3839, 0.3327, 0.4949],
       device='cuda:0', gr

labels:  tensor([0.8913, 0.8636, 0.5402, 0.8014, 0.0932, 0.1624, 0.2650])
output:  tensor([0.3269, 0.4988, 0.7419, 0.5116, 0.3671, 0.3137, 0.5185],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1055, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9895, 0.3518, 0.0109, 0.9599, 0.0886, 0.0561, 0.8305])
output:  tensor([0.9782, 0.4711, 0.0094, 0.8048, 0.4556, 0.3535, 0.2074],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0928, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9833, 0.3940, 0.0333, 0.9394, 0.1454, 0.0501, 0.2800])
output:  tensor([0.7797, 0.4717, 0.1608, 0.5867, 0.4176, 0.3639, 0.4545],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0559, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.8296e-04, 2.5203e-01, 8.7157e-01, 9.6540e-01, 1.0017e-01, 4.2156e-02,
        7.2

output:  tensor([0.8081, 0.4958, 0.1501, 0.5419, 0.3619, 0.3270, 0.3341],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0495, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9895, 0.4940, 0.1845, 0.9684, 0.1166, 0.0423, 0.2670])
output:  tensor([0.5498, 0.5793, 0.4144, 0.5796, 0.3275, 0.3561, 0.5043],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0863, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4051, 0.7730, 0.7572, 0.0119, 0.3105, 0.0709, 0.8995])
output:  tensor([0.3582, 0.5005, 0.7437, 0.5133, 0.3456, 0.3000, 0.5115],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0761, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.6994e-04, 8.8148e-01, 4.7648e-01, 2.5389e-02, 6.9216e-02, 8.4174e-01,
        7.3815e-01])
output:  tensor([0.4710, 0.5524, 0.5361, 0.5342, 0.3629, 0.3385,

output:  tensor([0.4144, 0.5000, 0.7400, 0.5709, 0.3634, 0.2943, 0.4851],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9947, 0.3046, 0.0815, 0.8896, 0.6578, 0.0422, 0.1993])
output:  tensor([0.8308, 0.4934, 0.0954, 0.6007, 0.3986, 0.3424, 0.3679],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0474, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2565, 0.8303, 0.2962, 0.8904, 0.1912, 0.1795, 0.3018])
output:  tensor([0.5994, 0.5808, 0.3061, 0.5854, 0.3388, 0.3224, 0.4713],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0491, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5877, 0.7247, 0.7249, 0.8463, 0.1004, 0.0563, 0.6585])
output:  tensor([0.3532, 0.4962, 0.7386, 0.5295, 0.3272, 0.2750, 0.4473],
       device='cuda:0', gr

loss:  tensor(0.0616, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6682, 0.4783, 0.5061, 0.9209, 0.1222, 0.0353, 0.1838])
output:  tensor([0.4031, 0.6218, 0.5659, 0.5799, 0.3716, 0.3024, 0.5573],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0691, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1471, 0.1556, 0.7041, 0.9549, 0.1874, 0.0456, 0.3513])
output:  tensor([0.3233, 0.5277, 0.7142, 0.5650, 0.3160, 0.2672, 0.4808],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0577, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.8606e-04, 3.9659e-01, 9.0075e-01, 8.9380e-01, 1.4781e-01, 5.9033e-02,
        7.4883e-01])
output:  tensor([0.2249, 0.4482, 0.8776, 0.5267, 0.3069, 0.2708, 0.4724],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0478, device='cuda:0', grad_fn=<MseLossBackward0

labels:  tensor([2.5801e-06, 7.8005e-01, 4.6343e-01, 9.7637e-01, 8.5150e-02, 1.0476e-01,
        5.1742e-01])
output:  tensor([0.6556, 0.4946, 0.4570, 0.5271, 0.3056, 0.3452, 0.5746],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1176, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9928, 0.4173, 0.2107, 0.0815, 0.4558, 0.1292, 0.2900])
output:  tensor([0.9010, 0.4297, 0.0298, 0.7500, 0.3872, 0.3555, 0.2541],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0779, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9400, 0.2724, 0.1904, 0.9645, 0.0847, 0.0442, 0.2918])
output:  tensor([0.6748, 0.5145, 0.2530, 0.6172, 0.3240, 0.3443, 0.4775],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0622, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9426, 0.3173, 0.0956, 0.8695, 0.2315, 0.0379,

output:  tensor([0.5094, 0.4915, 0.6545, 0.5489, 0.3248, 0.2718, 0.5363],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0902, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8948, 0.5371, 0.8299, 0.8711, 0.0433, 0.0402, 0.9109])
output:  tensor([0.3162, 0.5210, 0.7351, 0.5441, 0.3069, 0.2732, 0.4987],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1064, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0913, 0.3898, 0.5633, 0.0070, 0.1033, 0.0330, 0.3508])
output:  tensor([0.4478, 0.5357, 0.6502, 0.4986, 0.3404, 0.2586, 0.5155],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0760, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0101, 0.8166, 0.3884, 0.8413, 0.5334, 0.0729, 0.3795])
output:  tensor([0.5589, 0.5736, 0.3739, 0.5705, 0.3092, 0.3396, 0.5587],
       device='cuda:0', gr

labels:  tensor([0.1785, 0.4940, 0.3080, 0.8739, 0.1201, 0.0979, 0.1691])
output:  tensor([0.7578, 0.4723, 0.1176, 0.5945, 0.2747, 0.3069, 0.2799],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0758, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5054, 0.3101, 0.9689, 0.8522, 0.0943, 0.2059, 0.1751])
output:  tensor([0.2532, 0.4017, 0.9320, 0.5968, 0.2444, 0.2265, 0.4748],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0359, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([5.8302e-01, 6.4313e-01, 7.8806e-01, 7.5086e-06, 1.4999e-01, 1.1565e-01,
        5.5497e-01])
output:  tensor([0.3096, 0.5024, 0.8193, 0.5039, 0.3343, 0.2346, 0.4632],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0580, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.1564e-01, 4.1155e-01, 7.7744e-01, 3.4075e-04,

loss:  tensor(0.0400, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8671, 0.7561, 0.9877, 0.8441, 0.0970, 0.5341, 0.0363])
output:  tensor([0.6907, 0.5511, 0.2289, 0.5556, 0.3106, 0.3110, 0.4627],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1442, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9836, 0.5992, 0.5086, 0.9211, 0.3515, 0.0285, 0.3160])
output:  tensor([0.3124, 0.4140, 0.8808, 0.5969, 0.2343, 0.2269, 0.4981],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1164, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5237, 0.5944, 0.7836, 0.9046, 0.1061, 0.0400, 0.8741])
output:  tensor([0.4902, 0.5487, 0.4802, 0.5557, 0.2970, 0.2616, 0.5469],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0585, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0700, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1079, 0.2915, 0.2005, 0.9671, 0.0556, 0.0276, 0.0630])
output:  tensor([0.6559, 0.4959, 0.3409, 0.5992, 0.2974, 0.2578, 0.4472],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1080, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9224, 0.5708, 0.2414, 0.5428, 0.2981, 0.0359, 0.2212])
output:  tensor([0.5675, 0.5376, 0.4495, 0.5650, 0.3064, 0.2835, 0.5532],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0489, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0564, 0.1415, 0.9480, 0.8968, 0.6528, 0.0849, 0.1769])
output:  tensor([0.3171, 0.3345, 0.9794, 0.6358, 0.2417, 0.1730, 0.5675],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0720, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0517, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.7813e-01, 3.9528e-01, 6.8535e-01, 4.7421e-06, 1.6253e-01, 3.1021e-01,
        6.9200e-01])
output:  tensor([0.3485, 0.5356, 0.7558, 0.5339, 0.3028, 0.2305, 0.5173],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0530, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0381, 0.7347, 0.7158, 0.9087, 0.2644, 0.0729, 0.3721])
output:  tensor([0.3818, 0.5635, 0.6038, 0.5772, 0.3260, 0.2519, 0.5261],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0471, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0563, 0.6569, 0.6927, 0.2533, 0.2525, 0.1298, 0.4467])
output:  tensor([0.4858, 0.5200, 0.5400, 0.5786, 0.3426, 0.2667, 0.4849],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0515, device='cuda:0', grad_fn=<MseLossBackward0

output:  tensor([0.7065, 0.4739, 0.1734, 0.6541, 0.2777, 0.2676, 0.3696],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0648, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9811, 0.6217, 0.3315, 0.9552, 0.0755, 0.0287, 0.3633])
output:  tensor([0.6644, 0.5336, 0.3169, 0.5801, 0.2631, 0.2419, 0.4617],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0485, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9263, 0.2313, 0.8730, 0.8230, 0.1037, 0.0441, 0.2461])
output:  tensor([0.4164, 0.5360, 0.6471, 0.5723, 0.3113, 0.2533, 0.5272],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0904, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.6340e-01, 4.7770e-01, 6.6338e-01, 1.7616e-06, 1.1657e-01, 8.5339e-02,
        6.1176e-01])
output:  tensor([0.3939, 0.5342, 0.6481, 0.5685, 0.3022, 0.2381,

len labels =  7
len otput[i] =  7
labels:  tensor([0.1148, 0.5323, 0.6889, 0.0415, 0.0761, 0.0270, 0.7266])
output:  tensor([0.3017, 0.4757, 0.8575, 0.5770, 0.2802, 0.2030, 0.4845],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0692, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.5984e-01, 5.9748e-01, 5.3185e-01, 9.0000e-10, 9.9233e-02, 3.3218e-02,
        4.7363e-01])
output:  tensor([0.3116, 0.5300, 0.7804, 0.5461, 0.2993, 0.2126, 0.5189],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0631, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6949, 0.3774, 0.3528, 0.5891, 0.3579, 0.0414, 0.7212])
output:  tensor([0.5730, 0.5953, 0.4482, 0.5515, 0.3104, 0.2377, 0.5246],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0218, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9954, 0.597

output:  tensor([0.5392, 0.4804, 0.6648, 0.5454, 0.2837, 0.2009, 0.4910],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0845, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9303, 0.1481, 0.2152, 0.9241, 0.1802, 0.0277, 0.1861])
output:  tensor([0.6250, 0.4892, 0.4941, 0.5871, 0.2644, 0.2191, 0.4729],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0753, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8815, 0.3717, 0.6379, 0.6990, 0.1184, 0.2432, 0.5674])
output:  tensor([0.3717, 0.4835, 0.7876, 0.5743, 0.2571, 0.1920, 0.4932],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0483, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.2290, 0.3081, 0.9725, 0.1330, 0.0427, 0.2170])
output:  tensor([0.7665, 0.4681, 0.2634, 0.6311, 0.2816, 0.2158, 0.3766],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.1665, 0.2208, 0.8674, 0.7799, 0.1643, 0.0645, 0.0919])
output:  tensor([0.5381, 0.4949, 0.6346, 0.5228, 0.3078, 0.1873, 0.4532],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0714, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1718, 0.0800, 0.9878, 0.9793, 0.0918, 0.1035, 0.0383])
output:  tensor([0.2749, 0.4063, 0.9287, 0.5811, 0.2170, 0.1773, 0.4298],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0648, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8575, 0.3235, 0.5714, 0.9611, 0.1130, 0.0586, 0.1455])
output:  tensor([0.6430, 0.4548, 0.2978, 0.6810, 0.2504, 0.2422, 0.4449],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0513, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([4.3151e-01, 7.6017e-01, 6.4333e-01, 9.0458e-05, 7

len labels =  7
len otput[i] =  7
labels:  tensor([0.0178, 0.7511, 0.5309, 0.9443, 0.0981, 0.0343, 0.6277])
output:  tensor([0.4954, 0.5142, 0.5691, 0.6323, 0.3035, 0.2518, 0.5039],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0697, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.5753e-01, 6.5019e-01, 2.6551e-01, 2.6027e-05, 2.3063e-01, 9.1949e-01,
        6.6676e-01])
output:  tensor([0.7662, 0.4745, 0.0953, 0.6606, 0.2807, 0.2780, 0.3046],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1540, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3888, 0.7898, 0.8349, 0.2911, 0.1916, 0.1798, 0.4403])
output:  tensor([0.3724, 0.5255, 0.6342, 0.5913, 0.2615, 0.2382, 0.4960],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0303, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7961, 0.329

output:  tensor([0.5306, 0.5017, 0.5635, 0.5355, 0.3165, 0.2064, 0.5060],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0800, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9666, 0.3861, 0.6520, 0.8695, 0.3531, 0.0322, 0.7124])
output:  tensor([0.4270, 0.4649, 0.7502, 0.5529, 0.2696, 0.1971, 0.4588],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0722, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5987, 0.1831, 0.5642, 0.9282, 0.0820, 0.0450, 0.1040])
output:  tensor([0.4894, 0.4700, 0.5678, 0.5810, 0.2280, 0.2064, 0.4874],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0585, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0716, 0.1608, 0.1822, 0.9486, 0.1334, 0.0329, 0.1934])
output:  tensor([0.7496, 0.4098, 0.1456, 0.7199, 0.2818, 0.2876, 0.3117],
       device='cuda:0', gr

labels:  tensor([0.9613, 0.5584, 0.5867, 0.8780, 0.0909, 0.0431, 0.8637])
output:  tensor([0.4764, 0.5024, 0.6098, 0.6101, 0.2382, 0.2322, 0.5191],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0695, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9569, 0.4619, 0.4104, 0.7908, 0.1094, 0.0456, 0.6214])
output:  tensor([0.4615, 0.5699, 0.4372, 0.6177, 0.3063, 0.2094, 0.4621],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0541, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4921, 0.7087, 0.6871, 0.8252, 0.0953, 0.0411, 0.8700])
output:  tensor([0.3484, 0.5158, 0.7268, 0.5571, 0.3019, 0.2053, 0.5141],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0053, 0.1048, 0.5604, 0.9392, 0.3454, 0.0556, 0.3734])
output:  tensor([0.2083, 0

average loss: 5.932113355398178
len labels =  7
len otput[i] =  7
labels:  tensor([0.8175, 0.6061, 0.5117, 0.7327, 0.1115, 0.0382, 0.3462])
output:  tensor([0.4438, 0.4627, 0.7386, 0.5424, 0.2705, 0.1848, 0.4729],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0444, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.6307, 0.1832, 0.9651, 0.1819, 0.3844, 0.7804])
output:  tensor([0.9052, 0.4274, 0.0379, 0.7144, 0.2928, 0.2535, 0.3852],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0456, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9021, 0.3732, 0.4060, 0.8361, 0.1040, 0.0296, 0.3265])
output:  tensor([0.4987, 0.3813, 0.8518, 0.6521, 0.1905, 0.1541, 0.3880],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0603, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([6.6119e-06, 3.944

loss:  tensor(0.0961, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2045, 0.8254, 0.3564, 0.9255, 0.1193, 0.0529, 0.7171])
output:  tensor([0.5759, 0.5346, 0.3917, 0.5996, 0.2922, 0.2233, 0.4613],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0649, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6514, 0.5846, 0.6430, 0.1863, 0.3406, 0.0264, 0.6984])
output:  tensor([0.4531, 0.5260, 0.6303, 0.5868, 0.2344, 0.2090, 0.4852],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0419, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0102, 0.3481, 0.9246, 0.8439, 0.1630, 0.1322, 0.5766])
output:  tensor([0.4637, 0.4668, 0.8067, 0.5257, 0.2667, 0.1491, 0.4128],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0533, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.4659, 0.4892, 0.6687, 0.6096, 0.2401, 0.1807, 0.4492],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0419, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.4308e-01, 7.4380e-01, 6.4083e-01, 1.0628e-04, 9.3631e-02, 5.5335e-02,
        9.6940e-01])
output:  tensor([0.4470, 0.4615, 0.7633, 0.5830, 0.2488, 0.1612, 0.4456],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1287, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2314, 0.6580, 0.6459, 0.0032, 0.0757, 0.0337, 0.9160])
output:  tensor([0.5583, 0.5531, 0.3952, 0.6126, 0.2479, 0.2149, 0.5036],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1121, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1489, 0.8748, 0.5490, 0.8897, 0.0660, 0.3094, 0.4596])
output:  tensor([0.3986, 0.6080, 0.4575, 0.6654, 0.2793, 0.2301,

loss:  tensor(0.0764, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7427, 0.6117, 0.5654, 0.9146, 0.1839, 0.0254, 0.8019])
output:  tensor([0.3140, 0.4353, 0.8080, 0.5881, 0.2519, 0.1870, 0.4270],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0788, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9920, 0.2965, 0.6335, 0.9910, 0.5716, 0.0419, 0.3783])
output:  tensor([0.6587, 0.4806, 0.4726, 0.5829, 0.2121, 0.1710, 0.4668],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0702, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9211, 0.1894, 0.8740, 0.9526, 0.0938, 0.0545, 0.0339])
output:  tensor([0.6360, 0.5239, 0.3986, 0.6418, 0.2629, 0.2143, 0.4052],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1011, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0589, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0239, 0.7643, 0.6646, 0.8954, 0.1769, 0.2141, 0.4796])
output:  tensor([0.3528, 0.6144, 0.5849, 0.6393, 0.3048, 0.2041, 0.5379],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0318, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9548, 0.4961, 0.0682, 0.9081, 0.1164, 0.0379, 0.0712])
output:  tensor([0.6873, 0.5341, 0.1838, 0.6244, 0.2449, 0.2102, 0.4054],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1140, 0.5351, 0.6975, 0.8707, 0.1061, 0.0417, 0.0646])
output:  tensor([0.6237, 0.5298, 0.2724, 0.6499, 0.2171, 0.2211, 0.4474],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0972, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.3773, 0.5479, 0.6684, 0.6061, 0.2254, 0.1793, 0.5703],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0337, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([7.4708e-01, 8.0248e-01, 6.8053e-01, 7.0413e-06, 2.6542e-01, 2.4644e-01,
        8.2413e-01])
output:  tensor([0.4902, 0.4815, 0.7261, 0.4986, 0.2624, 0.1559, 0.4919],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0769, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2327, 0.5291, 0.5153, 0.4099, 0.1472, 0.0317, 0.3643])
output:  tensor([0.4988, 0.5265, 0.5606, 0.6047, 0.2408, 0.1800, 0.5120],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0233, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1628, 0.5180, 0.6231, 0.7278, 0.1091, 0.0292, 0.1566])
output:  tensor([0.2698, 0.4501, 0.7931, 0.6587, 0.2411, 0.1851,

len labels =  7
len otput[i] =  7
labels:  tensor([0.2299, 0.2829, 0.4031, 0.7723, 0.1347, 0.0315, 0.3274])
output:  tensor([0.6046, 0.5270, 0.4204, 0.5710, 0.2300, 0.1822, 0.4961],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0430, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([6.2279e-01, 6.1111e-01, 2.9364e-01, 8.0000e-10, 1.2156e-01, 7.2164e-02,
        3.6994e-01])
output:  tensor([0.5594, 0.4949, 0.4883, 0.6168, 0.2311, 0.1788, 0.4969],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0679, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2422, 0.7780, 0.2299, 0.8089, 0.0543, 0.0349, 0.8380])
output:  tensor([0.4691, 0.5818, 0.4344, 0.6382, 0.2612, 0.2318, 0.5012],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0509, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9517, 0.150

loss:  tensor(0.0787, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1821, 0.3668, 0.3080, 0.0918, 0.2413, 0.0442, 0.2527])
output:  tensor([0.3732, 0.4541, 0.8077, 0.6166, 0.2226, 0.1579, 0.4191],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0872, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9384, 0.3906, 0.7059, 0.6104, 0.1899, 0.0340, 0.6536])
output:  tensor([0.4242, 0.5097, 0.6782, 0.6106, 0.2520, 0.1714, 0.4613],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0484, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9646, 0.2473, 0.0232, 0.6969, 0.0936, 0.0383, 0.0358])
output:  tensor([0.8855, 0.4237, 0.0400, 0.8056, 0.2026, 0.1906, 0.2930],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0215, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

labels:  tensor([0.0099, 0.3485, 0.6808, 0.0048, 0.5662, 0.0964, 0.1269])
output:  tensor([0.4397, 0.4668, 0.6246, 0.6747, 0.2239, 0.1710, 0.4233],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1230, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.6960e-01, 5.2860e-01, 1.7117e-01, 1.6215e-06, 9.5156e-02, 4.6591e-02,
        8.5017e-02])
output:  tensor([0.6358, 0.5418, 0.3490, 0.6029, 0.2145, 0.1768, 0.5092],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1177, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6339, 0.7397, 0.8678, 0.8975, 0.5791, 0.1681, 0.2489])
output:  tensor([0.3682, 0.5170, 0.6749, 0.5792, 0.2242, 0.1695, 0.4952],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0636, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9008, 0.3034, 0.5055, 0.9346, 0.5359, 0.0459,

labels:  tensor([1.0517e-04, 3.4031e-01, 5.9768e-01, 4.3969e-02, 7.7190e-02, 3.1803e-02,
        6.2079e-02])
output:  tensor([0.3635, 0.4621, 0.8132, 0.5464, 0.2234, 0.1695, 0.4454],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0904, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8264, 0.4562, 0.3127, 0.0698, 0.1760, 0.0436, 0.5171])
output:  tensor([0.3321, 0.5576, 0.6898, 0.5906, 0.2602, 0.1781, 0.5326],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0991, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9955, 0.3339, 0.1710, 0.9464, 0.7450, 0.2385, 0.4751])
output:  tensor([0.6899, 0.4952, 0.3911, 0.6184, 0.2038, 0.1594, 0.4881],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0821, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9223, 0.3960, 0.6060, 0.6443, 0.1217, 0.0347,

output:  tensor([0.4039, 0.5463, 0.5141, 0.6718, 0.2209, 0.1818, 0.4826],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9281, 0.8136, 0.2585, 0.4165, 0.0841, 0.0448, 0.2724])
output:  tensor([0.7947, 0.4804, 0.1491, 0.7186, 0.2125, 0.1861, 0.3705],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0397, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9713, 0.5945, 0.2458, 0.0016, 0.3394, 0.4160, 0.4833])
output:  tensor([0.6195, 0.4550, 0.5867, 0.5425, 0.2200, 0.1374, 0.4869],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0920, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9923, 0.3202, 0.5311, 0.9500, 0.3065, 0.0562, 0.7155])
output:  tensor([0.5212, 0.5545, 0.4965, 0.6330, 0.2076, 0.1723, 0.5185],
       device='cuda:0', gr

loss:  tensor(0.0195, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7319, 0.1941, 0.9636, 0.9565, 0.2459, 0.0586, 0.2855])
output:  tensor([0.6274, 0.3734, 0.8094, 0.6408, 0.1935, 0.1277, 0.3763],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0260, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4347, 0.6772, 0.6219, 0.1034, 0.0993, 0.0503, 0.4545])
output:  tensor([0.3235, 0.5072, 0.7235, 0.5979, 0.2431, 0.1655, 0.4575],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0471, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.5209e-02, 2.6202e-01, 6.6207e-01, 6.3720e-07, 2.3664e-01, 1.0117e-01,
        6.3877e-01])
output:  tensor([0.6059, 0.5200, 0.4089, 0.6378, 0.2025, 0.1595, 0.4915],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1271, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.2060, 0.2058, 0.6183, 0.9397, 0.1133, 0.0390, 0.3682])
output:  tensor([0.4766, 0.4385, 0.6815, 0.5677, 0.2313, 0.1474, 0.4426],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0430, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([4.7701e-02, 5.6039e-01, 8.5572e-01, 9.3760e-06, 7.8499e-02, 5.1145e-02,
        3.9590e-01])
output:  tensor([0.3877, 0.4635, 0.7367, 0.5889, 0.2084, 0.1591, 0.4602],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0741, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1557, 0.3918, 0.6168, 0.9548, 0.1131, 0.1072, 0.4221])
output:  tensor([0.4029, 0.5271, 0.5507, 0.6312, 0.2140, 0.1568, 0.4814],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0292, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3883, 0.301

len labels =  7
len otput[i] =  7
labels:  tensor([7.1944e-01, 6.0221e-01, 5.0998e-01, 1.0000e-10, 1.1968e-01, 6.6681e-01,
        6.0463e-01])
output:  tensor([0.4270, 0.4904, 0.7465, 0.5597, 0.2503, 0.1619, 0.4844],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1077, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8849, 0.4406, 0.4667, 0.9729, 0.1115, 0.0293, 0.7712])
output:  tensor([0.4663, 0.4732, 0.6631, 0.6682, 0.2200, 0.1508, 0.4267],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0647, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7628, 0.2956, 0.5454, 0.9170, 0.0703, 0.0441, 0.4516])
output:  tensor([0.4117, 0.4933, 0.6580, 0.5770, 0.1924, 0.1516, 0.4991],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0456, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1647, 0.678

output:  tensor([0.6995, 0.4844, 0.2057, 0.7451, 0.1919, 0.1594, 0.3723],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0550, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4977, 0.5480, 0.5985, 0.6182, 0.1164, 0.0556, 0.9347])
output:  tensor([0.3240, 0.4702, 0.7168, 0.5724, 0.2292, 0.1627, 0.4991],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0380, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0082, 0.5898, 0.3165, 0.5704, 0.4488, 0.0402, 0.4791])
output:  tensor([0.5708, 0.5412, 0.5105, 0.5558, 0.2249, 0.1697, 0.5433],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0611, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9427, 0.5066, 0.4507, 0.9726, 0.1845, 0.0289, 0.0679])
output:  tensor([0.5179, 0.4625, 0.5369, 0.6916, 0.2005, 0.1614, 0.4321],
       device='cuda:0', gr

output:  tensor([0.7654, 0.4827, 0.4223, 0.5929, 0.2085, 0.1495, 0.4131],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0178, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0036, 0.6134, 0.6932, 0.8357, 0.8227, 0.0652, 0.6872])
output:  tensor([0.3503, 0.4657, 0.7679, 0.6172, 0.2280, 0.1520, 0.4555],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0872, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.2374, 0.0119, 0.9468, 0.0768, 0.0346, 0.1576])
output:  tensor([0.9548, 0.3533, 0.0212, 0.8847, 0.1717, 0.1616, 0.2304],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0071, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5837, 0.4733, 0.4414, 0.7481, 0.1760, 0.0329, 0.4235])
output:  tensor([0.5816, 0.5096, 0.5008, 0.6367, 0.2139, 0.1537, 0.4515],
       device='cuda:0', gr

loss:  tensor(0.0672, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([5.7026e-03, 3.1328e-01, 9.3932e-01, 3.1450e-07, 6.9643e-02, 3.7597e-02,
        6.3956e-01])
output:  tensor([0.4164, 0.4472, 0.7269, 0.6089, 0.2190, 0.1396, 0.4610],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0953, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0555, 0.5416, 0.4223, 0.8853, 0.1163, 0.0312, 0.8439])
output:  tensor([0.8190, 0.4659, 0.1004, 0.7764, 0.1895, 0.1821, 0.2750],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1509, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0615, 0.2876, 0.9387, 0.8753, 0.1165, 0.0906, 0.1426])
output:  tensor([0.4667, 0.4846, 0.6866, 0.6037, 0.2009, 0.1840, 0.4785],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0670, device='cuda:0', grad_fn=<MseLossBackward0

labels:  tensor([0.7412, 0.5772, 0.7243, 0.8324, 0.4661, 0.0346, 0.7344])
output:  tensor([0.6982, 0.4233, 0.5982, 0.5561, 0.2029, 0.1200, 0.4470],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0396, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8550, 0.4855, 0.7713, 0.9012, 0.1717, 0.0720, 0.7379])
output:  tensor([0.5728, 0.5327, 0.5240, 0.6176, 0.2082, 0.1515, 0.4957],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0414, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9738, 0.3351, 0.1258, 0.9223, 0.1105, 0.0336, 0.1438])
output:  tensor([0.6699, 0.4951, 0.3626, 0.6464, 0.1983, 0.1697, 0.3838],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0477, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9034, 0.5157, 0.8718, 0.8985, 0.1184, 0.0461, 0.9393])
output:  tensor([0.4920, 0

labels:  tensor([2.8954e-05, 2.9566e-01, 8.3615e-01, 5.5409e-01, 2.9212e-01, 6.5682e-02,
        2.2990e-01])
output:  tensor([0.3646, 0.3710, 0.8467, 0.6780, 0.2163, 0.1378, 0.4088],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0281, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7247, 0.7618, 0.6682, 0.8970, 0.0937, 0.1605, 0.8818])
output:  tensor([0.4355, 0.5512, 0.5422, 0.6475, 0.1894, 0.1513, 0.5100],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0505, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8405, 0.2823, 0.6776, 0.8907, 0.0972, 0.0420, 0.0838])
output:  tensor([0.4869, 0.5079, 0.5626, 0.6240, 0.2043, 0.1818, 0.4983],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0662, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9488, 0.3831, 0.6706, 0.8633, 0.1642, 0.0383,

output:  tensor([0.6390, 0.5205, 0.4948, 0.6176, 0.1775, 0.1315, 0.5096],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0953, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6857, 0.8857, 0.2808, 0.0029, 0.0584, 0.0592, 0.7681])
output:  tensor([0.7790, 0.4621, 0.2343, 0.6904, 0.1671, 0.1281, 0.3495],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1221, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0985, 0.6045, 0.7612, 0.0199, 0.1125, 0.0958, 0.3844])
output:  tensor([0.3963, 0.5422, 0.5136, 0.6760, 0.1903, 0.1473, 0.4941],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0865, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9936, 0.4877, 0.9535, 0.9428, 0.7675, 0.0311, 0.6476])
output:  tensor([0.6118, 0.4132, 0.5955, 0.6422, 0.1834, 0.1333, 0.4408],
       device='cuda:0', gr

output:  tensor([0.5638, 0.4093, 0.6843, 0.5967, 0.1987, 0.1189, 0.4585],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0334, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9569, 0.4740, 0.1673, 0.0275, 0.1402, 0.0367, 0.4183])
output:  tensor([0.7189, 0.5239, 0.3111, 0.6754, 0.1961, 0.1557, 0.3992],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0739, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0136, 0.3883, 0.9675, 0.8604, 0.1090, 0.0647, 0.1593])
output:  tensor([0.3281, 0.4026, 0.8071, 0.6272, 0.2016, 0.1295, 0.4221],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0373, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9956, 0.2925, 0.3773, 0.9263, 0.6296, 0.8748, 0.2353])
output:  tensor([0.8454, 0.4715, 0.0945, 0.7759, 0.1790, 0.1452, 0.3267],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.1957, 0.1186, 0.3139, 0.9475, 0.1077, 0.0333, 0.1849])
output:  tensor([0.7637, 0.4351, 0.2982, 0.6632, 0.1485, 0.1249, 0.3662],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0781, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8383, 0.5099, 0.4158, 0.9692, 0.1117, 0.0372, 0.3175])
output:  tensor([0.4957, 0.5681, 0.4898, 0.6603, 0.1970, 0.1388, 0.4444],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0365, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2242, 0.8291, 0.5923, 0.8788, 0.1058, 0.0688, 0.8516])
output:  tensor([0.4805, 0.5810, 0.6499, 0.5455, 0.2418, 0.1353, 0.5395],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0517, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5018, 0.3465, 0.2131, 0.2978, 0.1086, 0.0463, 0

loss:  tensor(0.0546, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1130, 0.6603, 0.5394, 0.8367, 0.3195, 0.0633, 0.5161])
output:  tensor([0.3652, 0.5380, 0.5109, 0.7053, 0.1916, 0.1702, 0.4405],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0186, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9543, 0.5440, 0.2781, 0.9231, 0.3718, 0.0666, 0.2354])
output:  tensor([0.6289, 0.5091, 0.4177, 0.6686, 0.2188, 0.1437, 0.4062],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0357, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0024, 0.4513, 0.9725, 0.9168, 0.3091, 0.1535, 0.0355])
output:  tensor([0.4333, 0.5022, 0.6207, 0.6283, 0.1855, 0.1430, 0.4890],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0880, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.8528, 0.4501, 0.0950, 0.7155, 0.1886, 0.1770, 0.2614],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0905, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4815, 0.5592, 0.2243, 0.2337, 0.1129, 0.0453, 0.0830])
output:  tensor([0.7542, 0.4697, 0.2671, 0.6470, 0.1889, 0.1313, 0.3467],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0483, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9895, 0.2857, 0.3451, 0.9286, 0.1048, 0.0313, 0.5372])
output:  tensor([0.7723, 0.4335, 0.4702, 0.6181, 0.1884, 0.1257, 0.4635],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0289, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9912, 0.3439, 0.2541, 0.9316, 0.6555, 0.0280, 0.1300])
output:  tensor([0.8098, 0.4762, 0.3174, 0.6431, 0.1615, 0.1088, 0.4025],
       device='cuda:0', gr

output:  tensor([0.9179, 0.4003, 0.1345, 0.8090, 0.1841, 0.1354, 0.3244],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0117, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.3041e-01, 4.0311e-01, 4.6008e-01, 1.6900e-08, 1.3469e-01, 3.3593e-02,
        4.0983e-01])
output:  tensor([0.6734, 0.4834, 0.4880, 0.6078, 0.1814, 0.1474, 0.4550],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0843, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1958, 0.6525, 0.5482, 0.9315, 0.1383, 0.4132, 0.2014])
output:  tensor([0.4107, 0.5059, 0.5355, 0.7484, 0.2061, 0.1671, 0.4225],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0308, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([4.4288e-01, 6.6426e-01, 5.9247e-01, 3.0000e-10, 2.7759e-01, 3.7391e-02,
        9.6653e-01])
output:  tensor([0.4927, 0.4

len labels =  7
len otput[i] =  7
labels:  tensor([0.9632, 0.4749, 0.6460, 0.8304, 0.1023, 0.0421, 0.6737])
output:  tensor([0.6431, 0.4505, 0.5940, 0.5809, 0.2081, 0.1447, 0.4881],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0320, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9511, 0.3897, 0.4027, 0.6775, 0.5438, 0.1649, 0.1440])
output:  tensor([0.8542, 0.4882, 0.1113, 0.6668, 0.2396, 0.1620, 0.2924],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0313, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9735, 0.5886, 0.4131, 0.9409, 0.1148, 0.0671, 0.5720])
output:  tensor([0.7671, 0.4271, 0.4120, 0.6683, 0.1616, 0.1097, 0.4335],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0237, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0094, 0.2883, 0.4478, 0.8802, 0.1107, 0.0433, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.0709, 0.7195, 0.7158, 0.9274, 0.0747, 0.1931, 0.6467])
output:  tensor([0.3146, 0.5379, 0.6952, 0.6186, 0.2110, 0.1529, 0.5118],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0324, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4600, 0.4633, 0.8697, 0.8064, 0.1095, 0.0427, 0.4567])
output:  tensor([0.3840, 0.5092, 0.6644, 0.6132, 0.1976, 0.1189, 0.4563],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0144, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.6016e-01, 7.8274e-01, 2.8127e-01, 3.2018e-05, 8.2622e-02, 3.8092e-02,
        7.4361e-01])
output:  tensor([0.4087, 0.5458, 0.5862, 0.6464, 0.1854, 0.1455, 0.5018],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0956, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1103, 0.402

output:  tensor([0.2541, 0.4685, 0.7955, 0.6502, 0.2120, 0.1328, 0.4933],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0920, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6483, 0.3498, 0.4529, 0.4282, 0.1440, 0.2190, 0.5096])
output:  tensor([0.4212, 0.5330, 0.5975, 0.6238, 0.1691, 0.1496, 0.5129],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0214, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9926, 0.2415, 0.4362, 0.6947, 0.5928, 0.0675, 0.2065])
output:  tensor([0.7046, 0.4149, 0.5054, 0.6647, 0.1835, 0.1220, 0.4460],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0495, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0070, 0.7720, 0.4940, 0.9086, 0.1150, 0.1553, 0.4737])
output:  tensor([0.5555, 0.5323, 0.3841, 0.7046, 0.1747, 0.1549, 0.4384],
       device='cuda:0', gr

loss:  tensor(0.0409, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0700, 0.7208, 0.9104, 0.7399, 0.0960, 0.5806, 0.6254])
output:  tensor([0.2814, 0.5758, 0.6745, 0.6357, 0.2299, 0.1415, 0.5455],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0499, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9306, 0.1919, 0.0357, 0.9507, 0.1109, 0.0396, 0.0374])
output:  tensor([0.5910, 0.5311, 0.3735, 0.6488, 0.1659, 0.1365, 0.4215],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0851, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6711, 0.2967, 0.6455, 0.7320, 0.7209, 0.0370, 0.8776])
output:  tensor([0.7207, 0.4462, 0.4285, 0.6561, 0.1750, 0.1279, 0.4174],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0851, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.7037, 0.5538, 0.3121, 0.5883, 0.1763, 0.1559, 0.4300],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0253, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9476, 0.2913, 0.8772, 0.9555, 0.5301, 0.3853, 0.6201])
output:  tensor([0.3163, 0.5036, 0.6053, 0.7846, 0.1691, 0.1648, 0.4378],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1084, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0050, 0.5057, 0.8079, 0.7924, 0.0563, 0.0695, 0.5218])
output:  tensor([0.2562, 0.5402, 0.6714, 0.7028, 0.2083, 0.1401, 0.4897],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0172, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9952, 0.4413, 0.5864, 0.9417, 0.1606, 0.0450, 0.3745])
output:  tensor([0.8082, 0.4842, 0.4784, 0.5902, 0.1849, 0.1312, 0.4346],
       device='cuda:0', gr

output:  tensor([0.4367, 0.4316, 0.7350, 0.6354, 0.1892, 0.1042, 0.4825],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1172, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8520, 0.5630, 0.1649, 0.7882, 0.1567, 0.0444, 0.4314])
output:  tensor([0.8371, 0.4873, 0.3022, 0.6104, 0.1898, 0.1384, 0.4321],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0095, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9366, 0.5267, 0.6754, 0.5094, 0.1471, 0.0393, 0.9014])
output:  tensor([0.5956, 0.4539, 0.6621, 0.5427, 0.2278, 0.1172, 0.4514],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0483, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.4881, 0.2572, 0.9623, 0.6688, 0.8624, 0.6046])
output:  tensor([0.8276, 0.4753, 0.3137, 0.6109, 0.2017, 0.1753, 0.4671],
       device='cuda:0', gr

labels:  tensor([0.2125, 0.8173, 0.7374, 0.0039, 0.0543, 0.1724, 0.8986])
output:  tensor([0.5468, 0.5428, 0.4568, 0.6473, 0.2067, 0.1544, 0.4283],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1321, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2656, 0.5793, 0.9322, 0.8842, 0.3833, 0.0746, 0.7210])
output:  tensor([0.4273, 0.5222, 0.6435, 0.5861, 0.2097, 0.1400, 0.5314],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0388, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2660, 0.7177, 0.5484, 0.8536, 0.0965, 0.2063, 0.0386])
output:  tensor([0.4970, 0.5682, 0.4582, 0.6616, 0.1931, 0.1446, 0.4629],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0448, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6036, 0.4541, 0.3124, 0.8879, 0.2448, 0.0308, 0.2445])
output:  tensor([0.6756, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.0781, 0.7248, 0.4310, 0.0165, 0.0708, 0.0329, 0.8506])
output:  tensor([0.4244, 0.6023, 0.4797, 0.6429, 0.2093, 0.1619, 0.4636],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1022, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([5.9095e-03, 3.2413e-01, 8.8506e-01, 6.2279e-04, 1.4692e-01, 8.4165e-02,
        7.5166e-01])
output:  tensor([0.6803, 0.4461, 0.4047, 0.6432, 0.1743, 0.1191, 0.4573],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1717, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.1089e-04, 4.9318e-01, 8.1940e-01, 5.9400e-08, 1.7502e-01, 3.8249e-02,
        8.5098e-01])
output:  tensor([0.5600, 0.4324, 0.6999, 0.6177, 0.2184, 0.1109, 0.5123],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1192, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i]

output:  tensor([0.3151, 0.5182, 0.6997, 0.6709, 0.1914, 0.1212, 0.4772],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0491, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9749, 0.6261, 0.0454, 0.9355, 0.1105, 0.0447, 0.5713])
output:  tensor([0.6342, 0.5159, 0.1823, 0.7320, 0.1963, 0.2166, 0.3509],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0391, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8697, 0.9077, 0.3262, 0.8922, 0.1120, 0.1216, 0.1963])
output:  tensor([0.4590, 0.5129, 0.5456, 0.6829, 0.1884, 0.1353, 0.4052],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0666, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7101, 0.3551, 0.9683, 0.5843, 0.7001, 0.1116, 0.3858])
output:  tensor([0.3248, 0.4179, 0.8000, 0.6428, 0.2019, 0.1439, 0.4594],
       device='cuda:0', gr

loss:  tensor(0.0299, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.3110, 0.0251, 0.9005, 0.1393, 0.0373, 0.4055])
output:  tensor([0.9430, 0.3504, 0.0741, 0.7693, 0.1506, 0.1587, 0.3112],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0068, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7042, 0.6444, 0.4489, 0.8062, 0.0657, 0.0294, 0.4393])
output:  tensor([0.6697, 0.4627, 0.4764, 0.6294, 0.1747, 0.1275, 0.4829],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0128, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9946, 0.5975, 0.2634, 0.4067, 0.1759, 0.0411, 0.7763])
output:  tensor([0.6844, 0.4993, 0.5772, 0.6434, 0.1842, 0.1091, 0.4546],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0527, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.5608, 0.4490, 0.6959, 0.6912, 0.1757, 0.1016, 0.4248],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.4234, 0.1389, 0.9717, 0.2492, 0.0469, 0.2756])
output:  tensor([0.7814, 0.4839, 0.1824, 0.6363, 0.2060, 0.2283, 0.4798],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0344, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9627, 0.2439, 0.5959, 0.8300, 0.3395, 0.0698, 0.3910])
output:  tensor([0.6721, 0.4491, 0.5022, 0.6629, 0.1740, 0.1070, 0.4388],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0278, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9956, 0.3570, 0.2626, 0.9725, 0.1104, 0.0291, 0.0369])
output:  tensor([0.6495, 0.4251, 0.4787, 0.6950, 0.1573, 0.1111, 0.4030],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.5286, 0.4810, 0.5867, 0.5727, 0.1141, 0.0263, 0.4479])
output:  tensor([0.4859, 0.4443, 0.6532, 0.6626, 0.1846, 0.0986, 0.4101],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0039, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1985, 0.7019, 0.7894, 0.9240, 0.1122, 0.0441, 0.5171])
output:  tensor([0.3355, 0.5936, 0.5292, 0.6573, 0.1691, 0.1465, 0.5350],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0262, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8446, 0.3705, 0.5699, 0.7898, 0.1492, 0.0382, 0.7191])
output:  tensor([0.4770, 0.4599, 0.6752, 0.6482, 0.2018, 0.1129, 0.4652],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0353, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7993, 0.3936, 0.7209, 0.8745, 0.1269, 0.0468, 0

output:  tensor([0.7623, 0.4853, 0.2450, 0.6546, 0.2307, 0.1442, 0.3798],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0424, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1888, 0.6385, 0.9775, 0.9330, 0.2285, 0.0605, 0.9144])
output:  tensor([0.2592, 0.5224, 0.7330, 0.6265, 0.2490, 0.1208, 0.5103],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0485, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0713, 0.2012, 0.7137, 0.9036, 0.9544, 0.0520, 0.0656])
output:  tensor([0.5849, 0.4742, 0.5069, 0.6120, 0.1722, 0.1161, 0.5143],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1833, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9517, 0.3317, 0.5314, 0.9408, 0.0906, 0.0269, 0.3424])
output:  tensor([0.3897, 0.4558, 0.7594, 0.7212, 0.1926, 0.1076, 0.4139],
       device='cuda:0', gr

output:  tensor([0.4075, 0.4488, 0.6996, 0.6804, 0.1907, 0.1065, 0.4513],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0566, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8870, 0.3236, 0.9498, 0.9710, 0.1438, 0.0567, 0.0378])
output:  tensor([0.3892, 0.4702, 0.7081, 0.6597, 0.2094, 0.1232, 0.4628],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0877, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2542, 0.6391, 0.7841, 0.1947, 0.0952, 0.3487, 0.4444])
output:  tensor([0.3418, 0.5843, 0.6462, 0.6834, 0.2064, 0.1262, 0.4948],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0476, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8127, 0.4235, 0.7261, 0.8499, 0.1197, 0.0510, 0.4913])
output:  tensor([0.5940, 0.4334, 0.6955, 0.6204, 0.1912, 0.0892, 0.4977],
       device='cuda:0', gr

loss:  tensor(0.0535, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9198, 0.5059, 0.3981, 0.0105, 0.1083, 0.0441, 0.5081])
output:  tensor([0.4755, 0.5247, 0.5970, 0.6735, 0.1861, 0.1177, 0.5215],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0984, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.8229e-01, 5.6774e-01, 1.3664e-01, 4.3514e-05, 1.1893e-01, 9.3767e-01,
        6.7437e-01])
output:  tensor([0.7281, 0.5583, 0.2637, 0.6095, 0.2007, 0.1483, 0.3943],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1658, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.2983e-01, 7.9290e-01, 8.0087e-01, 6.2246e-04, 9.4487e-02, 4.4607e-01,
        4.1045e-01])
output:  tensor([0.3867, 0.5764, 0.5888, 0.6471, 0.1958, 0.1472, 0.5169],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0891, device

loss:  tensor(0.0699, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9276, 0.5261, 0.7976, 0.9006, 0.5196, 0.0327, 0.8062])
output:  tensor([0.5377, 0.3818, 0.7166, 0.6972, 0.1750, 0.0951, 0.3889],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0739, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.2754e-03, 3.2755e-01, 9.9131e-01, 3.8024e-05, 4.7123e-01, 2.1926e-01,
        4.6744e-01])
output:  tensor([0.6383, 0.4978, 0.4597, 0.6743, 0.1598, 0.1234, 0.4495],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1814, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9353, 0.5179, 0.4942, 0.8076, 0.1226, 0.0359, 0.2708])
output:  tensor([0.7354, 0.3988, 0.6194, 0.6079, 0.1821, 0.0860, 0.4316],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0202, device='cuda:0', grad_fn=<MseLossBackward0

output:  tensor([0.3355, 0.5833, 0.5369, 0.6582, 0.1962, 0.1455, 0.5554],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0406, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3711, 0.3160, 0.8314, 0.9072, 0.3156, 0.0366, 0.6521])
output:  tensor([0.4376, 0.4036, 0.8025, 0.7248, 0.2086, 0.1038, 0.3690],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0203, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9904, 0.1623, 0.2346, 0.9564, 0.1375, 0.0291, 0.1230])
output:  tensor([0.9151, 0.3954, 0.3729, 0.6823, 0.1873, 0.0968, 0.3749],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0321, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9847, 0.3434, 0.2938, 0.0577, 0.0965, 0.0299, 0.0975])
output:  tensor([0.6914, 0.4991, 0.5689, 0.6582, 0.1965, 0.1090, 0.4370],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.3578, 0.5626, 0.6571, 0.7995, 0.2305, 0.0289, 0.3222])
output:  tensor([0.4098, 0.4648, 0.6999, 0.6859, 0.1745, 0.0916, 0.4731],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0081, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([4.3406e-05, 5.1656e-01, 8.6755e-01, 7.2007e-01, 3.9004e-01, 9.0491e-02,
        1.8606e-01])
output:  tensor([0.1668, 0.3878, 0.8979, 0.7948, 0.1849, 0.1098, 0.2980],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0151, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6052, 0.4529, 0.2599, 0.9652, 0.0994, 0.0758, 0.5172])
output:  tensor([0.4742, 0.5251, 0.4986, 0.6607, 0.1819, 0.1236, 0.4185],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0273, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9943, 0.261

loss:  tensor(0.0166, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1025, 0.7033, 0.6217, 0.8375, 0.1870, 0.0445, 0.0367])
output:  tensor([0.2203, 0.6124, 0.6241, 0.6897, 0.1838, 0.1338, 0.5135],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0399, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9874, 0.4860, 0.2001, 0.8517, 0.1066, 0.0767, 0.4430])
output:  tensor([0.5687, 0.5129, 0.4227, 0.6410, 0.1694, 0.1329, 0.4742],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0397, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1861, 0.7144, 0.2908, 0.0229, 0.1170, 0.0509, 0.8332])
output:  tensor([0.5402, 0.5589, 0.4975, 0.6603, 0.2000, 0.1299, 0.5203],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1014, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.7164, 0.5068, 0.4620, 0.6325, 0.1750, 0.1208, 0.4708],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0476, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9450, 0.2983, 0.0875, 0.8980, 0.6707, 0.0454, 0.1284])
output:  tensor([0.6945, 0.5357, 0.2499, 0.5756, 0.2080, 0.1660, 0.4536],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0834, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8268, 0.7887, 0.1740, 0.8194, 0.1220, 0.0370, 0.4263])
output:  tensor([0.5929, 0.5055, 0.4114, 0.6557, 0.1554, 0.1178, 0.4548],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0324, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1903, 0.3994, 0.1076, 0.9165, 0.1160, 0.0317, 0.1534])
output:  tensor([0.8042, 0.4594, 0.3386, 0.6500, 0.1616, 0.1039, 0.4020],
       device='cuda:0', gr

loss:  tensor(0.0360, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0261, 0.9669, 0.8080, 0.8615, 0.0864, 0.4037, 0.4834])
output:  tensor([0.4017, 0.5256, 0.6789, 0.5913, 0.2139, 0.1361, 0.5321],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0737, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2506, 0.7540, 0.6309, 0.7137, 0.1198, 0.0359, 0.2480])
output:  tensor([0.3881, 0.5959, 0.6204, 0.6687, 0.1957, 0.1168, 0.5429],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0208, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7836, 0.4608, 0.6894, 0.7911, 0.1385, 0.0282, 0.8221])
output:  tensor([0.3693, 0.4034, 0.7968, 0.6931, 0.1942, 0.1064, 0.3997],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0548, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.4300, 0.5013, 0.5665, 0.6502, 0.1577, 0.1101, 0.4771],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0230, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0836, 0.3164, 0.9593, 0.9428, 0.2263, 0.0525, 0.5703])
output:  tensor([0.3366, 0.3953, 0.8332, 0.7302, 0.2019, 0.1085, 0.3877],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0241, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.6047, 0.1163, 0.9449, 0.4283, 0.0721, 0.6774])
output:  tensor([0.8954, 0.4304, 0.1674, 0.6352, 0.2044, 0.2633, 0.4700],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0384, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6511, 0.7574, 0.6301, 0.0008, 0.0759, 0.0894, 0.3311])
output:  tensor([0.2266, 0.5257, 0.7905, 0.6578, 0.2092, 0.1301, 0.4815],
       device='cuda:0', gr

loss:  tensor(0.0753, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8642, 0.5770, 0.3266, 0.4247, 0.0930, 0.0316, 0.4815])
output:  tensor([0.4758, 0.5343, 0.5468, 0.6165, 0.1763, 0.1128, 0.4993],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0360, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.3888e-05, 1.1830e-01, 5.5834e-01, 8.8543e-01, 1.1847e-01, 4.5078e-02,
        4.3030e-01])
output:  tensor([0.2513, 0.3891, 0.8445, 0.6950, 0.1858, 0.1273, 0.3889],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0382, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9539e-01, 4.8102e-01, 1.0779e-01, 7.1878e-06, 3.4665e-01, 5.3056e-02,
        3.4100e-01])
output:  tensor([0.8175, 0.4774, 0.2850, 0.5004, 0.2206, 0.1916, 0.4778],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0525, device

loss:  tensor(0.1279, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4837, 0.4009, 0.4591, 0.4425, 0.1037, 0.0634, 0.0399])
output:  tensor([0.6333, 0.5045, 0.3891, 0.6772, 0.1823, 0.1322, 0.3900],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0324, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0670, 0.3055, 0.5879, 0.8531, 0.2625, 0.0387, 0.2224])
output:  tensor([0.5799, 0.4560, 0.4439, 0.6339, 0.1580, 0.1001, 0.4356],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0592, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9467, 0.7304, 0.0649, 0.9449, 0.1111, 0.0581, 0.5948])
output:  tensor([0.7452, 0.4328, 0.1008, 0.6977, 0.2070, 0.1942, 0.2162],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0518, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0234, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8308, 0.6785, 0.4547, 0.9639, 0.1188, 0.0359, 0.6524])
output:  tensor([0.5201, 0.5160, 0.5259, 0.7015, 0.1624, 0.1165, 0.4578],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0347, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7917, 0.4962, 0.8896, 0.0078, 0.2144, 0.0592, 0.5700])
output:  tensor([0.4177, 0.4288, 0.8177, 0.6713, 0.2100, 0.1090, 0.4547],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0865, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.1198e-05, 5.2870e-01, 3.0314e-01, 9.5229e-01, 9.0106e-02, 3.4818e-02,
        3.9103e-02])
output:  tensor([0.2862, 0.6158, 0.3526, 0.7360, 0.1718, 0.1528, 0.3990],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0413, device='cuda:0', grad_fn=<MseLossBackward0

output:  tensor([0.4908, 0.5770, 0.4364, 0.5989, 0.1921, 0.1409, 0.5047],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0845, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1144, 0.6392, 0.6997, 0.0447, 0.1188, 0.0554, 0.5680])
output:  tensor([0.6343, 0.5042, 0.6626, 0.5652, 0.2080, 0.1169, 0.5314],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0820, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9463e-01, 8.1519e-01, 2.4658e-01, 5.0171e-06, 1.2132e-01, 8.6324e-02,
        7.2094e-01])
output:  tensor([0.8434, 0.4415, 0.1976, 0.6224, 0.1860, 0.1813, 0.4441],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0917, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9887, 0.4327, 0.6511, 0.9320, 0.1056, 0.0307, 0.6785])
output:  tensor([0.7656, 0.4461, 0.4728, 0.6464, 0.1699, 0.1074,

len labels =  7
len otput[i] =  7
labels:  tensor([3.9508e-01, 7.8326e-01, 8.0688e-01, 8.5500e-08, 6.3020e-01, 3.5041e-01,
        7.9710e-01])
output:  tensor([0.3861, 0.5073, 0.7335, 0.6067, 0.2284, 0.1234, 0.4999],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1073, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7038, 0.4168, 0.9325, 0.8174, 0.0833, 0.0832, 0.1276])
output:  tensor([0.2523, 0.4291, 0.8642, 0.6759, 0.2213, 0.1169, 0.4202],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0478, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3587, 0.1299, 0.7143, 0.8488, 0.1916, 0.0396, 0.1847])
output:  tensor([0.5297, 0.4863, 0.5843, 0.6500, 0.1806, 0.0973, 0.4593],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0417, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3392, 0.723

output:  tensor([0.5885, 0.4407, 0.5849, 0.6101, 0.1746, 0.0993, 0.4806],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0604, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7663, 0.6534, 0.4170, 0.9215, 0.1080, 0.0326, 0.2919])
output:  tensor([0.7168, 0.5295, 0.4045, 0.6241, 0.1941, 0.1249, 0.4204],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0198, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0405, 0.5583, 0.9132, 0.9283, 0.1821, 0.1657, 0.4221])
output:  tensor([0.2133, 0.4453, 0.8772, 0.7186, 0.2160, 0.1332, 0.3582],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0135, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9921, 0.2437, 0.0537, 0.7889, 0.1050, 0.0399, 0.1574])
output:  tensor([0.8748, 0.4007, 0.0940, 0.7236, 0.1762, 0.1640, 0.2662],
       device='cuda:0', gr

output:  tensor([0.4886, 0.5409, 0.6791, 0.7239, 0.2294, 0.1309, 0.4242],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0509, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1532, 0.5712, 0.6357, 0.0128, 0.1071, 0.0885, 0.6912])
output:  tensor([0.1748, 0.5936, 0.7031, 0.6851, 0.1977, 0.1596, 0.4599],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0749, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2275, 0.3935, 0.5125, 0.5722, 0.2108, 0.0302, 0.3816])
output:  tensor([0.4648, 0.4457, 0.7411, 0.6194, 0.2248, 0.1077, 0.5176],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0198, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9696, 0.4122, 0.2773, 0.1678, 0.0748, 0.0279, 0.1249])
output:  tensor([0.5581, 0.5130, 0.4076, 0.6622, 0.1600, 0.1174, 0.4417],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.0369, 0.6276, 0.2030, 0.9293, 0.0768, 0.0390, 0.1595])
output:  tensor([0.7781, 0.4078, 0.0902, 0.8178, 0.1493, 0.1586, 0.2136],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0922, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.7554e-04, 2.6535e-01, 5.7241e-01, 8.4440e-01, 1.5127e-01, 3.3351e-02,
        3.4007e-01])
output:  tensor([0.3480, 0.5134, 0.5785, 0.6557, 0.1730, 0.1312, 0.5052],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0365, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6360, 0.1153, 0.5039, 0.9366, 0.1032, 0.1248, 0.0624])
output:  tensor([0.3532, 0.4376, 0.7943, 0.6628, 0.2280, 0.1282, 0.4098],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0685, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5673, 0.365

labels:  tensor([0.0020, 0.5546, 0.7115, 0.8313, 0.1031, 0.0301, 0.5981])
output:  tensor([0.5604, 0.4815, 0.5692, 0.6790, 0.1769, 0.1043, 0.4476],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0563, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3403, 0.7995, 0.8834, 0.9118, 0.1229, 0.0310, 0.4503])
output:  tensor([0.5767, 0.5505, 0.5568, 0.5877, 0.2314, 0.1287, 0.5273],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0510, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7212, 0.8578, 0.8728, 0.9648, 0.1111, 0.1264, 0.4887])
output:  tensor([0.4620, 0.6387, 0.4581, 0.6767, 0.1866, 0.1448, 0.4818],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0538, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1211, 0.5836, 0.8641, 0.0788, 0.1090, 0.0365, 0.8281])
output:  tensor([0.4415, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.9310, 0.3426, 0.4694, 0.9356, 0.1430, 0.0476, 0.0874])
output:  tensor([0.5625, 0.4235, 0.7066, 0.7037, 0.2284, 0.1322, 0.4906],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0613, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2824, 0.3546, 0.7397, 0.0000, 0.1497, 0.0818, 0.5189])
output:  tensor([0.5153, 0.5535, 0.5401, 0.5780, 0.1988, 0.1169, 0.5232],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0673, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8220, 0.4781, 0.5037, 0.9260, 0.0966, 0.0304, 0.5092])
output:  tensor([0.6214, 0.4257, 0.4878, 0.6760, 0.1698, 0.0896, 0.3875],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0185, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5250, 0.5112, 0.8540, 0.8016, 0.1188, 0.0318, 0

loss:  tensor(0.0716, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.4833e-04, 5.0207e-01, 9.1839e-01, 8.7676e-01, 1.3814e-01, 1.5577e-01,
        6.6770e-01])
output:  tensor([0.3829, 0.5828, 0.6707, 0.5441, 0.2319, 0.1326, 0.5331],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0503, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9894, 0.2693, 0.1994, 0.6473, 0.9106, 0.0465, 0.1636])
output:  tensor([0.6802, 0.5257, 0.3952, 0.6338, 0.1904, 0.1160, 0.4632],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1162, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.2455e-01, 5.6014e-01, 1.6227e-01, 3.6883e-06, 1.6488e-01, 6.7523e-01,
        3.7825e-01])
output:  tensor([0.8798, 0.3806, 0.1080, 0.6801, 0.1655, 0.1292, 0.2402],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1168, device

loss:  tensor(0.0856, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0325, 0.7591, 0.4630, 0.9054, 0.1045, 0.1007, 0.4017])
output:  tensor([0.1641, 0.6236, 0.7589, 0.6377, 0.2471, 0.1383, 0.5147],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0328, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9276, 0.3426, 0.0947, 0.8770, 0.1194, 0.0282, 0.4973])
output:  tensor([0.7965, 0.3772, 0.1319, 0.7203, 0.1519, 0.1175, 0.2344],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0175, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0012, 0.5947, 0.5655, 0.8332, 0.2442, 0.0785, 0.8128])
output:  tensor([0.5135, 0.5478, 0.3924, 0.6816, 0.1709, 0.1236, 0.4559],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0646, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.4997, 0.5001, 0.5013, 0.6719, 0.1666, 0.1190, 0.4119],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0799, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([5.2687e-02, 6.2306e-01, 7.6666e-01, 3.5025e-05, 4.8759e-02, 2.6779e-01,
        4.5944e-01])
output:  tensor([0.2208, 0.6402, 0.7023, 0.5753, 0.2383, 0.1540, 0.5505],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0601, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8825, 0.2957, 0.4826, 0.9488, 0.3237, 0.0365, 0.5158])
output:  tensor([0.6944, 0.4821, 0.3524, 0.6777, 0.1564, 0.1110, 0.3720],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0307, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2826, 0.4447, 0.5938, 0.8785, 0.0955, 0.0414, 0.1986])
output:  tensor([0.3017, 0.5129, 0.7315, 0.5898, 0.2105, 0.1038,

loss:  tensor(0.0591, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4598, 0.5954, 0.6731, 0.1093, 0.1548, 0.0395, 0.1327])
output:  tensor([0.5764, 0.5208, 0.6594, 0.6797, 0.2095, 0.0991, 0.4420],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0638, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0083, 0.3929, 0.8348, 0.7312, 0.0462, 0.0442, 0.5498])
output:  tensor([0.2939, 0.4547, 0.7595, 0.5687, 0.2245, 0.1287, 0.4668],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0233, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1969, 0.6243, 0.9066, 0.8874, 0.3490, 0.0641, 0.9008])
output:  tensor([0.3246, 0.5102, 0.7161, 0.6273, 0.2002, 0.1227, 0.5018],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0454, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.5668, 0.3943, 0.6757, 0.6218, 0.1963, 0.1047, 0.4070],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0447, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1764, 0.4088, 0.6653, 0.0049, 0.3738, 0.0805, 0.4780])
output:  tensor([0.4591, 0.5340, 0.5823, 0.6062, 0.1791, 0.1030, 0.4763],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0718, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7007, 0.5104, 0.4828, 0.9518, 0.1616, 0.0354, 0.0384])
output:  tensor([0.6352, 0.4312, 0.6411, 0.5604, 0.1994, 0.0940, 0.4589],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0529, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.9174e-01, 7.4843e-01, 2.2984e-01, 1.1211e-05, 8.9840e-02, 4.1753e-02,
        9.4770e-01])
output:  tensor([0.4888, 0.6095, 0.5040, 0.5064, 0.2161, 0.1107,

loss:  tensor(0.0649, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.3840, 0.4322, 0.9723, 0.4864, 0.0934, 0.8556])
output:  tensor([0.8559, 0.4031, 0.1705, 0.7032, 0.1739, 0.1707, 0.3990],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0676, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9135, 0.8618, 0.7981, 0.8089, 0.1107, 0.4102, 0.9667])
output:  tensor([0.6724, 0.4962, 0.5440, 0.5734, 0.2309, 0.1402, 0.4789],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0910, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.7464e-01, 3.4205e-01, 6.7211e-01, 2.9076e-05, 6.5325e-02, 3.3964e-02,
        4.0005e-01])
output:  tensor([0.3309, 0.3835, 0.8310, 0.6107, 0.2153, 0.1207, 0.3459],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0623, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.0605, 0.3490, 0.9647, 0.9164, 0.0860, 0.0958, 0.5566])
output:  tensor([0.1934, 0.4237, 0.8304, 0.6848, 0.2114, 0.1331, 0.3810],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0204, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4366, 0.6353, 0.2947, 0.1816, 0.2091, 0.0621, 0.8771])
output:  tensor([0.5628, 0.5076, 0.4096, 0.6186, 0.1668, 0.1210, 0.4578],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0596, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8051, 0.4699, 0.9380, 0.6567, 0.6546, 0.0468, 0.4720])
output:  tensor([0.5273, 0.4076, 0.7304, 0.5884, 0.2074, 0.1085, 0.4225],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0479, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3280, 0.3297, 0.3005, 0.9408, 0.7245, 0.0524, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.9605, 0.3787, 0.6004, 0.9546, 0.1296, 0.0282, 0.4960])
output:  tensor([0.5486, 0.5370, 0.4654, 0.6100, 0.1777, 0.1171, 0.4846],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0489, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9951, 0.4219, 0.0291, 0.8988, 0.1010, 0.0635, 0.2535])
output:  tensor([0.8290, 0.3721, 0.2312, 0.7243, 0.1565, 0.1191, 0.2774],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0154, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5677, 0.2663, 0.2324, 0.8641, 0.3054, 0.0392, 0.1142])
output:  tensor([0.6563, 0.4742, 0.2862, 0.6884, 0.1528, 0.1153, 0.3570],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0247, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4684, 0.2136, 0.5455, 0.4755, 0.1785, 0.0455, 0

loss:  tensor(0.0860, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0020, 0.2224, 0.6468, 0.1738, 0.3138, 0.0314, 0.6734])
output:  tensor([0.7752, 0.4661, 0.4578, 0.6335, 0.1731, 0.1044, 0.4146],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1423, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9727, 0.3696, 0.4368, 0.9151, 0.1303, 0.0392, 0.7943])
output:  tensor([0.5687, 0.4574, 0.5595, 0.5954, 0.1933, 0.1140, 0.4451],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0600, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2492, 0.4952, 0.6579, 0.6763, 0.1477, 0.0297, 0.8930])
output:  tensor([0.4999, 0.5045, 0.5272, 0.6143, 0.1757, 0.1013, 0.4475],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0412, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.7121, 0.4750, 0.4028, 0.5828, 0.1848, 0.1123, 0.4187],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0584, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9435, 0.1106, 0.1970, 0.9447, 0.1897, 0.0407, 0.0745])
output:  tensor([0.4542, 0.5022, 0.5807, 0.5823, 0.1808, 0.1040, 0.4504],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1167, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5962, 0.7457, 0.6753, 0.8015, 0.1345, 0.4632, 0.6522])
output:  tensor([0.3565, 0.5506, 0.7053, 0.5944, 0.2211, 0.1262, 0.5419],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0389, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.8386e-01, 7.7240e-01, 6.4331e-01, 8.2274e-05, 1.0189e-01, 1.6591e-01,
        9.7339e-01])
output:  tensor([0.4188, 0.5339, 0.6462, 0.6056, 0.2262, 0.1215,

output:  tensor([0.2999, 0.4357, 0.7113, 0.6515, 0.1845, 0.1236, 0.4132],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0517, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9816, 0.2832, 0.0435, 0.6922, 0.1028, 0.0352, 0.0639])
output:  tensor([0.8149, 0.4146, 0.1615, 0.7713, 0.1460, 0.1302, 0.3169],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0200, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6899, 0.4075, 0.5491, 0.8841, 0.1127, 0.1030, 0.4060])
output:  tensor([0.5948, 0.4994, 0.4584, 0.6138, 0.1570, 0.0968, 0.4595],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0148, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2685, 0.7971, 0.4903, 0.0337, 0.0742, 0.0923, 0.1846])
output:  tensor([0.1777, 0.6291, 0.7254, 0.6026, 0.2270, 0.1607, 0.5447],
       device='cuda:0', gr

loss:  tensor(0.0453, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8006, 0.5545, 0.5996, 0.1741, 0.4304, 0.0971, 0.2243])
output:  tensor([0.3884, 0.4847, 0.6109, 0.6315, 0.1912, 0.1094, 0.4114],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0681, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9580e-01, 5.6392e-01, 1.6660e-01, 1.0695e-05, 1.3749e-01, 5.7003e-02,
        9.2952e-01])
output:  tensor([0.6093, 0.5871, 0.5214, 0.6116, 0.2345, 0.1487, 0.5530],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1156, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0129, 0.2031, 0.9110, 0.9600, 0.2387, 0.0767, 0.1484])
output:  tensor([0.4035, 0.3745, 0.7824, 0.6240, 0.1929, 0.1097, 0.3701],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0520, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.9550, 0.6403, 0.3178, 0.8745, 0.1279, 0.4072, 0.3962])
output:  tensor([0.5816, 0.5225, 0.4885, 0.5709, 0.1782, 0.0974, 0.4644],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0540, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9173, 0.5708, 0.3667, 0.0048, 0.1124, 0.1057, 0.5729])
output:  tensor([0.5082, 0.4975, 0.6016, 0.5613, 0.2049, 0.1018, 0.4849],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0791, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9881, 0.4518, 0.3865, 0.9435, 0.2229, 0.0350, 0.4494])
output:  tensor([0.8364, 0.3690, 0.4492, 0.6802, 0.1701, 0.0826, 0.3529],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0168, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2593, 0.7808, 0.3742, 0.3117, 0.1071, 0.0549, 0

labels:  tensor([0.9570, 0.3990, 0.3181, 0.5568, 0.0900, 0.4236, 0.4193])
output:  tensor([0.6978, 0.5575, 0.2399, 0.6376, 0.1915, 0.1564, 0.3908],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0268, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3265, 0.2828, 0.5223, 0.0085, 0.0875, 0.0285, 0.2100])
output:  tensor([0.6812, 0.4274, 0.6042, 0.5562, 0.2098, 0.0818, 0.4279],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0741, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2207, 0.2600, 0.6908, 0.4166, 0.4463, 0.0367, 0.6970])
output:  tensor([0.5585, 0.3312, 0.6966, 0.6112, 0.1971, 0.1044, 0.3979],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0448, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.4761e-06, 5.2886e-01, 9.2210e-01, 7.7877e-01, 1.1140e-01, 2.9303e-02,
        7.9

loss:  tensor(0.0611, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3066, 0.6165, 0.5771, 0.8902, 0.4893, 0.1224, 0.0497])
output:  tensor([0.2709, 0.6069, 0.6380, 0.5942, 0.2326, 0.1333, 0.5210],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0544, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.4914, 0.9249, 0.9644, 0.2648, 0.0398, 0.1056])
output:  tensor([0.6835, 0.4829, 0.4901, 0.5567, 0.1869, 0.1372, 0.5300],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0926, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1473, 0.3064, 0.9806, 0.9402, 0.1161, 0.0680, 0.2087])
output:  tensor([0.1414, 0.4967, 0.7712, 0.6983, 0.2062, 0.1469, 0.3543],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0249, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0994, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6939, 0.2450, 0.6991, 0.9195, 0.0976, 0.0329, 0.1473])
output:  tensor([0.4683, 0.4901, 0.3732, 0.6515, 0.1788, 0.1519, 0.4247],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0553, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9908, 0.4637, 0.3292, 0.9470, 0.1963, 0.0373, 0.1512])
output:  tensor([0.7382, 0.4903, 0.4784, 0.6340, 0.1946, 0.0932, 0.4308],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0380, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0224, 0.7967, 0.7666, 0.9224, 0.0672, 0.0612, 0.8525])
output:  tensor([0.1937, 0.6752, 0.6291, 0.5972, 0.1916, 0.1348, 0.5107],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0896, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8669, 0.4399, 0.4954, 0.0115, 0.1055, 0.1932, 0.6772])
output:  tensor([0.5443, 0.5263, 0.5980, 0.5656, 0.1968, 0.0971, 0.5011],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0682, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8682, 0.3628, 0.7283, 0.7717, 0.2143, 0.0450, 0.2943])
output:  tensor([0.5183, 0.4793, 0.5596, 0.6759, 0.1878, 0.1082, 0.3851],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0267, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.2193e-04, 7.1024e-01, 5.1919e-01, 9.0813e-01, 1.0366e-01, 3.0204e-01,
        3.9847e-01])
output:  tensor([0.4251, 0.6306, 0.4030, 0.6315, 0.2142, 0.1521, 0.4809],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0455, device='cuda:0', grad_fn=<MseLossBackward0

loss:  tensor(0.0615, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.1277e-01, 4.1258e-01, 7.2900e-01, 9.9530e-06, 6.8156e-02, 3.6122e-02,
        8.8682e-01])
output:  tensor([0.6737, 0.4382, 0.5449, 0.6112, 0.1913, 0.0909, 0.4720],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0936, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0796, 0.7823, 0.5094, 0.8671, 0.0982, 0.0619, 0.8760])
output:  tensor([0.5179, 0.5742, 0.4404, 0.6186, 0.1743, 0.1255, 0.4414],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0715, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9898, 0.9545, 0.1555, 0.8007, 0.3260, 0.0971, 0.3751])
output:  tensor([0.7739, 0.4998, 0.2706, 0.6613, 0.1908, 0.1347, 0.3627],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.4572, 0.7826, 0.4009, 0.8421, 0.1037, 0.4769, 0.5905])
output:  tensor([0.2543, 0.6305, 0.5638, 0.6500, 0.1730, 0.1195, 0.4784],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0390, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1366, 0.3449, 0.9810, 0.7114, 0.2480, 0.0432, 0.7728])
output:  tensor([0.4393, 0.4249, 0.6609, 0.5414, 0.1876, 0.1010, 0.5212],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0428, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.0909e-05, 2.3283e-01, 9.4786e-01, 2.2076e-03, 3.9645e-01, 9.0599e-02,
        2.4105e-01])
output:  tensor([0.1959, 0.3522, 0.8207, 0.6521, 0.1761, 0.1295, 0.3844],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0803, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0653, 0.586

output:  tensor([0.5148, 0.3832, 0.6966, 0.5887, 0.1844, 0.0830, 0.4688],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0196, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9884, 0.4791, 0.6388, 0.9511, 0.2459, 0.0354, 0.7882])
output:  tensor([0.8389, 0.4200, 0.4861, 0.6348, 0.1880, 0.0901, 0.4630],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0373, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3265, 0.5469, 0.1876, 0.9169, 0.1051, 0.0637, 0.5760])
output:  tensor([0.5076, 0.5986, 0.2760, 0.6634, 0.1965, 0.1637, 0.3544],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0250, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.8391e-01, 7.0943e-02, 4.0263e-04, 9.6159e-01, 1.1190e-01, 4.6790e-02,
        9.0653e-02])
output:  tensor([0.7326, 0.5712, 0.3220, 0.6439, 0.2190, 0.1302,

loss:  tensor(0.0272, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1378, 0.3275, 0.3337, 0.2905, 0.0895, 0.0334, 0.1753])
output:  tensor([0.6496, 0.4178, 0.5359, 0.6666, 0.1717, 0.0976, 0.4138],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0743, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0434, 0.2327, 0.2299, 0.8130, 0.1113, 0.0287, 0.1374])
output:  tensor([0.3797, 0.4607, 0.5928, 0.6937, 0.1594, 0.0920, 0.3922],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0546, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9877, 0.5882, 0.3696, 0.9287, 0.0809, 0.0405, 0.3797])
output:  tensor([0.6216, 0.4621, 0.4960, 0.6041, 0.1998, 0.1172, 0.4836],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0432, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.3228, 0.3067, 0.8149, 0.7413, 0.1598, 0.0879, 0.2617],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0736, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9578e-01, 7.1448e-01, 1.6101e-01, 3.8536e-04, 8.6724e-01, 6.3054e-02,
        8.7054e-01])
output:  tensor([0.8299, 0.5211, 0.3416, 0.4861, 0.2618, 0.1857, 0.5199],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1197, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.0384e-01, 6.9397e-01, 3.9202e-01, 9.9890e-07, 1.4526e-01, 3.8229e-02,
        5.1925e-01])
output:  tensor([0.6233, 0.4483, 0.5341, 0.5805, 0.1810, 0.0869, 0.4492],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0655, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9134, 0.3181, 0.7951, 0.9590, 0.1000, 0.0765, 0.0812])
output:  tensor([0.6633, 0.4

len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.4660, 0.2737, 0.9724, 0.3717, 0.0453, 0.7097])
output:  tensor([0.8612, 0.5335, 0.2265, 0.6368, 0.2577, 0.2459, 0.5422],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0313, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0082, 0.3808, 0.9438, 0.9323, 0.2259, 0.1636, 0.3868])
output:  tensor([0.3132, 0.4073, 0.7967, 0.7280, 0.1964, 0.1087, 0.3635],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0231, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9540, 0.4970, 0.2885, 0.0222, 0.1863, 0.4974, 0.8099])
output:  tensor([0.8282, 0.5266, 0.2698, 0.5634, 0.2261, 0.1346, 0.3765],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0901, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1184, 0.5682, 0.6741, 0.0317, 0.1408, 0.0701, 0

output:  tensor([0.5442, 0.5548, 0.5314, 0.5799, 0.1800, 0.1166, 0.4676],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0406, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9955, 0.3464, 0.0026, 0.9198, 0.1244, 0.0459, 0.0685])
output:  tensor([0.9460, 0.4326, 0.0956, 0.7168, 0.1826, 0.1398, 0.2530],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0151, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9854, 0.4430, 0.1031, 0.9094, 0.1073, 0.0310, 0.0379])
output:  tensor([0.8630, 0.4556, 0.1513, 0.6386, 0.2101, 0.1632, 0.2693],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0246, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1097, 0.5996, 0.6065, 0.3343, 0.1466, 0.0726, 0.1610])
output:  tensor([0.7092, 0.4814, 0.5539, 0.5722, 0.1730, 0.0821, 0.4694],
       device='cuda:0', gr

labels:  tensor([0.9485, 0.4642, 0.7210, 0.9619, 0.1810, 0.0331, 0.9130])
output:  tensor([0.4872, 0.4109, 0.6302, 0.6621, 0.1680, 0.0942, 0.3844],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0853, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0012, 0.5832, 0.6692, 0.8458, 0.1092, 0.0287, 0.7257])
output:  tensor([0.5708, 0.4354, 0.6174, 0.5818, 0.1880, 0.0936, 0.5067],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0681, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9324, 0.3302, 0.2179, 0.9566, 0.1011, 0.0281, 0.0768])
output:  tensor([0.6460, 0.4620, 0.5155, 0.6757, 0.1583, 0.0910, 0.3937],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0535, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0042, 0.5382, 0.9858, 0.8462, 0.1636, 0.2131, 0.5912])
output:  tensor([0.1585, 0

loss:  tensor(0.1098, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2906, 0.6773, 0.6702, 0.2662, 0.0710, 0.0290, 0.9241])
output:  tensor([0.6790, 0.3662, 0.6342, 0.5708, 0.2099, 0.0847, 0.4660],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0820, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5870, 0.3900, 0.0988, 0.7839, 0.3290, 0.0495, 0.0604])
output:  tensor([0.7692, 0.4854, 0.2085, 0.6116, 0.2033, 0.1676, 0.3422],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0276, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.5642, 0.1418, 0.9490, 0.2764, 0.0352, 0.8643])
output:  tensor([0.8168, 0.4738, 0.2161, 0.6024, 0.2589, 0.2271, 0.3757],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0631, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.5869, 0.5801, 0.4061, 0.6067, 0.1972, 0.1380, 0.4420],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0272, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9774, 0.5400, 0.6467, 0.9085, 0.1121, 0.0452, 0.3453])
output:  tensor([0.7252, 0.3967, 0.4716, 0.6819, 0.1615, 0.0802, 0.3875],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0245, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([6.5410e-01, 7.0533e-01, 4.7512e-01, 5.4000e-09, 1.1284e-01, 4.2610e-01,
        8.2981e-01])
output:  tensor([0.5888, 0.6173, 0.5547, 0.5834, 0.2552, 0.1437, 0.6201],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0718, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0180, 0.6322, 0.9049, 0.8882, 0.1177, 0.0530, 0.4479])
output:  tensor([0.1029, 0.5416, 0.7958, 0.6844, 0.1687, 0.1189,

len otput[i] =  7
labels:  tensor([0.7943, 0.4592, 0.6641, 0.9202, 0.6210, 0.0442, 0.6303])
output:  tensor([0.3580, 0.4618, 0.5949, 0.6702, 0.1611, 0.1161, 0.4082],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0748, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1460, 0.4735, 0.8442, 0.8856, 0.3385, 0.0472, 0.7497])
output:  tensor([0.0999, 0.4657, 0.8177, 0.6974, 0.1644, 0.1170, 0.4143],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0266, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8022, 0.6419, 0.1669, 0.6974, 0.0775, 0.0310, 0.4831])
output:  tensor([0.6435, 0.5271, 0.4217, 0.5879, 0.2034, 0.1206, 0.4803],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0199, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8127, 0.3414, 0.8779, 0.9097, 0.5028, 0.5806, 0.1834])
output: 

loss:  tensor(0.0144, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9852, 0.2197, 0.8280, 0.9461, 0.3829, 0.0635, 0.0392])
output:  tensor([0.7842, 0.3939, 0.5104, 0.6247, 0.1815, 0.0696, 0.3796],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0616, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9836, 0.1728, 0.3231, 0.9282, 0.0767, 0.0355, 0.0360])
output:  tensor([0.8889, 0.3399, 0.3962, 0.6396, 0.2046, 0.0970, 0.3108],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0316, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3280, 0.7282, 0.7881, 0.0012, 0.6732, 0.2148, 0.8842])
output:  tensor([0.3130, 0.5715, 0.7252, 0.5436, 0.2181, 0.1166, 0.5670],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0915, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

labels:  tensor([0.9564, 0.3089, 0.7581, 0.9568, 0.1197, 0.0906, 0.2085])
output:  tensor([0.7224, 0.3939, 0.5208, 0.6622, 0.1723, 0.0900, 0.4376],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0372, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9880, 0.5475, 0.5793, 0.9665, 0.1118, 0.0361, 0.7088])
output:  tensor([0.8159, 0.4189, 0.3787, 0.6666, 0.1786, 0.0924, 0.3474],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0449, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0045, 0.3298, 0.7561, 0.8994, 0.6098, 0.0390, 0.3113])
output:  tensor([0.2676, 0.3641, 0.8232, 0.6624, 0.1631, 0.0990, 0.3995],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0489, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0849, 0.6638, 0.4450, 0.6615, 0.1115, 0.1093, 0.5863])
output:  tensor([0.1947, 0

len labels =  7
len otput[i] =  7
labels:  tensor([7.3923e-01, 6.0117e-01, 4.6692e-01, 2.9703e-06, 8.9146e-02, 4.4151e-02,
        7.5641e-02])
output:  tensor([0.2813, 0.4256, 0.7784, 0.6261, 0.1762, 0.1017, 0.4328],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1240, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8660, 0.6274, 0.2659, 0.2923, 0.0705, 0.0545, 0.3190])
output:  tensor([0.7644, 0.4632, 0.4881, 0.5926, 0.1721, 0.0735, 0.4736],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0302, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9947, 0.3920, 0.1330, 0.5095, 0.4852, 0.0409, 0.8113])
output:  tensor([0.8790, 0.4309, 0.4022, 0.5815, 0.1872, 0.1018, 0.4787],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0422, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4434, 0.592

labels:  tensor([9.9775e-02, 7.9543e-01, 4.8694e-01, 1.3581e-04, 8.0953e-02, 4.3637e-01,
        8.5305e-01])
output:  tensor([0.5146, 0.6239, 0.5859, 0.5824, 0.2072, 0.1302, 0.6035],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1032, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([6.3260e-04, 4.0686e-01, 9.9644e-01, 8.2994e-02, 3.7089e-01, 9.3183e-02,
        1.1330e-01])
output:  tensor([0.3761, 0.5115, 0.6724, 0.6008, 0.1765, 0.1136, 0.5088],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1028, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1552, 0.6177, 0.3442, 0.9366, 0.0662, 0.2790, 0.2004])
output:  tensor([0.7969, 0.5156, 0.1804, 0.7016, 0.2082, 0.1652, 0.2826],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0777, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9755, 0.3

output:  tensor([0.1310, 0.4953, 0.7873, 0.6791, 0.1713, 0.1118, 0.4157],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0227, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0106, 0.8632, 0.4228, 0.9328, 0.1127, 0.1135, 0.4837])
output:  tensor([0.4888, 0.6597, 0.3367, 0.6241, 0.2277, 0.1856, 0.4347],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0562, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8480, 0.1991, 0.2742, 0.9529, 0.1085, 0.0408, 0.1051])
output:  tensor([0.7744, 0.3601, 0.4926, 0.6672, 0.1703, 0.0763, 0.4128],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0372, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1228, 0.3229, 0.9621, 0.9584, 0.1309, 0.0524, 0.1658])
output:  tensor([0.2669, 0.4186, 0.8203, 0.6862, 0.1767, 0.1194, 0.4074],
       device='cuda:0', gr

labels:  tensor([0.8155, 0.1441, 0.8927, 0.9004, 0.1047, 0.1023, 0.0349])
output:  tensor([0.4069, 0.3847, 0.6902, 0.6816, 0.1787, 0.0917, 0.3370],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0587, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2991, 0.2912, 0.2886, 0.8565, 0.2542, 0.0400, 0.1870])
output:  tensor([0.3994, 0.5709, 0.5027, 0.6992, 0.1540, 0.1125, 0.4592],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0355, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9408, 0.2439, 0.1047, 0.9581, 0.1103, 0.0423, 0.0389])
output:  tensor([0.8129, 0.3845, 0.3602, 0.7005, 0.1961, 0.1071, 0.2939],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0349, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9900, 0.4835, 0.0979, 0.1635, 0.1137, 0.0363, 0.1849])
output:  tensor([0.6916, 0

loss:  tensor(0.0285, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7786, 0.7182, 0.9598, 0.0031, 0.0543, 0.0714, 0.0647])
output:  tensor([0.4272, 0.4265, 0.7676, 0.5955, 0.1917, 0.0999, 0.4839],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1131, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9229, 0.4314, 0.4983, 0.9096, 0.1223, 0.0331, 0.3826])
output:  tensor([0.5854, 0.5016, 0.6024, 0.5844, 0.1874, 0.0837, 0.5239],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0375, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8418, 0.4747, 0.4104, 0.8584, 0.1021, 0.0322, 0.4268])
output:  tensor([0.5645, 0.4602, 0.6117, 0.6032, 0.1929, 0.0989, 0.5023],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0287, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0383, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8796, 0.2424, 0.8733, 0.4796, 0.1467, 0.0799, 0.1572])
output:  tensor([0.6247, 0.3737, 0.6024, 0.5911, 0.2032, 0.0970, 0.4643],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0380, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8845, 0.6150, 0.1768, 0.5729, 0.0577, 0.1688, 0.0851])
output:  tensor([0.8720, 0.4950, 0.1378, 0.6990, 0.2078, 0.1652, 0.2766],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0130, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7481, 0.6195, 0.7489, 0.9375, 0.4876, 0.0700, 0.9581])
output:  tensor([0.6806, 0.4553, 0.4782, 0.6297, 0.1582, 0.0807, 0.4523],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0806, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0770, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2963, 0.5476, 0.7541, 0.8605, 0.1319, 0.0265, 0.1638])
output:  tensor([0.3991, 0.5242, 0.6209, 0.6344, 0.1807, 0.1040, 0.4457],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0240, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6897, 0.6533, 0.5074, 0.9525, 0.0822, 0.0393, 0.1739])
output:  tensor([0.5293, 0.5377, 0.4203, 0.6915, 0.1742, 0.1180, 0.3759],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0243, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7576, 0.3388, 0.3130, 0.2743, 0.1756, 0.0293, 0.1383])
output:  tensor([0.6231, 0.4147, 0.6189, 0.6273, 0.1934, 0.1005, 0.4658],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0507, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

len labels =  7
len otput[i] =  7
labels:  tensor([0.3855, 0.6172, 0.3768, 0.0045, 0.0557, 0.0575, 0.3853])
output:  tensor([0.7185, 0.5104, 0.3742, 0.5645, 0.2094, 0.1225, 0.4673],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0672, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3158, 0.1958, 0.7453, 0.4874, 0.3805, 0.1274, 0.1013])
output:  tensor([0.6077, 0.4693, 0.5951, 0.5629, 0.2064, 0.0962, 0.5012],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0542, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9956, 0.1444, 0.0398, 0.8988, 0.3578, 0.0423, 0.0766])
output:  tensor([0.8772, 0.4655, 0.2128, 0.4893, 0.2871, 0.1941, 0.4387],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0677, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.3946, 0.0694, 0.9496, 0.1146, 0.0403, 0

loss:  tensor(0.0707, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3257, 0.5863, 0.4910, 0.0022, 0.1231, 0.4833, 0.7531])
output:  tensor([0.4232, 0.6336, 0.5479, 0.5686, 0.1774, 0.1163, 0.5506],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0735, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.2692e-01, 6.3823e-01, 7.1905e-01, 2.1396e-04, 2.4189e-01, 2.9063e-01,
        7.1968e-01])
output:  tensor([0.4004, 0.5689, 0.5581, 0.6085, 0.1824, 0.1149, 0.5077],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0793, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8548, 0.6197, 0.5256, 0.0726, 0.0807, 0.0807, 0.7061])
output:  tensor([0.6137, 0.4894, 0.5994, 0.5927, 0.2072, 0.1042, 0.5143],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0578, device='cuda:0', grad_fn=<MseLossBackward0

output:  tensor([0.8837, 0.3867, 0.1667, 0.7969, 0.1638, 0.1057, 0.2230],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0104, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9943, 0.2512, 0.3386, 0.9587, 0.0592, 0.0359, 0.1033])
output:  tensor([0.6412, 0.3969, 0.5719, 0.6405, 0.1861, 0.1037, 0.4423],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0625, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.4958e-01, 5.8776e-01, 3.6277e-01, 7.8122e-05, 8.5230e-02, 6.4698e-01,
        6.5222e-01])
output:  tensor([0.5300, 0.6193, 0.4182, 0.5517, 0.2125, 0.1500, 0.4501],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1021, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0126, 0.4143, 0.5584, 0.9129, 0.1173, 0.0761, 0.2561])
output:  tensor([0.1798, 0.6572, 0.6449, 0.6439, 0.1727, 0.1229,

loss:  tensor(0.0255, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6956, 0.4390, 0.7907, 0.8824, 0.0979, 0.0342, 0.2880])
output:  tensor([0.6627, 0.4049, 0.7052, 0.5592, 0.2058, 0.0808, 0.5301],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0266, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8440, 0.4683, 0.5524, 0.9063, 0.1223, 0.0295, 0.6089])
output:  tensor([0.4963, 0.4972, 0.7234, 0.5383, 0.2038, 0.0919, 0.5650],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0427, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1783, 0.4775, 0.9419, 0.8948, 0.0937, 0.0547, 0.5563])
output:  tensor([0.3216, 0.3501, 0.8162, 0.6065, 0.1934, 0.1026, 0.4513],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0227, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.5823, 0.4186, 0.6456, 0.6629, 0.1990, 0.0978, 0.4170],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1048, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0046, 0.6127, 0.9659, 0.7738, 0.1792, 0.0433, 0.7388])
output:  tensor([0.5156, 0.5592, 0.6033, 0.6786, 0.1689, 0.0918, 0.4219],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0725, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9895, 0.4119, 0.1962, 0.9347, 0.0849, 0.0935, 0.0516])
output:  tensor([0.6581, 0.5203, 0.2852, 0.6147, 0.2192, 0.1762, 0.3821],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0523, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9943, 0.2550, 0.1494, 0.3214, 0.0608, 0.0454, 0.1181])
output:  tensor([0.7519, 0.5076, 0.2552, 0.5933, 0.2137, 0.1448, 0.3927],
       device='cuda:0', gr

output:  tensor([0.8558, 0.4192, 0.2751, 0.5981, 0.2115, 0.1080, 0.3818],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0669, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9827, 0.4608, 0.6466, 0.9188, 0.2029, 0.0729, 0.0684])
output:  tensor([0.7550, 0.3732, 0.4875, 0.6783, 0.1698, 0.0784, 0.4056],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0368, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9702, 0.3866, 0.1352, 0.9199, 0.1075, 0.0301, 0.1880])
output:  tensor([0.8315, 0.3714, 0.1975, 0.6988, 0.1919, 0.1115, 0.2246],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0125, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0011, 0.5168, 0.8561, 0.0040, 0.0901, 0.0569, 0.2951])
output:  tensor([0.3454, 0.4177, 0.7403, 0.6308, 0.1781, 0.0950, 0.4778],
       device='cuda:0', gr

loss:  tensor(0.0272, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0023, 0.6374, 0.8886, 0.0016, 0.0897, 0.0394, 0.5763])
output:  tensor([0.2307, 0.5585, 0.7749, 0.5986, 0.1920, 0.1199, 0.5724],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0635, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0061, 0.7128, 0.4550, 0.8761, 0.0722, 0.1326, 0.8481])
output:  tensor([0.1433, 0.6628, 0.6984, 0.6318, 0.2087, 0.1430, 0.5400],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0363, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4405, 0.6110, 0.5741, 0.9393, 0.2685, 0.0332, 0.8132])
output:  tensor([0.3461, 0.5931, 0.5194, 0.6918, 0.1908, 0.1396, 0.4100],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0362, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0836, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([5.2911e-05, 2.6519e-01, 7.9419e-01, 8.5965e-01, 1.7818e-01, 4.9819e-02,
        6.6672e-01])
output:  tensor([0.3786, 0.4251, 0.7371, 0.5244, 0.2194, 0.1118, 0.5431],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6040, 0.2304, 0.9669, 0.8261, 0.6457, 0.5238, 0.0576])
output:  tensor([0.3044, 0.3547, 0.8788, 0.6322, 0.2064, 0.1254, 0.4065],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0891, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6303, 0.2239, 0.4947, 0.9557, 0.1090, 0.0324, 0.1823])
output:  tensor([0.8615, 0.3703, 0.3087, 0.7366, 0.1500, 0.0796, 0.2870],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0246, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.0010, 0.7593, 0.9892, 0.8335, 0.1077, 0.1336, 0.9648])
output:  tensor([0.3805, 0.6134, 0.6205, 0.6282, 0.1844, 0.1081, 0.5173],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0786, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2687, 0.6492, 0.7171, 0.6095, 0.0740, 0.0281, 0.7877])
output:  tensor([0.1642, 0.5762, 0.7374, 0.6424, 0.1844, 0.1275, 0.4828],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0190, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1751, 0.1155, 0.9582, 0.0499, 0.1130, 0.0960, 0.1848])
output:  tensor([0.4923, 0.5028, 0.7079, 0.5329, 0.2047, 0.1198, 0.5548],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0989, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5468, 0.4281, 0.4963, 0.9225, 0.1987, 0.0297, 0

loss:  tensor(0.0359, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.4627e-01, 3.4352e-01, 4.0060e-01, 3.7623e-04, 2.0345e-01, 2.8988e-02,
        2.6078e-01])
output:  tensor([0.5969, 0.4479, 0.6281, 0.6201, 0.1812, 0.0825, 0.4420],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0779, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.1996e-02, 4.2070e-01, 5.7202e-01, 7.3342e-06, 9.2498e-02, 3.1103e-02,
        3.1450e-01])
output:  tensor([0.2013, 0.4748, 0.8358, 0.6077, 0.1904, 0.1233, 0.4733],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0739, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0225, 0.4679, 0.5277, 0.0247, 0.1220, 0.0363, 0.8589])
output:  tensor([0.2199, 0.5449, 0.7199, 0.6182, 0.1862, 0.1140, 0.5111],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0808, device

labels:  tensor([0.9489, 0.4782, 0.5681, 0.5921, 0.3294, 0.0303, 0.2247])
output:  tensor([0.6314, 0.3400, 0.7422, 0.5595, 0.1847, 0.0929, 0.5386],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0392, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0025, 0.3622, 0.2342, 0.4488, 0.0558, 0.0421, 0.2407])
output:  tensor([0.3803, 0.5706, 0.3314, 0.6887, 0.1834, 0.1291, 0.3370],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0409, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9824, 0.1363, 0.1320, 0.9042, 0.1313, 0.0310, 0.1489])
output:  tensor([0.6690, 0.4843, 0.2902, 0.5531, 0.2293, 0.1511, 0.3376],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0610, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3455, 0.8298, 0.5521, 0.9089, 0.1339, 0.1426, 0.4629])
output:  tensor([0.3091, 0

loss:  tensor(0.1007, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.6795e-04, 1.6450e-01, 4.2284e-01, 8.1107e-01, 4.3769e-01, 6.0351e-02,
        1.7563e-01])
output:  tensor([0.2609, 0.3530, 0.8676, 0.6282, 0.1908, 0.1308, 0.4506],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0680, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9733, 0.5002, 0.3054, 0.6384, 0.1235, 0.0331, 0.1523])
output:  tensor([0.8279, 0.4403, 0.3580, 0.6262, 0.1809, 0.0864, 0.3538],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0106, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8266, 0.1827, 0.2063, 0.7439, 0.1821, 0.0359, 0.0219])
output:  tensor([0.3777, 0.4814, 0.6141, 0.6637, 0.1609, 0.1048, 0.4204],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0896, device='cuda:0', grad_fn=<MseLossBackward0

output:  tensor([0.8002, 0.3966, 0.5231, 0.6807, 0.1773, 0.0762, 0.3968],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0328, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4638, 0.2174, 0.2772, 0.9175, 0.1568, 0.0375, 0.2062])
output:  tensor([0.3175, 0.4574, 0.5361, 0.7414, 0.1862, 0.1400, 0.4003],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0323, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2125, 0.3831, 0.6052, 0.9050, 0.2199, 0.0537, 0.0503])
output:  tensor([0.3779, 0.4926, 0.7263, 0.6432, 0.1967, 0.1038, 0.4702],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0431, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5837, 0.3914, 0.8567, 0.7877, 0.3437, 0.0521, 0.9650])
output:  tensor([0.6250, 0.3748, 0.7617, 0.6096, 0.1909, 0.0924, 0.5026],
       device='cuda:0', gr

output:  tensor([0.5809, 0.4690, 0.5149, 0.6705, 0.1763, 0.0920, 0.4325],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0100, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9802, 0.3797, 0.8443, 0.9429, 0.1173, 0.0530, 0.2773])
output:  tensor([0.5547, 0.3772, 0.7237, 0.6195, 0.1946, 0.0883, 0.4621],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0488, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0595, 0.6212, 0.7353, 0.7985, 0.0823, 0.0851, 0.6822])
output:  tensor([0.2292, 0.6999, 0.5746, 0.5960, 0.1642, 0.1103, 0.5822],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0170, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0108, 0.4049, 0.7285, 0.7169, 0.1289, 0.0461, 0.1127])
output:  tensor([0.7705, 0.4061, 0.4447, 0.6798, 0.1737, 0.0886, 0.4267],
       device='cuda:0', gr

loss:  tensor(0.0625, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9818, 0.8185, 0.4854, 0.8741, 0.1591, 0.2154, 0.9349])
output:  tensor([0.6370, 0.5697, 0.4543, 0.5800, 0.2218, 0.1237, 0.5191],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0648, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0018, 0.5244, 0.8112, 0.0081, 0.1235, 0.0777, 0.5870])
output:  tensor([0.3914, 0.4429, 0.7790, 0.6520, 0.1883, 0.1041, 0.4357],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0860, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5253, 0.2233, 0.7406, 0.9217, 0.1152, 0.0454, 0.0770])
output:  tensor([0.3664, 0.3746, 0.8029, 0.6380, 0.1877, 0.0976, 0.4432],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0392, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.7246, 0.4211, 0.5285, 0.6312, 0.1755, 0.0688, 0.4770],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0052, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8714, 0.6902, 0.2313, 0.9177, 0.0953, 0.0335, 0.2082])
output:  tensor([0.6965, 0.4724, 0.3461, 0.6706, 0.1777, 0.1152, 0.3910],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0285, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([4.0270e-01, 7.2713e-01, 7.3273e-01, 1.3580e-07, 3.0279e-01, 1.4183e-01,
        8.5356e-01])
output:  tensor([0.4836, 0.5340, 0.7086, 0.5825, 0.2079, 0.1018, 0.5150],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0727, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6408, 0.5639, 0.3934, 0.9563, 0.5654, 0.0276, 0.4773])
output:  tensor([0.1745, 0.5781, 0.5677, 0.7381, 0.1622, 0.1298,

loss:  tensor(0.0460, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1593, 0.3942, 0.6406, 0.8540, 0.6648, 0.0364, 0.4461])
output:  tensor([0.3524, 0.5810, 0.5920, 0.6007, 0.1681, 0.0931, 0.5180],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0563, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1589, 0.4648, 0.8563, 0.8866, 0.2798, 0.0361, 0.5196])
output:  tensor([0.4669, 0.5175, 0.5436, 0.7220, 0.1674, 0.1001, 0.4502],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0349, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8356, 0.4981, 0.3547, 0.7129, 0.0678, 0.0468, 0.6801])
output:  tensor([0.6552, 0.5069, 0.4067, 0.5969, 0.1881, 0.1167, 0.4597],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0167, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0814, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.6464e-01, 2.2645e-01, 1.4668e-01, 1.0500e-08, 3.7617e-01, 9.8770e-02,
        3.9243e-02])
output:  tensor([0.5400, 0.5293, 0.5425, 0.6561, 0.1898, 0.0974, 0.4493],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1461, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3610, 0.4159, 0.9073, 0.7619, 0.0557, 0.0457, 0.4393])
output:  tensor([0.4910, 0.4285, 0.8158, 0.7538, 0.2530, 0.1523, 0.4738],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0110, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1355, 0.5112, 0.6189, 0.7514, 0.1434, 0.0434, 0.7128])
output:  tensor([0.3917, 0.6484, 0.5324, 0.6294, 0.1617, 0.1112, 0.5048],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0222, device='cuda:0', grad_fn=<MseLossBackward0

labels:  tensor([0.8444, 0.6240, 0.6073, 0.2115, 0.0889, 0.0308, 0.4072])
output:  tensor([0.5249, 0.5566, 0.6706, 0.5673, 0.1953, 0.0889, 0.5640],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0395, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7986, 0.6283, 0.7516, 0.9566, 0.1079, 0.0283, 0.6499])
output:  tensor([0.6287, 0.4338, 0.5600, 0.6700, 0.1591, 0.0874, 0.3729],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0384, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.4791e-02, 1.6084e-01, 4.2481e-04, 9.5708e-01, 7.1321e-01, 6.6908e-02,
        3.8035e-02])
output:  tensor([0.6306, 0.3023, 0.6129, 0.7602, 0.1524, 0.0799, 0.3847],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1651, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.1981e-04, 6.5185e-02, 1.1398e-01, 9.7099e-01,

loss:  tensor(0.0551, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9365, 0.6514, 0.2848, 0.9262, 0.0913, 0.0280, 0.6004])
output:  tensor([0.6434, 0.4508, 0.3490, 0.6632, 0.1724, 0.1045, 0.3475],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0394, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0632, 0.7736, 0.4639, 0.0950, 0.0977, 0.1852, 0.3541])
output:  tensor([0.3599, 0.6230, 0.5729, 0.6702, 0.1904, 0.1348, 0.5914],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0744, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0396, 0.7589, 0.8244, 0.8241, 0.0800, 0.0627, 0.9789])
output:  tensor([0.1270, 0.7255, 0.7562, 0.5845, 0.2176, 0.1307, 0.6337],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0305, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

labels:  tensor([0.8205, 0.3972, 0.5348, 0.9697, 0.2089, 0.0304, 0.7820])
output:  tensor([0.5377, 0.4403, 0.6762, 0.6095, 0.1819, 0.1122, 0.4703],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0480, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0013, 0.4169, 0.2194, 0.5898, 0.1074, 0.0490, 0.0321])
output:  tensor([0.2946, 0.2836, 0.8946, 0.6038, 0.2260, 0.1298, 0.3600],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0983, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0564, 0.7518, 0.8121, 0.7153, 0.0924, 0.0621, 0.8108])
output:  tensor([0.2221, 0.6230, 0.6752, 0.6739, 0.1778, 0.1444, 0.5383],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0218, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0015, 0.6429, 0.6628, 0.6275, 0.3298, 0.0452, 0.3231])
output:  tensor([0.1271, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.7804, 0.4710, 0.4106, 0.3046, 0.1268, 0.0534, 0.6829])
output:  tensor([0.7627, 0.3789, 0.6240, 0.6138, 0.1857, 0.0779, 0.4501],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0297, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0009, 0.3690, 0.4623, 0.5433, 0.1707, 0.0643, 0.4900])
output:  tensor([0.1011, 0.6205, 0.7446, 0.6285, 0.1921, 0.1298, 0.5119],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0236, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0563, 0.4562, 0.7136, 0.0000, 0.1744, 0.2553, 0.2280])
output:  tensor([0.1732, 0.6218, 0.7761, 0.5976, 0.2216, 0.1247, 0.5204],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0724, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5759, 0.1898, 0.9238, 0.7941, 0.1750, 0.0465, 0

output:  tensor([0.8339, 0.4416, 0.3956, 0.6781, 0.1963, 0.0925, 0.3231],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0201, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0872, 0.5674, 0.9065, 0.8924, 0.2907, 0.1116, 0.2046])
output:  tensor([0.8096, 0.4713, 0.5326, 0.7292, 0.1704, 0.0927, 0.4689],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1117, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9691, 0.2922, 0.3457, 0.8863, 0.1824, 0.0388, 0.5105])
output:  tensor([0.5807, 0.3525, 0.6992, 0.6596, 0.1850, 0.0799, 0.4250],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0486, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6576, 0.4798, 0.7192, 0.9267, 0.1379, 0.5067, 0.6811])
output:  tensor([0.3210, 0.4782, 0.7435, 0.5753, 0.2001, 0.0984, 0.4969],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.2839, 0.6225, 0.3925, 0.7731, 0.1206, 0.0377, 0.2377])
output:  tensor([0.5202, 0.5327, 0.4481, 0.6768, 0.1587, 0.0889, 0.3960],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0151, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0665, 0.4534, 0.6232, 0.8940, 0.0673, 0.0744, 0.1516])
output:  tensor([0.2936, 0.3853, 0.7886, 0.5952, 0.2128, 0.1266, 0.4100],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0377, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9430, 0.3199, 0.7337, 0.4760, 0.1019, 0.0699, 0.3860])
output:  tensor([0.5532, 0.3620, 0.7171, 0.6239, 0.2020, 0.0871, 0.4341],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0269, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6279, 0.5889, 0.1567, 0.0863, 0.0864, 0.0338, 0

loss:  tensor(0.0574, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2201, 0.5107, 0.9699, 0.8971, 0.0928, 0.1286, 0.0399])
output:  tensor([0.4041, 0.3934, 0.7711, 0.5874, 0.2090, 0.0915, 0.4455],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0518, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9863, 0.4770, 0.0602, 0.9210, 0.1117, 0.0361, 0.1455])
output:  tensor([0.9199, 0.4016, 0.2758, 0.6765, 0.1905, 0.0898, 0.3458],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0236, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0273, 0.5675, 0.6186, 0.0969, 0.2584, 0.0344, 0.8676])
output:  tensor([0.3162, 0.4892, 0.6403, 0.6519, 0.1704, 0.0920, 0.4664],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0814, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0197, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9687, 0.2492, 0.6621, 0.0731, 0.5039, 0.1474, 0.0287])
output:  tensor([0.7624, 0.5180, 0.2736, 0.5645, 0.2140, 0.1388, 0.3974],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1039, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9783, 0.3977, 0.5710, 0.0846, 0.1307, 0.0691, 0.2079])
output:  tensor([0.8215, 0.3917, 0.5109, 0.6398, 0.1770, 0.0753, 0.4402],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0561, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8987, 0.5809, 0.7931, 0.8310, 0.6643, 0.0405, 0.3500])
output:  tensor([0.4387, 0.4574, 0.7058, 0.5528, 0.2079, 0.1018, 0.5541],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0808, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.3830, 0.3942, 0.7892, 0.6180, 0.1967, 0.1006, 0.4727],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0571, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3029, 0.7820, 0.4809, 0.9306, 0.0889, 0.0379, 0.6971])
output:  tensor([0.2860, 0.6494, 0.4872, 0.6920, 0.1612, 0.1093, 0.4057],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0243, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9936, 0.5844, 0.0872, 0.9403, 0.1111, 0.1258, 0.3896])
output:  tensor([0.8023, 0.4502, 0.1879, 0.7583, 0.1669, 0.1321, 0.3041],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0155, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6908, 0.4370, 0.8896, 0.0000, 0.3161, 0.2263, 0.8433])
output:  tensor([0.3097, 0.4756, 0.7786, 0.6459, 0.2300, 0.1287, 0.4581],
       device='cuda:0', gr

output:  tensor([0.3390, 0.5887, 0.7565, 0.5741, 0.2322, 0.1202, 0.5141],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0347, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8836, 0.3236, 0.5346, 0.0402, 0.1482, 0.1198, 0.8997])
output:  tensor([0.5072, 0.4329, 0.6690, 0.6018, 0.1991, 0.0938, 0.5070],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0921, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1097, 0.5661, 0.8808, 0.6043, 0.1023, 0.0485, 0.0391])
output:  tensor([0.3170, 0.5269, 0.7455, 0.5530, 0.1978, 0.0976, 0.5538],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0488, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9456, 0.6469, 0.2955, 0.2222, 0.0609, 0.0541, 0.5028])
output:  tensor([0.8033, 0.5497, 0.4467, 0.5461, 0.1966, 0.1236, 0.6127],
       device='cuda:0', gr

loss:  tensor(0.1560, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0095, 0.4057, 0.4364, 0.8756, 0.1069, 0.0309, 0.5720])
output:  tensor([0.5911, 0.4019, 0.5860, 0.6337, 0.1742, 0.0755, 0.4370],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0634, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9837, 0.7176, 0.3256, 0.7741, 0.1157, 0.0490, 0.6660])
output:  tensor([0.6425, 0.5467, 0.4704, 0.6327, 0.1687, 0.0999, 0.4724],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0328, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9357, 0.5635, 0.0954, 0.9119, 0.1034, 0.0676, 0.6115])
output:  tensor([0.7076, 0.4835, 0.2499, 0.6730, 0.1714, 0.1268, 0.3571],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0303, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0162, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7630, 0.5672, 0.3502, 0.3021, 0.1128, 0.0314, 0.1812])
output:  tensor([0.6880, 0.5595, 0.3638, 0.7428, 0.1516, 0.1186, 0.3743],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0352, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9491, 0.3127, 0.1014, 0.9343, 0.0962, 0.0348, 0.0792])
output:  tensor([0.8362, 0.3893, 0.1839, 0.6711, 0.1805, 0.1167, 0.3116],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0232, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.3737e-01, 4.1492e-01, 5.2416e-01, 8.5458e-05, 9.5292e-02, 3.4603e-02,
        5.7690e-01])
output:  tensor([0.5643, 0.3880, 0.6847, 0.6073, 0.2057, 0.0925, 0.4662],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0711, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.5127, 0.3762, 0.6294, 0.9528, 0.0862, 0.0353, 0.3525])
output:  tensor([0.3803, 0.4272, 0.6896, 0.7038, 0.1855, 0.0989, 0.4377],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0153, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9808, 0.7057, 0.2763, 0.8790, 0.1152, 0.0471, 0.8765])
output:  tensor([0.8416, 0.4345, 0.2850, 0.6848, 0.1662, 0.0971, 0.3554],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0582, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4764, 0.4430, 0.1684, 0.9097, 0.1500, 0.0564, 0.5097])
output:  tensor([0.3746, 0.6037, 0.3035, 0.6821, 0.1733, 0.1441, 0.3589],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0196, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2471, 0.5598, 0.6364, 0.0310, 0.1192, 0.0599, 0

output:  tensor([0.4730, 0.4791, 0.7232, 0.5538, 0.2008, 0.0911, 0.5586],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0864, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8812, 0.5970, 0.6312, 0.9135, 0.1070, 0.0289, 0.2993])
output:  tensor([0.5840, 0.4499, 0.6294, 0.6385, 0.1925, 0.0942, 0.4496],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0314, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8973, 0.6048, 0.2400, 0.0009, 0.0982, 0.0331, 0.2305])
output:  tensor([0.7949, 0.4240, 0.4779, 0.6219, 0.1741, 0.0744, 0.4083],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0749, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5491, 0.4005, 0.7439, 0.7610, 0.3110, 0.0331, 0.9541])
output:  tensor([0.2721, 0.4310, 0.7839, 0.6441, 0.2017, 0.1055, 0.4213],
       device='cuda:0', gr

loss:  tensor(0.0319, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.5716, 0.2129, 0.9425, 0.1129, 0.0683, 0.4441])
output:  tensor([0.8808, 0.4096, 0.3163, 0.6187, 0.1943, 0.1265, 0.4974],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0240, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8926, 0.6717, 0.2688, 0.9076, 0.0622, 0.0602, 0.2632])
output:  tensor([0.9249, 0.4090, 0.2859, 0.6053, 0.1977, 0.1043, 0.3347],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0267, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9049, 0.7719, 0.4964, 0.7994, 0.1544, 0.0419, 0.6203])
output:  tensor([0.8274, 0.5022, 0.2417, 0.6051, 0.2060, 0.1517, 0.4333],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0330, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.2006, 0.4744, 0.7265, 0.6837, 0.1812, 0.1175, 0.4403],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0292, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.3682e-04, 8.2093e-01, 2.8662e-01, 8.6602e-01, 1.0144e-01, 6.7259e-02,
        6.5056e-01])
output:  tensor([0.1339, 0.7758, 0.5631, 0.6377, 0.1790, 0.1446, 0.5363],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0248, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0081, 0.4444, 0.7845, 0.0129, 0.2359, 0.1269, 0.6509])
output:  tensor([0.1262, 0.5032, 0.8135, 0.6818, 0.1972, 0.1235, 0.4331],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0735, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1672, 0.5434, 0.9803, 0.8213, 0.0595, 0.0346, 0.2159])
output:  tensor([0.5885, 0.4595, 0.7600, 0.5268, 0.2213, 0.1022,

loss:  tensor(0.0817, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5615, 0.7015, 0.6953, 0.1263, 0.1050, 0.0371, 0.6293])
output:  tensor([0.3700, 0.5386, 0.6928, 0.6263, 0.1996, 0.0889, 0.5057],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0486, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8282, 0.4224, 0.6781, 0.9140, 0.1090, 0.0402, 0.1797])
output:  tensor([0.9305, 0.4132, 0.2965, 0.6450, 0.2047, 0.1035, 0.3548],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0389, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1764, 0.4183, 0.6339, 0.7428, 0.3544, 0.0523, 0.1567])
output:  tensor([0.3213, 0.4596, 0.7471, 0.6164, 0.1814, 0.0984, 0.4786],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0267, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.4348, 0.5962, 0.5070, 0.5960, 0.1918, 0.1254, 0.5307],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0770, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9646, 0.7002, 0.3918, 0.8309, 0.1346, 0.0752, 0.7355])
output:  tensor([0.5808, 0.5129, 0.3490, 0.7156, 0.1829, 0.1020, 0.3637],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0484, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3766, 0.8436, 0.8483, 0.7339, 0.3475, 0.0437, 0.9332])
output:  tensor([0.4608, 0.7055, 0.6268, 0.5031, 0.2378, 0.1232, 0.6411],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0332, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2880, 0.5607, 0.8992, 0.7543, 0.1781, 0.0872, 0.0220])
output:  tensor([0.7320, 0.4435, 0.1876, 0.6873, 0.1770, 0.1568, 0.2959],
       device='cuda:0', gr

output:  tensor([0.2432, 0.6783, 0.5796, 0.5964, 0.1714, 0.1206, 0.6208],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0395, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4210, 0.7839, 0.5507, 0.0226, 0.0831, 0.1867, 0.7784])
output:  tensor([0.3603, 0.6578, 0.6357, 0.6079, 0.2071, 0.1182, 0.5563],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0627, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6477, 0.2491, 0.4095, 0.9313, 0.4342, 0.0502, 0.0853])
output:  tensor([0.8840, 0.3008, 0.0858, 0.6636, 0.2122, 0.1831, 0.1758],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0443, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9539, 0.3366, 0.6031, 0.5290, 0.1121, 0.0637, 0.4095])
output:  tensor([0.3597, 0.3825, 0.7502, 0.6309, 0.2022, 0.1060, 0.4441],
       device='cuda:0', gr

loss:  tensor(0.0251, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9640, 0.4629, 0.4889, 0.6930, 0.1128, 0.5954, 0.0336])
output:  tensor([0.7282, 0.4386, 0.3626, 0.6579, 0.1971, 0.1112, 0.3846],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0626, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4714, 0.3941, 0.9653, 0.9769, 0.0955, 0.0513, 0.3357])
output:  tensor([0.1559, 0.5189, 0.7538, 0.6593, 0.1791, 0.0931, 0.4173],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0394, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8811, 0.5993, 0.5581, 0.9431, 0.1148, 0.0412, 0.6381])
output:  tensor([0.4864, 0.5823, 0.5125, 0.6155, 0.1658, 0.1094, 0.5052],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0415, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.6765, 0.5022, 0.1799, 0.6301, 0.1995, 0.1562, 0.3073],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0707, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9034, 0.3320, 0.7586, 0.9318, 0.1122, 0.0377, 0.1549])
output:  tensor([0.5189, 0.4970, 0.3701, 0.7222, 0.1446, 0.0848, 0.3342],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0579, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9754, 0.5684, 0.1351, 0.9320, 0.1131, 0.3493, 0.0308])
output:  tensor([0.9798, 0.4465, 0.0644, 0.7962, 0.1404, 0.1297, 0.2350],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0184, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0234, 0.7599, 0.5626, 0.8449, 0.1088, 0.0341, 0.5088])
output:  tensor([0.3044, 0.5884, 0.6225, 0.6529, 0.1816, 0.1107, 0.4756],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.0134, 0.6478, 0.7824, 0.2155, 0.6018, 0.0730, 0.7305])
output:  tensor([0.2626, 0.5734, 0.7403, 0.5932, 0.2098, 0.1117, 0.5625],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0565, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7862, 0.7494, 0.2903, 0.8506, 0.1036, 0.4923, 0.5367])
output:  tensor([0.7149, 0.5973, 0.3289, 0.4893, 0.2017, 0.1647, 0.4660],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0403, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7968, 0.4336, 0.1285, 0.8645, 0.1764, 0.0455, 0.0398])
output:  tensor([0.8660, 0.4005, 0.1163, 0.7292, 0.1620, 0.1601, 0.2318],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0107, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([7.8844e-04, 3.2396e-01, 9.0424e-01, 8.2710e-01, 1

len labels =  7
len otput[i] =  7
labels:  tensor([0.5851, 0.2726, 0.7800, 0.8860, 0.1716, 0.0418, 0.2970])
output:  tensor([0.5611, 0.3922, 0.6460, 0.6882, 0.1648, 0.0700, 0.3895],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0116, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.7316e-01, 6.1945e-01, 4.0550e-01, 1.1300e-08, 9.4200e-02, 3.7285e-01,
        4.5175e-01])
output:  tensor([0.5202, 0.5978, 0.5446, 0.6408, 0.1939, 0.1088, 0.5615],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0833, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9430, 0.3422, 0.9351, 0.9204, 0.1048, 0.1503, 0.1491])
output:  tensor([0.7560, 0.4001, 0.6197, 0.7017, 0.1711, 0.0716, 0.3587],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0343, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9941, 0.281

loss:  tensor(0.1009, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0018, 0.6903, 0.5771, 0.8807, 0.2110, 0.0649, 0.5973])
output:  tensor([0.1606, 0.6791, 0.7430, 0.6138, 0.2109, 0.1393, 0.5822],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0186, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4684, 0.4660, 0.1125, 0.4458, 0.1118, 0.0315, 0.1662])
output:  tensor([0.5108, 0.5499, 0.3662, 0.6395, 0.1637, 0.1150, 0.3967],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0248, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.8929e-01, 3.0568e-01, 9.2909e-02, 2.6735e-06, 1.4038e-01, 5.4193e-02,
        1.6284e-01])
output:  tensor([0.8486, 0.5694, 0.4412, 0.5086, 0.2404, 0.2109, 0.6299],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1032, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.9212, 0.3730, 0.2204, 0.9264, 0.1493, 0.0335, 0.0388])
output:  tensor([0.8636, 0.4177, 0.1834, 0.6949, 0.1607, 0.1290, 0.3251],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0216, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7496, 0.8674, 0.3963, 0.9347, 0.0963, 0.0822, 0.5238])
output:  tensor([0.3957, 0.4809, 0.6007, 0.7394, 0.1663, 0.0997, 0.4009],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0535, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0132, 0.8162, 0.6220, 0.9080, 0.1852, 0.0687, 0.4781])
output:  tensor([0.1764, 0.7247, 0.5931, 0.5836, 0.1674, 0.1407, 0.6456],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0249, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.7548e-04, 1.9706e-01, 7.9704e-01, 9.8294e-03, 1

loss:  tensor(0.1752, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5095, 0.2442, 0.8469, 0.8727, 0.1024, 0.0364, 0.0390])
output:  tensor([0.5835, 0.3314, 0.7005, 0.6879, 0.1866, 0.0736, 0.4131],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0310, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0640, 0.2778, 0.9144, 0.4382, 0.1827, 0.0507, 0.3062])
output:  tensor([0.2387, 0.4190, 0.8492, 0.5908, 0.2163, 0.1199, 0.4561],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0152, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([6.2819e-05, 3.6599e-01, 6.9070e-01, 1.1207e-06, 2.5383e-01, 4.5781e-02,
        1.9589e-01])
output:  tensor([0.1609, 0.4429, 0.8623, 0.6174, 0.2106, 0.1139, 0.4394],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0726, device='cuda:0', grad_fn=<MseLossBackward0

loss:  tensor(0.0175, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0519, 0.2579, 0.4816, 0.1239, 0.0557, 0.0334, 0.1585])
output:  tensor([0.3502, 0.5547, 0.5321, 0.6697, 0.1549, 0.1014, 0.5176],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0887, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9808, 0.1590, 0.3183, 0.9702, 0.1336, 0.0628, 0.0292])
output:  tensor([0.8717, 0.3140, 0.0856, 0.7213, 0.2187, 0.1519, 0.1477],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0259, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5565, 0.4700, 0.6930, 0.8720, 0.1506, 0.0260, 0.2817])
output:  tensor([0.2812, 0.3695, 0.8133, 0.6482, 0.1965, 0.0950, 0.3872],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0241, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.5938, 0.5263, 0.4766, 0.6535, 0.1897, 0.0955, 0.4284],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0417, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9707, 0.4242, 0.0238, 0.0040, 0.1116, 0.0361, 0.2895])
output:  tensor([0.8821, 0.4206, 0.2199, 0.5758, 0.2035, 0.1648, 0.3559],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0575, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([4.7019e-01, 5.6721e-01, 1.5928e-01, 2.9220e-07, 3.6678e-01, 4.0845e-02,
        2.0506e-01])
output:  tensor([0.8105, 0.3648, 0.2695, 0.7367, 0.1405, 0.0838, 0.2837],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1101, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2492, 0.8413, 0.5189, 0.8372, 0.2705, 0.0991, 0.6182])
output:  tensor([0.5253, 0.5753, 0.4642, 0.6475, 0.1536, 0.0988,

output:  tensor([0.1844, 0.5950, 0.7130, 0.5511, 0.2128, 0.1101, 0.6454],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0247, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0020, 0.3457, 0.9706, 0.0019, 0.1350, 0.1551, 0.2559])
output:  tensor([0.3159, 0.3671, 0.8016, 0.6310, 0.1962, 0.1053, 0.4088],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0790, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0267, 0.7868, 0.0708, 0.9298, 0.0781, 0.0605, 0.0842])
output:  tensor([0.3855, 0.5906, 0.3058, 0.7170, 0.1772, 0.1442, 0.3099],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0479, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1051, 0.6223, 0.6874, 0.9418, 0.0966, 0.0537, 0.1058])
output:  tensor([0.8659, 0.3839, 0.3413, 0.6652, 0.1705, 0.0887, 0.3972],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.0091, 0.4861, 0.6902, 0.0628, 0.1131, 0.0308, 0.7951])
output:  tensor([0.6201, 0.4312, 0.6242, 0.6273, 0.1984, 0.0852, 0.4481],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1186, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7151, 0.7845, 0.3573, 0.7948, 0.1040, 0.0685, 0.8029])
output:  tensor([0.5983, 0.5317, 0.3723, 0.6101, 0.1774, 0.1149, 0.4718],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0327, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8553, 0.3163, 0.6511, 0.9619, 0.1117, 0.0473, 0.5322])
output:  tensor([0.4767, 0.3443, 0.6966, 0.6796, 0.1880, 0.0903, 0.4083],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0356, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9932, 0.4884, 0.3853, 0.9003, 0.1104, 0.0474, 0

loss:  tensor(0.0619, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8960, 0.5463, 0.8333, 0.9669, 0.1629, 0.0322, 0.7757])
output:  tensor([0.4217, 0.4486, 0.6126, 0.7185, 0.1642, 0.0992, 0.3632],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0742, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0032, 0.1264, 0.8071, 0.2708, 0.2001, 0.0509, 0.3526])
output:  tensor([0.6649, 0.3730, 0.5614, 0.6330, 0.1892, 0.0702, 0.4728],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1007, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.3295, 0.2437, 0.9698, 0.1169, 0.0492, 0.6420])
output:  tensor([0.8487, 0.4007, 0.3009, 0.7380, 0.1808, 0.0918, 0.3548],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0246, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0159, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8350, 0.2511, 0.7918, 0.9582, 0.1079, 0.0453, 0.1457])
output:  tensor([0.5667, 0.3733, 0.6353, 0.6897, 0.1654, 0.0755, 0.4035],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0363, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9957, 0.3376, 0.2650, 0.9720, 0.2734, 0.0640, 0.4578])
output:  tensor([0.8119, 0.4148, 0.2722, 0.6217, 0.1841, 0.1127, 0.4594],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0247, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.6224, 0.5296, 0.9054, 0.6553, 0.1119, 0.4635, 0.7164])
output:  tensor([0.2325, 0.3558, 0.8181, 0.6578, 0.1872, 0.1071, 0.3554],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0647, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.1478, 0.6414, 0.7375, 0.6642, 0.2097, 0.1285, 0.4654],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0065, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7059, 0.1763, 0.1914, 0.9188, 0.1063, 0.0417, 0.1627])
output:  tensor([0.1922, 0.6278, 0.6503, 0.6572, 0.1708, 0.0931, 0.5169],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1256, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9941, 0.3623, 0.4617, 0.9397, 0.3356, 0.0278, 0.6505])
output:  tensor([0.6516, 0.4054, 0.4999, 0.7102, 0.1763, 0.0947, 0.3452],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0423, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0119, 0.8625, 0.4364, 0.8681, 0.1117, 0.0595, 0.7310])
output:  tensor([0.4861, 0.5972, 0.3312, 0.6547, 0.1685, 0.1512, 0.4510],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.1223, 0.8199, 0.7261, 0.9492, 0.0976, 0.4936, 0.3246])
output:  tensor([0.2066, 0.6955, 0.6984, 0.5477, 0.2122, 0.1270, 0.6460],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0622, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([7.4495e-01, 4.8384e-01, 1.9704e-01, 5.6510e-07, 1.0892e-01, 3.5881e-02,
        1.3810e-01])
output:  tensor([0.4148, 0.5669, 0.4802, 0.6590, 0.1597, 0.1017, 0.4413],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1042, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0071, 0.6847, 0.8749, 0.9465, 0.0960, 0.0362, 0.7357])
output:  tensor([0.5673, 0.4686, 0.7439, 0.5487, 0.2337, 0.1181, 0.5659],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0844, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8878, 0.673

loss:  tensor(0.0650, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9911, 0.2298, 0.3001, 0.9581, 0.1901, 0.0323, 0.1630])
output:  tensor([0.7602, 0.3598, 0.5221, 0.7343, 0.1774, 0.0930, 0.3405],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0293, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9542, 0.4493, 0.7603, 0.8521, 0.8771, 0.0300, 0.7574])
output:  tensor([0.6045, 0.3210, 0.6422, 0.6688, 0.1868, 0.0946, 0.3773],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1159, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.9886e-04, 4.0274e-01, 8.5368e-01, 5.5188e-01, 1.9233e-01, 3.8477e-02,
        4.8369e-01])
output:  tensor([0.5696, 0.2874, 0.7544, 0.6633, 0.1967, 0.0907, 0.3233],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0555, device='cuda:0', grad_fn=<MseLossBackward0

len labels =  7
len otput[i] =  7
labels:  tensor([0.1011, 0.2379, 0.5489, 0.1994, 0.2897, 0.0286, 0.2241])
output:  tensor([0.3418, 0.3745, 0.7872, 0.6318, 0.2069, 0.1085, 0.4447],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0546, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.8753e-02, 6.7697e-01, 7.0118e-01, 2.1275e-06, 7.2696e-02, 3.7033e-02,
        4.7790e-01])
output:  tensor([0.2734, 0.4461, 0.7751, 0.6182, 0.2099, 0.1086, 0.4915],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0743, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9220, 0.3504, 0.5886, 0.8612, 0.1206, 0.1873, 0.5380])
output:  tensor([0.9145, 0.4196, 0.2157, 0.6434, 0.2123, 0.1287, 0.3750],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0328, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7885, 0.458

output:  tensor([0.8044, 0.5431, 0.3378, 0.5159, 0.2029, 0.1268, 0.4811],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1681, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2579, 0.1499, 0.7716, 0.9591, 0.3177, 0.0380, 0.1822])
output:  tensor([0.7133, 0.3087, 0.6314, 0.6142, 0.2040, 0.0745, 0.4547],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0657, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7879, 0.1562, 0.9340, 0.9349, 0.3361, 0.1162, 0.0290])
output:  tensor([0.5204, 0.3081, 0.7774, 0.6435, 0.2120, 0.0959, 0.3065],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0424, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0261, 0.8341, 0.5614, 0.7997, 0.2435, 0.0508, 0.7775])
output:  tensor([0.1769, 0.6989, 0.5419, 0.6371, 0.1774, 0.1377, 0.5693],
       device='cuda:0', gr

len labels =  7
len otput[i] =  7
labels:  tensor([0.4727, 0.4496, 0.6708, 0.8818, 0.3769, 0.0328, 0.2953])
output:  tensor([0.4726, 0.4701, 0.6338, 0.6699, 0.1769, 0.0803, 0.4272],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0152, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9215, 0.5839, 0.3711, 0.4127, 0.0885, 0.0395, 0.5144])
output:  tensor([0.8799, 0.3794, 0.3368, 0.6627, 0.1750, 0.0884, 0.3930],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0188, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9562, 0.6657, 0.3576, 0.8928, 0.1303, 0.0324, 0.6530])
output:  tensor([0.6501, 0.4906, 0.4044, 0.6751, 0.1581, 0.1049, 0.4322],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0327, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([9.9080e-01, 6.1592e-01, 3.5197e-01, 1.9982e-04, 1

output:  tensor([0.8211, 0.4741, 0.2478, 0.5159, 0.2523, 0.1601, 0.4221],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1224, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7608, 0.4036, 0.7781, 0.8804, 0.0639, 0.0425, 0.7001])
output:  tensor([0.5476, 0.3949, 0.7268, 0.5997, 0.2066, 0.0921, 0.4614],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0295, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9823, 0.5595, 0.6836, 0.9616, 0.1244, 0.0465, 0.6800])
output:  tensor([0.5132, 0.4838, 0.5061, 0.6967, 0.1640, 0.1229, 0.3472],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0636, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0112, 0.2495, 0.6169, 0.2495, 0.1541, 0.0317, 0.2371])
output:  tensor([0.4440, 0.3650, 0.7218, 0.6024, 0.1889, 0.0812, 0.4163],
       device='cuda:0', gr

labels:  tensor([0.8970, 0.6907, 0.8008, 0.8966, 0.1053, 0.1249, 0.3176])
output:  tensor([0.3217, 0.6293, 0.6312, 0.6248, 0.1847, 0.1094, 0.5723],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0727, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9646, 0.6236, 0.7828, 0.5099, 0.1045, 0.1716, 0.3779])
output:  tensor([0.3285, 0.3205, 0.8324, 0.6420, 0.1922, 0.0919, 0.3256],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0762, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9870, 0.2930, 0.1650, 0.9546, 0.1027, 0.0366, 0.2985])
output:  tensor([0.7388, 0.3653, 0.6084, 0.5841, 0.2003, 0.0866, 0.4409],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0618, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9343, 0.3036, 0.5062, 0.8821, 0.1142, 0.0346, 0.1436])
output:  tensor([0.7825, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.9956, 0.3107, 0.0042, 0.9007, 0.0905, 0.0443, 0.0604])
output:  tensor([0.9082, 0.3886, 0.0889, 0.8591, 0.1187, 0.1152, 0.2182],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0076, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8803, 0.3337, 0.6463, 0.7843, 0.7044, 0.0640, 0.1137])
output:  tensor([0.4658, 0.4385, 0.7700, 0.6182, 0.2010, 0.1028, 0.4871],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0886, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3000, 0.6830, 0.1140, 0.8338, 0.1593, 0.1233, 0.8781])
output:  tensor([0.4722, 0.4870, 0.5424, 0.6761, 0.1518, 0.0854, 0.4582],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0649, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7779, 0.5570, 0.3650, 0.9350, 0.1095, 0.0484, 0

len labels =  7
len otput[i] =  7
labels:  tensor([0.0526, 0.6379, 0.3285, 0.9255, 0.0941, 0.0392, 0.1237])
output:  tensor([0.5140, 0.5316, 0.3701, 0.7706, 0.1363, 0.1066, 0.3793],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0459, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1733, 0.6850, 0.8632, 0.8347, 0.1236, 0.0436, 0.2526])
output:  tensor([0.2088, 0.6248, 0.7827, 0.5213, 0.2310, 0.1177, 0.6551],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0412, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7161, 0.5149, 0.3421, 0.0442, 0.2115, 0.5907, 0.2845])
output:  tensor([0.3903, 0.5320, 0.5673, 0.6727, 0.1501, 0.0826, 0.4588],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1206, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.2941, 0.4327, 0.1349, 0.8609, 0.1597, 0.0369, 0

loss:  tensor(0.0713, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8288, 0.6867, 0.9866, 0.9712, 0.2707, 0.0573, 0.9821])
output:  tensor([0.3858, 0.5719, 0.7833, 0.4796, 0.2599, 0.1347, 0.6309],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0888, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7187, 0.5932, 0.8728, 0.9274, 0.0889, 0.0717, 0.6815])
output:  tensor([0.1519, 0.5404, 0.7411, 0.6661, 0.1822, 0.1146, 0.4883],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0654, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([1.9450e-01, 3.0790e-01, 4.3977e-01, 1.5381e-04, 1.5574e-01, 3.2184e-02,
        3.9938e-02])
output:  tensor([0.5766, 0.4401, 0.6602, 0.6041, 0.1960, 0.0918, 0.4776],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1105, device='cuda:0', grad_fn=<MseLossBackward0

labels:  tensor([0.7459, 0.8338, 0.4712, 0.3280, 0.1283, 0.0458, 0.2261])
output:  tensor([0.5342, 0.5769, 0.6299, 0.6039, 0.1857, 0.1083, 0.5570],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([3.9132e-05, 3.5594e-01, 9.5864e-01, 2.1136e-01, 5.8006e-01, 6.0410e-02,
        2.7799e-01])
output:  tensor([0.2188, 0.3289, 0.8401, 0.6829, 0.1783, 0.0959, 0.2902],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0640, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9174, 0.5059, 0.7856, 0.9505, 0.1473, 0.1497, 0.8608])
output:  tensor([0.6370, 0.5253, 0.6662, 0.6190, 0.2085, 0.0938, 0.5641],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0426, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8774, 0.5294, 0.1970, 0.8383, 0.1114, 0.0395,

len labels =  7
len otput[i] =  7
labels:  tensor([0.8867, 0.7789, 0.4895, 0.6880, 0.1103, 0.2532, 0.7995])
output:  tensor([0.6186, 0.6568, 0.5570, 0.5265, 0.2269, 0.1513, 0.6207],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0248, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9875, 0.2517, 0.8090, 0.9596, 0.1196, 0.0335, 0.7838])
output:  tensor([0.4513, 0.3437, 0.8032, 0.5644, 0.2269, 0.1136, 0.5096],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0779, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8731, 0.4830, 0.3634, 0.6324, 0.0694, 0.2926, 0.6576])
output:  tensor([0.6220, 0.5391, 0.4639, 0.6129, 0.1756, 0.0944, 0.5125],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0212, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1523, 0.3617, 0.7523, 0.9236, 0.0995, 0.0498, 0

output:  tensor([0.4099, 0.4700, 0.6552, 0.6071, 0.1844, 0.1028, 0.5282],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8714, 0.3374, 0.2968, 0.6600, 0.0685, 0.0312, 0.4551])
output:  tensor([0.6053, 0.5099, 0.4433, 0.6381, 0.1746, 0.1072, 0.4590],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0199, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0272, 0.4153, 0.6032, 0.0022, 0.1792, 0.0556, 0.1662])
output:  tensor([0.7983, 0.5618, 0.2241, 0.5572, 0.2152, 0.1572, 0.4550],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1661, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([7.6076e-04, 1.2988e-01, 9.3116e-01, 5.3111e-02, 6.9609e-01, 4.4203e-02,
        6.9043e-02])
output:  tensor([0.4591, 0.3590, 0.7568, 0.5561, 0.2131, 0.0931,

loss:  tensor(0.0334, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([5.0450e-01, 6.0718e-01, 4.6919e-01, 2.4451e-04, 3.2838e-01, 3.3703e-02,
        5.0510e-01])
output:  tensor([0.5393, 0.5065, 0.5897, 0.5706, 0.2016, 0.1051, 0.5105],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0532, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9911, 0.6048, 0.2919, 0.7404, 0.1124, 0.0337, 0.2246])
output:  tensor([0.6468, 0.5501, 0.4440, 0.6510, 0.1494, 0.0901, 0.4492],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0297, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9055, 0.5425, 0.2319, 0.0308, 0.1293, 0.0282, 0.3681])
output:  tensor([0.8265, 0.4866, 0.3357, 0.6174, 0.1947, 0.1209, 0.3774],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0539, device='cuda:0', grad_fn=<MseLossBackward0

loss:  tensor(0.0246, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.5526, 0.6923, 0.7708, 0.7341, 0.1101, 0.0768, 0.4293])
output:  tensor([0.5119, 0.4847, 0.7767, 0.5457, 0.2288, 0.1029, 0.5340],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0151, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0013, 0.4241, 0.6886, 0.8611, 0.1565, 0.0563, 0.1630])
output:  tensor([0.5620, 0.3642, 0.7132, 0.5583, 0.2121, 0.0979, 0.4280],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0693, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0253, 0.7004, 0.6393, 0.8406, 0.2664, 0.0459, 0.5459])
output:  tensor([0.5139, 0.4956, 0.7443, 0.5037, 0.2494, 0.1034, 0.6549],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0601, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

output:  tensor([0.5168, 0.4362, 0.5595, 0.6569, 0.1614, 0.0891, 0.4051],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0745, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9060, 0.2006, 0.5046, 0.9679, 0.1116, 0.0388, 0.0376])
output:  tensor([0.7609, 0.3641, 0.5758, 0.6438, 0.1914, 0.0902, 0.3934],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0419, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.4343, 0.3690, 0.5781, 0.9127, 0.3032, 0.0424, 0.5988])
output:  tensor([0.3651, 0.3659, 0.7516, 0.6731, 0.1911, 0.1011, 0.3783],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0224, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3574, 0.4629, 0.5826, 0.8165, 0.0834, 0.0253, 0.8503])
output:  tensor([0.4990, 0.4825, 0.5860, 0.6108, 0.1954, 0.1090, 0.4757],
       device='cuda:0', gr

output:  tensor([0.6331, 0.5062, 0.4462, 0.5847, 0.1834, 0.1184, 0.4099],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0357, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0353, 0.1619, 0.9186, 0.3539, 0.4109, 0.2255, 0.0490])
output:  tensor([0.3640, 0.4349, 0.6703, 0.7060, 0.1636, 0.0923, 0.4450],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0863, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0138, 0.0977, 0.8047, 0.7820, 0.2629, 0.0413, 0.2433])
output:  tensor([0.4077, 0.3515, 0.7402, 0.6084, 0.2011, 0.0850, 0.4234],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0417, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3287, 0.6663, 0.5136, 0.9188, 0.1195, 0.0337, 0.9291])
output:  tensor([0.2885, 0.5096, 0.6041, 0.7051, 0.1579, 0.1127, 0.4217],
       device='cuda:0', gr

loss:  tensor(0.0576, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.8783, 0.5675, 0.1107, 0.9051, 0.1565, 0.0347, 0.1770])
output:  tensor([0.8635, 0.4305, 0.2056, 0.6604, 0.1846, 0.1137, 0.3439],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0175, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9956, 0.5900, 0.0914, 0.9424, 0.4635, 0.0484, 0.7309])
output:  tensor([0.8454, 0.4796, 0.2021, 0.7655, 0.1489, 0.1448, 0.5261],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0326, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0107, 0.7105, 0.7505, 0.9533, 0.1188, 0.1905, 0.2859])
output:  tensor([0.1186, 0.7999, 0.5538, 0.6930, 0.1675, 0.1525, 0.5600],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0293, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

len otput[i] =  7
labels:  tensor([0.7431, 0.5638, 0.5138, 0.9447, 0.1412, 0.0644, 0.7503])
output:  tensor([0.4379, 0.5623, 0.5657, 0.6924, 0.1639, 0.1029, 0.4732],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0340, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3035, 0.8181, 0.4831, 0.6692, 0.0392, 0.1238, 0.5928])
output:  tensor([0.5829, 0.5859, 0.2983, 0.6337, 0.1931, 0.1544, 0.4148],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0320, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.7183, 0.2548, 0.9063, 0.9007, 0.1518, 0.8840])
output:  tensor([0.8486, 0.5138, 0.3236, 0.4957, 0.2585, 0.1694, 0.5276],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1109, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([8.0246e-05, 2.2828e-01, 9.1666e-01, 8.9103e-01, 2.7982e-01, 9.270

loss:  tensor(0.0813, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3887, 0.1020, 0.4555, 0.9251, 0.6940, 0.0372, 0.2370])
output:  tensor([0.3821, 0.3468, 0.7547, 0.6491, 0.1850, 0.0906, 0.4088],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0739, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0567, 0.6788, 0.7524, 0.8838, 0.1133, 0.1265, 0.7178])
output:  tensor([0.0687, 0.6865, 0.7699, 0.6219, 0.1939, 0.1307, 0.5789],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0136, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9875, 0.5351, 0.0236, 0.0202, 0.1098, 0.0306, 0.3367])
output:  tensor([0.8425, 0.4512, 0.1938, 0.6118, 0.2040, 0.1371, 0.3375],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0610, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

loss:  tensor(0.0584, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.9091, 0.1157, 0.1088, 0.9607, 0.1115, 0.0453, 0.0295])
output:  tensor([0.3991, 0.5532, 0.4457, 0.7335, 0.1451, 0.1034, 0.3537],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1038, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7183, 0.6759, 0.4265, 0.9154, 0.0907, 0.0648, 0.9146])
output:  tensor([0.6511, 0.5690, 0.3975, 0.5883, 0.1926, 0.1243, 0.5077],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.1308, 0.4118, 0.9014, 0.9536, 0.1780, 0.0455, 0.5443])
output:  tensor([0.4242, 0.3162, 0.7254, 0.7349, 0.1646, 0.0925, 0.3435],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0310, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7

labels:  tensor([0.1214, 0.4750, 0.4647, 0.0029, 0.3439, 0.5563, 0.7410])
output:  tensor([0.2528, 0.6591, 0.5438, 0.6388, 0.1749, 0.1331, 0.5608],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1003, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([2.9667e-04, 6.1612e-01, 8.7914e-01, 8.4014e-01, 3.9076e-01, 3.6356e-02,
        6.2295e-01])
output:  tensor([0.1437, 0.3972, 0.8293, 0.6924, 0.1945, 0.1218, 0.3644],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0293, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([6.9521e-01, 5.2350e-01, 6.0179e-01, 1.5000e-09, 1.3735e-01, 9.2458e-01,
        5.1638e-01])
output:  tensor([0.3636, 0.6421, 0.6795, 0.4894, 0.2274, 0.1284, 0.6174],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.3587, 0.3

len labels =  7
len otput[i] =  7
labels:  tensor([0.0252, 0.4383, 0.7331, 0.9467, 0.1985, 0.0395, 0.3472])
output:  tensor([0.5228, 0.4067, 0.5778, 0.6983, 0.1701, 0.0976, 0.3617],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0484, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.0373, 0.8672, 0.5236, 0.6174, 0.0651, 0.0933, 0.7889])
output:  tensor([0.5489, 0.5531, 0.4976, 0.5938, 0.2005, 0.1197, 0.5085],
       device='cuda:0', grad_fn=<SelectBackward0>)
loss:  tensor(0.0656, device='cuda:0', grad_fn=<MseLossBackward0>)
len labels =  7
len otput[i] =  7
labels:  tensor([0.7451, 0.6919, 0.4925, 0.8498, 0.1258, 0.0856, 0.8678])
output:  tensor([0.3164, 0.5887, 0.5789, 0.6725, 0.1824, 0.1164, 0.5048],
       device='cuda:0')
loss:  tensor(0.0528, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0021, 0.4327, 0.8246, 0.1568, 0.7232, 0.0538, 0.2460])
output:  tensor([0.2970, 0.3004, 0.8521, 0.6204

loss:  tensor(0.0148, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9942, 0.5192, 0.5410, 0.9375, 0.7555, 0.0436, 0.9816])
output:  tensor([0.6789, 0.3959, 0.6455, 0.5997, 0.2072, 0.0778, 0.4733],
       device='cuda:0')
loss:  tensor(0.1142, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1145, 0.6139, 0.9367, 0.0039, 0.1482, 0.0322, 0.9204])
output:  tensor([0.5174, 0.5483, 0.5433, 0.6413, 0.1688, 0.0920, 0.5091],
       device='cuda:0')
loss:  tensor(0.1287, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0012, 0.6507, 0.7860, 0.9264, 0.1021, 0.0408, 0.5259])
output:  tensor([0.1442, 0.5774, 0.7380, 0.6479, 0.1844, 0.1169, 0.5378],
       device='cuda:0')
loss:  tensor(0.0169, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.3012, 0.5318, 0.7645, 0.7727, 0.0837, 0.0270, 0.2425])
output:  tensor([0.4372, 0.3970, 0.8005, 0.6003, 0.2100, 0.1068, 0.4495],
       device='cuda:0')
loss:  tensor(0.0190

output:  tensor([0.3185, 0.7257, 0.6174, 0.5300, 0.2080, 0.1481, 0.6700],
       device='cuda:0')
loss:  tensor(0.0732, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9118, 0.5556, 0.9085, 0.9560, 0.0784, 0.0978, 0.8457])
output:  tensor([0.2942, 0.6341, 0.6231, 0.7282, 0.1789, 0.1370, 0.4514],
       device='cuda:0')
loss:  tensor(0.0983, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([3.8384e-05, 2.7603e-01, 8.1571e-01, 9.2643e-01, 2.7229e-01, 5.9422e-02,
        8.7516e-02])
output:  tensor([0.1033, 0.3655, 0.8564, 0.6975, 0.1798, 0.1126, 0.3025],
       device='cuda:0')
loss:  tensor(0.0186, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([9.8383e-01, 4.6533e-01, 1.1047e-01, 3.2109e-06, 1.1116e-01, 4.7049e-02,
        2.2959e-01])
output:  tensor([0.7489, 0.5209, 0.5398, 0.5512, 0.1876, 0.0980, 0.5210],
       device='cuda:0')
loss:  tensor(0.0914, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.995

len labels =  7
len otput[i] =  7
labels:  tensor([0.0008, 0.2350, 0.6625, 0.7806, 0.1124, 0.0453, 0.5156])
output:  tensor([0.4275, 0.3656, 0.7376, 0.6166, 0.1783, 0.0835, 0.4247],
       device='cuda:0')
loss:  tensor(0.0351, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1817, 0.7057, 0.6692, 0.6565, 0.1333, 0.0415, 0.2953])
output:  tensor([0.1886, 0.5200, 0.7544, 0.6531, 0.1901, 0.1018, 0.4252],
       device='cuda:0')
loss:  tensor(0.0094, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9231, 0.3377, 0.7140, 0.9313, 0.1096, 0.0361, 0.4591])
output:  tensor([0.6814, 0.3002, 0.7256, 0.6396, 0.1920, 0.0731, 0.3612],
       device='cuda:0')
loss:  tensor(0.0233, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([4.5130e-05, 5.1946e-01, 8.3361e-01, 2.7067e-01, 1.1673e-01, 5.6191e-02,
        6.2236e-01])
output:  tensor([0.1632, 0.3376, 0.8294, 0.6829, 0.1799, 0.1158, 0.3623],
       device='cuda:0')
loss:  tensor(0.0435, d

loss:  tensor(0.0353, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1830, 0.7279, 0.1155, 0.9050, 0.3403, 0.0890, 0.7415])
output:  tensor([0.7673, 0.4827, 0.1626, 0.6320, 0.2013, 0.1619, 0.3137],
       device='cuda:0')
loss:  tensor(0.0980, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0734, 0.5465, 0.5154, 0.0007, 0.0895, 0.0327, 0.5231])
output:  tensor([0.6887, 0.4826, 0.3153, 0.6532, 0.1645, 0.1070, 0.4011],
       device='cuda:0')
loss:  tensor(0.1249, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2526, 0.5841, 0.7209, 0.9655, 0.1114, 0.0405, 0.7356])
output:  tensor([0.4553, 0.4326, 0.7346, 0.6754, 0.1733, 0.0898, 0.4216],
       device='cuda:0')
loss:  tensor(0.0362, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([4.8554e-04, 7.4818e-01, 3.1405e-01, 9.4679e-01, 1.0008e-01, 4.1615e-01,
        3.6441e-01])
output:  tensor([0.1547, 0.5483, 0.6503, 0.6908, 0.1721, 0.1174, 0.4732],
       d

output:  tensor([0.1188, 0.4595, 0.8473, 0.6254, 0.2223, 0.1306, 0.4557],
       device='cuda:0')
loss:  tensor(0.0489, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([1.5352e-01, 6.3866e-01, 7.6257e-01, 5.0000e-10, 2.6485e-01, 3.0337e-01,
        6.5661e-01])
output:  tensor([0.2181, 0.5932, 0.7190, 0.5949, 0.2049, 0.1197, 0.5817],
       device='cuda:0')
loss:  tensor(0.0579, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9939, 0.2788, 0.2370, 0.8834, 0.8519, 0.0406, 0.4583])
output:  tensor([0.7943, 0.4564, 0.3293, 0.6084, 0.2127, 0.1158, 0.4655],
       device='cuda:0')
loss:  tensor(0.0814, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9949, 0.0746, 0.1827, 0.8332, 0.1366, 0.0354, 0.1015])
output:  tensor([0.7568, 0.4759, 0.4438, 0.7255, 0.1633, 0.1002, 0.4905],
       device='cuda:0')
loss:  tensor(0.0648, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2572, 0.6203, 0.4785, 0.9320, 0.0694, 0

loss:  tensor(0.0278, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([5.3678e-01, 7.1618e-01, 4.9870e-01, 5.4036e-05, 3.0513e-01, 3.8288e-01,
        7.8646e-01])
output:  tensor([0.2886, 0.6392, 0.6312, 0.5669, 0.1947, 0.1210, 0.6223],
       device='cuda:0')
loss:  tensor(0.0734, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([3.9410e-01, 8.2957e-01, 3.7741e-01, 1.6994e-04, 5.1791e-02, 3.2457e-01,
        9.6575e-01])
output:  tensor([0.5347, 0.5777, 0.5007, 0.5863, 0.1778, 0.1232, 0.5820],
       device='cuda:0')
loss:  tensor(0.0922, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9912, 0.3464, 0.9673, 0.9581, 0.3183, 0.1848, 0.1833])
output:  tensor([0.5461, 0.3151, 0.7413, 0.6323, 0.1877, 0.0877, 0.3067],
       device='cuda:0')
loss:  tensor(0.0569, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0464, 0.6571, 0.6660, 0.8936, 0.0697, 0.0488, 0.9607])
output:  tensor([0.5166, 0.5524, 0.5385, 0.591

labels:  tensor([0.0557, 0.4720, 0.9698, 0.7357, 0.3272, 0.0358, 0.8067])
output:  tensor([0.5532, 0.3423, 0.7408, 0.6490, 0.2062, 0.0908, 0.4112],
       device='cuda:0')
loss:  tensor(0.0712, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.7437, 0.2717, 0.6973, 0.9476, 0.1371, 0.1138, 0.0773])
output:  tensor([0.2864, 0.4838, 0.6720, 0.7003, 0.1672, 0.0857, 0.3436],
       device='cuda:0')
loss:  tensor(0.0555, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.7653, 0.6341, 0.4631, 0.8759, 0.1548, 0.0247, 0.5566])
output:  tensor([0.6222, 0.4248, 0.5312, 0.6552, 0.1798, 0.0829, 0.4548],
       device='cuda:0')
loss:  tensor(0.0189, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.6961, 0.4927, 0.9990, 0.9609, 0.1086, 0.1582, 0.1136])
output:  tensor([0.4594, 0.5237, 0.6819, 0.4812, 0.2313, 0.1146, 0.6357],
       device='cuda:0')
loss:  tensor(0.0967, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0

len labels =  7
len otput[i] =  7
labels:  tensor([5.2553e-01, 8.2159e-01, 4.4406e-01, 9.3528e-06, 7.4041e-02, 3.6145e-01,
        9.5192e-01])
output:  tensor([0.4380, 0.5740, 0.7305, 0.4837, 0.2376, 0.1263, 0.6766],
       device='cuda:0')
loss:  tensor(0.0775, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2508, 0.2456, 0.4643, 0.7992, 0.4457, 0.0279, 0.7998])
output:  tensor([0.7610, 0.3903, 0.4326, 0.6643, 0.1841, 0.0842, 0.4221],
       device='cuda:0')
loss:  tensor(0.0735, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9738, 0.0535, 0.2762, 0.9564, 0.1102, 0.1021, 0.0622])
output:  tensor([0.9223, 0.3426, 0.2780, 0.7179, 0.1552, 0.0753, 0.3275],
       device='cuda:0')
loss:  tensor(0.0309, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1710, 0.5591, 0.9154, 0.8854, 0.1445, 0.0506, 0.7919])
output:  tensor([0.4944, 0.4136, 0.7863, 0.5397, 0.2645, 0.1176, 0.4976],
       device='cuda:0')
loss:  tensor(0.0525, d

len labels =  7
len otput[i] =  7
labels:  tensor([0.0647, 0.2526, 0.7865, 0.8660, 0.0789, 0.0370, 0.6948])
output:  tensor([0.2912, 0.4613, 0.7693, 0.5941, 0.2001, 0.0906, 0.5216],
       device='cuda:0')
loss:  tensor(0.0310, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9918, 0.1691, 0.0207, 0.9077, 0.1016, 0.0315, 0.1721])
output:  tensor([0.9212, 0.3326, 0.1013, 0.8106, 0.1536, 0.1225, 0.2700],
       device='cuda:0')
loss:  tensor(0.0097, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9077, 0.6969, 0.3347, 0.9673, 0.1007, 0.1383, 0.9010])
output:  tensor([0.6547, 0.5242, 0.2842, 0.6994, 0.1561, 0.1290, 0.4270],
       device='cuda:0')
loss:  tensor(0.0566, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([6.0744e-04, 7.2228e-01, 8.7642e-01, 8.2485e-01, 4.3739e-02, 3.9640e-02,
        6.6703e-01])
output:  tensor([0.1649, 0.4674, 0.8540, 0.5633, 0.2486, 0.1370, 0.5079],
       device='cuda:0')
loss:  tensor(0.0339, d

output:  tensor([0.5147, 0.5575, 0.5535, 0.6400, 0.1806, 0.1191, 0.4859],
       device='cuda:0')
loss:  tensor(0.0398, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0583, 0.6056, 0.8612, 0.8599, 0.1124, 0.0330, 0.6779])
output:  tensor([0.1412, 0.5757, 0.7886, 0.5978, 0.2329, 0.1368, 0.5900],
       device='cuda:0')
loss:  tensor(0.0164, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9925, 0.2812, 0.0903, 0.9455, 0.0893, 0.0408, 0.1139])
output:  tensor([0.8657, 0.4485, 0.2112, 0.6453, 0.1908, 0.1435, 0.3345],
       device='cuda:0')
loss:  tensor(0.0312, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0019, 0.7644, 0.3666, 0.9112, 0.0821, 0.0481, 0.4878])
output:  tensor([0.2131, 0.7067, 0.4861, 0.6658, 0.1618, 0.1304, 0.4990],
       device='cuda:0')
loss:  tensor(0.0194, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2210, 0.6002, 0.5432, 0.5076, 0.2784, 0.0306, 0.0592])
output:  tensor([0.4

output:  tensor([0.5698, 0.4487, 0.7322, 0.5791, 0.2123, 0.1071, 0.4886],
       device='cuda:0')
loss:  tensor(0.0658, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0149, 0.8090, 0.3757, 0.9241, 0.1115, 0.1069, 0.2210])
output:  tensor([0.1196, 0.7694, 0.6920, 0.5890, 0.1972, 0.1603, 0.6509],
       device='cuda:0')
loss:  tensor(0.0600, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4043, 0.4766, 0.9395, 0.9549, 0.3182, 0.5660, 0.0389])
output:  tensor([0.9106, 0.4237, 0.1908, 0.7402, 0.1522, 0.1032, 0.3071],
       device='cuda:0')
loss:  tensor(0.1685, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9266, 0.2645, 0.8725, 0.9513, 0.1173, 0.1292, 0.0384])
output:  tensor([0.7992, 0.3281, 0.5911, 0.6918, 0.1886, 0.0735, 0.3971],
       device='cuda:0')
loss:  tensor(0.0434, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.7966, 0.4051, 0.4103, 0.7752, 0.1081, 0.0384, 0.6103])
output:  tensor([0.5

output:  tensor([0.1379, 0.6580, 0.6903, 0.6148, 0.1936, 0.1348, 0.6020],
       device='cuda:0')
loss:  tensor(0.0248, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9650, 0.2822, 0.9247, 0.9698, 0.1525, 0.0467, 0.2105])
output:  tensor([0.5849, 0.3612, 0.6515, 0.6822, 0.1890, 0.0898, 0.4130],
       device='cuda:0')
loss:  tensor(0.0503, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9829, 0.0670, 0.0139, 0.9746, 0.3328, 0.0511, 0.0266])
output:  tensor([0.9465, 0.4134, 0.2090, 0.6814, 0.1943, 0.1396, 0.3563],
       device='cuda:0')
loss:  tensor(0.0544, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9951, 0.2522, 0.1656, 0.9554, 0.2458, 0.0432, 0.5869])
output:  tensor([0.8187, 0.4356, 0.2455, 0.5860, 0.2489, 0.1826, 0.3689],
       device='cuda:0')
loss:  tensor(0.0392, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8310, 0.5888, 0.4204, 0.2453, 0.3237, 0.0605, 0.7530])
output:  tensor([0.5

output:  tensor([0.8033, 0.4600, 0.2458, 0.6739, 0.1907, 0.1126, 0.3272],
       device='cuda:0')
loss:  tensor(0.0193, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9795, 0.2960, 0.9368, 0.9696, 0.1916, 0.0596, 0.2038])
output:  tensor([0.8488, 0.2701, 0.4659, 0.7737, 0.1603, 0.0724, 0.2863],
       device='cuda:0')
loss:  tensor(0.0408, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.3757, 0.3975, 0.9730, 0.5133, 0.0475, 0.9654])
output:  tensor([0.9171, 0.4457, 0.1440, 0.6769, 0.1979, 0.1668, 0.4276],
       device='cuda:0')
loss:  tensor(0.0808, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.3290, 0.5356, 0.8192, 0.9392, 0.2265, 0.0331, 0.5895])
output:  tensor([0.1838, 0.5197, 0.7345, 0.7154, 0.1844, 0.0989, 0.4030],
       device='cuda:0')
loss:  tensor(0.0171, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.3499, 0.9151, 0.0817, 0.9156, 0.1065, 0.0857, 0.0886])
output:  tensor([0.6

labels:  tensor([0.3530, 0.3681, 0.9413, 0.7106, 0.1509, 0.1150, 0.4857])
output:  tensor([0.5990, 0.2921, 0.7280, 0.6857, 0.2110, 0.0796, 0.3074],
       device='cuda:0')
loss:  tensor(0.0213, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([3.0666e-04, 8.4960e-01, 7.9661e-01, 8.2302e-01, 9.8134e-02, 4.5113e-02,
        7.2410e-01])
output:  tensor([0.1404, 0.5939, 0.8010, 0.5957, 0.2298, 0.1444, 0.5576],
       device='cuda:0')
loss:  tensor(0.0274, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2860, 0.1827, 0.6915, 0.8827, 0.0935, 0.0535, 0.5091])
output:  tensor([0.4395, 0.5073, 0.2732, 0.7526, 0.1737, 0.1353, 0.2972],
       device='cuda:0')
loss:  tensor(0.0541, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4334, 0.4358, 0.8129, 0.8692, 0.1466, 0.0366, 0.5199])
output:  tensor([0.4280, 0.3447, 0.7792, 0.6515, 0.1868, 0.0902, 0.3844],
       device='cuda:0')
loss:  tensor(0.0114, device='cuda:0')
len labels =  7
le

loss:  tensor(0.0154, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9482, 0.6621, 0.0319, 0.0024, 0.0995, 0.1098, 0.2894])
output:  tensor([0.9270, 0.6140, 0.2645, 0.7954, 0.1981, 0.0953, 0.3439],
       device='cuda:0')
loss:  tensor(0.0998, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.3149, 0.1711, 0.9540, 0.3526, 0.0343, 0.7795])
output:  tensor([0.9172, 0.3895, 0.2325, 0.6548, 0.1883, 0.1473, 0.4346],
       device='cuda:0')
loss:  tensor(0.0377, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9905, 0.5183, 0.2800, 0.8692, 0.1272, 0.0354, 0.1935])
output:  tensor([0.8996, 0.3961, 0.3565, 0.6164, 0.1818, 0.0855, 0.3308],
       device='cuda:0')
loss:  tensor(0.0168, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0355, 0.8865, 0.4845, 0.8875, 0.0930, 0.3336, 0.0419])
output:  tensor([0.1432, 0.7243, 0.6719, 0.6301, 0.1750, 0.1265, 0.5777],
       device='cuda:0')
loss:  tensor(0.0680

labels:  tensor([1.6906e-01, 3.4392e-01, 8.5648e-01, 7.7600e-08, 4.0031e-01, 4.6325e-01,
        5.7542e-01])
output:  tensor([0.2019, 0.4288, 0.8073, 0.6714, 0.2017, 0.1246, 0.4197],
       device='cuda:0')
loss:  tensor(0.0914, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.3827, 0.5933, 0.6251, 0.6196, 0.0996, 0.0320, 0.9614])
output:  tensor([0.3742, 0.4847, 0.6840, 0.5910, 0.2092, 0.1071, 0.5196],
       device='cuda:0')
loss:  tensor(0.0327, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9570, 0.3717, 0.6596, 0.9077, 0.0745, 0.0861, 0.6079])
output:  tensor([0.7433, 0.4905, 0.4816, 0.5942, 0.1862, 0.0950, 0.5062],
       device='cuda:0')
loss:  tensor(0.0304, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9958, 0.3906, 0.2225, 0.9717, 0.4078, 0.0822, 0.5154])
output:  tensor([0.8972, 0.4199, 0.1756, 0.6782, 0.1672, 0.1190, 0.3857],
       device='cuda:0')
loss:  tensor(0.0250, device='cuda:0')
len labels =  7
le

labels:  tensor([0.9879, 0.1421, 0.1509, 0.8413, 0.1169, 0.0288, 0.1194])
output:  tensor([0.7823, 0.3881, 0.5019, 0.6859, 0.1673, 0.0734, 0.3446],
       device='cuda:0')
loss:  tensor(0.0436, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9749, 0.4136, 0.9102, 0.9442, 0.1576, 0.0459, 0.2782])
output:  tensor([0.8008, 0.4077, 0.3845, 0.7520, 0.1694, 0.0877, 0.2993],
       device='cuda:0')
loss:  tensor(0.0494, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0158, 0.7168, 0.7876, 0.7804, 0.1107, 0.0450, 0.5490])
output:  tensor([0.2861, 0.6419, 0.6788, 0.5556, 0.2151, 0.1383, 0.5937],
       device='cuda:0')
loss:  tensor(0.0232, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([2.3578e-05, 6.2255e-01, 3.4026e-01, 9.2029e-01, 8.7766e-02, 1.4567e-01,
        3.4447e-01])
output:  tensor([0.0500, 0.5589, 0.7990, 0.7604, 0.1951, 0.1572, 0.3204],
       device='cuda:0')
loss:  tensor(0.0364, device='cuda:0')
len labels =  7
le

loss:  tensor(0.0782, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9846, 0.3860, 0.8833, 0.9363, 0.3053, 0.1054, 0.6251])
output:  tensor([0.7603, 0.4093, 0.6161, 0.6190, 0.2098, 0.0948, 0.5178],
       device='cuda:0')
loss:  tensor(0.0348, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0015, 0.3149, 0.7233, 0.9082, 0.0748, 0.0734, 0.4309])
output:  tensor([0.1921, 0.7215, 0.6521, 0.5758, 0.1921, 0.1203, 0.6449],
       device='cuda:0')
loss:  tensor(0.0541, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5444, 0.3741, 0.8056, 0.7365, 0.0817, 0.0371, 0.6018])
output:  tensor([0.2818, 0.5429, 0.6663, 0.6558, 0.1813, 0.0988, 0.4847],
       device='cuda:0')
loss:  tensor(0.0215, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9161, 0.3618, 0.8520, 0.9254, 0.1025, 0.0506, 0.5852])
output:  tensor([0.5762, 0.3452, 0.6206, 0.7803, 0.1657, 0.0814, 0.3661],
       device='cuda:0')
loss:  tensor(0.0348

output:  tensor([0.8463, 0.4672, 0.2372, 0.6324, 0.2064, 0.1416, 0.3962],
       device='cuda:0')
loss:  tensor(0.0752, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0168, 0.7576, 0.5197, 0.0520, 0.1218, 0.0361, 0.4975])
output:  tensor([0.1257, 0.6698, 0.7093, 0.7391, 0.1985, 0.1477, 0.5143],
       device='cuda:0')
loss:  tensor(0.0780, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5286, 0.5650, 0.2777, 0.0496, 0.1217, 0.0717, 0.2483])
output:  tensor([0.4703, 0.6433, 0.4617, 0.5642, 0.1921, 0.1232, 0.5171],
       device='cuda:0')
loss:  tensor(0.0554, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([4.2072e-01, 5.7802e-01, 5.4938e-01, 1.4492e-05, 2.2572e-01, 3.6849e-02,
        7.7483e-01])
output:  tensor([0.2193, 0.4959, 0.7479, 0.5909, 0.1936, 0.1072, 0.5281],
       device='cuda:0')
loss:  tensor(0.0718, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5320, 0.4367, 0.9839, 0.9512, 0.1212, 0

output:  tensor([0.6530, 0.5619, 0.2123, 0.6721, 0.1951, 0.1864, 0.4271],
       device='cuda:0')
loss:  tensor(0.0726, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([3.1477e-04, 4.4596e-01, 6.7618e-01, 9.3859e-01, 1.0901e-01, 7.0281e-02,
        6.1155e-01])
output:  tensor([0.0832, 0.7237, 0.6957, 0.6106, 0.1884, 0.1468, 0.6155],
       device='cuda:0')
loss:  tensor(0.0292, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9358, 0.3651, 0.3471, 0.8960, 0.1150, 0.0313, 0.4383])
output:  tensor([0.6096, 0.4195, 0.5660, 0.6930, 0.1962, 0.1271, 0.3776],
       device='cuda:0')
loss:  tensor(0.0311, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9708, 0.3310, 0.8517, 0.9320, 0.0865, 0.0358, 0.8390])
output:  tensor([0.6089, 0.3427, 0.6522, 0.6314, 0.2118, 0.0899, 0.4181],
       device='cuda:0')
loss:  tensor(0.0653, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9933, 0.2994, 0.5775, 0.9568, 0.1257, 0

labels:  tensor([0.7872, 0.5366, 0.8258, 0.4961, 0.1171, 0.0391, 0.9536])
output:  tensor([0.8217, 0.3944, 0.5821, 0.6110, 0.2001, 0.0766, 0.4750],
       device='cuda:0')
loss:  tensor(0.0473, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0335, 0.3051, 0.7403, 0.8116, 0.1055, 0.0420, 0.2559])
output:  tensor([0.1624, 0.4788, 0.7599, 0.6839, 0.1778, 0.1030, 0.3955],
       device='cuda:0')
loss:  tensor(0.0131, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0027, 0.3661, 0.2800, 0.0046, 0.1720, 0.0640, 0.0705])
output:  tensor([0.1988, 0.6293, 0.5926, 0.6420, 0.1640, 0.1174, 0.5158],
       device='cuda:0')
loss:  tensor(0.1162, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0237, 0.8164, 0.7202, 0.9358, 0.0654, 0.0505, 0.5400])
output:  tensor([0.3513, 0.6119, 0.7065, 0.5789, 0.2120, 0.1338, 0.6085],
       device='cuda:0')
loss:  tensor(0.0443, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9

loss:  tensor(0.0330, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0301, 0.3366, 0.5340, 0.8870, 0.1116, 0.0421, 0.1729])
output:  tensor([0.3219, 0.4114, 0.6450, 0.6591, 0.1912, 0.1033, 0.4978],
       device='cuda:0')
loss:  tensor(0.0387, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2841, 0.6209, 0.5897, 0.9025, 0.1278, 0.5998, 0.2501])
output:  tensor([0.3761, 0.6871, 0.6781, 0.5183, 0.2292, 0.1626, 0.6551],
       device='cuda:0')
loss:  tensor(0.0762, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9035, 0.5214, 0.3331, 0.4445, 0.1434, 0.0311, 0.1401])
output:  tensor([0.5598, 0.5119, 0.5592, 0.5714, 0.2033, 0.0932, 0.5030],
       device='cuda:0')
loss:  tensor(0.0464, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0076, 0.1954, 0.0434, 0.9549, 0.1102, 0.0391, 0.0689])
output:  tensor([0.3621, 0.6158, 0.3011, 0.6088, 0.1911, 0.1830, 0.3938],
       device='cuda:0')
loss:  tensor(0.0888

loss:  tensor(0.0267, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([9.8213e-02, 6.1160e-01, 9.3712e-01, 2.0000e-10, 1.2165e-01, 4.1521e-01,
        5.8168e-01])
output:  tensor([0.2817, 0.5546, 0.7135, 0.5828, 0.2078, 0.1043, 0.5403],
       device='cuda:0')
loss:  tensor(0.0760, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1767, 0.4634, 0.9890, 0.6419, 0.6101, 0.3494, 0.1476])
output:  tensor([0.1737, 0.4471, 0.8230, 0.6464, 0.2131, 0.1222, 0.4025],
       device='cuda:0')
loss:  tensor(0.0432, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0537, 0.6834, 0.7532, 0.8269, 0.1067, 0.0408, 0.4494])
output:  tensor([0.2195, 0.6967, 0.6244, 0.5918, 0.1912, 0.1337, 0.5900],
       device='cuda:0')
loss:  tensor(0.0193, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0133, 0.5337, 0.8017, 0.1291, 0.2455, 0.0357, 0.4349])
output:  tensor([0.4397, 0.4014, 0.6734, 0.5921, 0.2039, 0.0916, 0.4899],
       d

output:  tensor([0.4203, 0.5635, 0.6863, 0.6213, 0.1951, 0.1046, 0.5064],
       device='cuda:0')
loss:  tensor(0.0598, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0012, 0.4569, 0.4665, 0.8823, 0.1907, 0.0322, 0.4099])
output:  tensor([0.3761, 0.3975, 0.7593, 0.6072, 0.1983, 0.0943, 0.4829],
       device='cuda:0')
loss:  tensor(0.0450, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([7.8244e-01, 4.0739e-01, 6.9489e-01, 4.9655e-04, 1.9315e-01, 7.2209e-02,
        6.4429e-01])
output:  tensor([0.3755, 0.2698, 0.8346, 0.6446, 0.2046, 0.1025, 0.3132],
       device='cuda:0')
loss:  tensor(0.1042, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([8.0217e-01, 5.8736e-01, 4.7906e-01, 8.1440e-07, 9.8501e-02, 2.9864e-02,
        4.7687e-01])
output:  tensor([0.6808, 0.4759, 0.4001, 0.6555, 0.1637, 0.1035, 0.4647],
       device='cuda:0')
loss:  tensor(0.0676, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.985

loss:  tensor(0.0801, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([8.7561e-05, 4.4525e-01, 9.9581e-01, 7.4234e-01, 3.3342e-01, 7.2464e-02,
        3.1464e-01])
output:  tensor([0.6050, 0.2467, 0.7685, 0.6295, 0.2051, 0.0846, 0.2975],
       device='cuda:0')
loss:  tensor(0.0695, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5383, 0.4775, 0.9110, 0.8752, 0.1523, 0.0357, 0.8472])
output:  tensor([0.4404, 0.4096, 0.6661, 0.6640, 0.1765, 0.1013, 0.4713],
       device='cuda:0')
loss:  tensor(0.0379, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([2.9689e-01, 4.4559e-01, 7.5476e-01, 1.4054e-05, 2.0669e-01, 7.5349e-02,
        2.8642e-01])
output:  tensor([0.5176, 0.3802, 0.7979, 0.6381, 0.1960, 0.0780, 0.3430],
       device='cuda:0')
loss:  tensor(0.0665, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4338, 0.7280, 0.3979, 0.8026, 0.1215, 0.0524, 0.2500])
output:  tensor([0.4187, 0.6010, 0.4535, 0.650

loss:  tensor(0.0095, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4263, 0.7394, 0.7334, 0.8723, 0.0924, 0.0401, 0.6305])
output:  tensor([0.4296, 0.4367, 0.7472, 0.5954, 0.2203, 0.0950, 0.4498],
       device='cuda:0')
loss:  tensor(0.0315, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9931, 0.3511, 0.0449, 0.2923, 0.0863, 0.0320, 0.0379])
output:  tensor([0.8730, 0.4155, 0.2800, 0.6411, 0.1533, 0.0797, 0.3428],
       device='cuda:0')
loss:  tensor(0.0422, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([8.7045e-05, 3.0569e-01, 9.1703e-01, 9.2009e-01, 9.6073e-02, 4.0672e-02,
        4.4332e-01])
output:  tensor([0.2396, 0.3818, 0.7976, 0.6471, 0.1956, 0.1132, 0.3788],
       device='cuda:0')
loss:  tensor(0.0245, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([1.9078e-02, 5.5538e-01, 7.8931e-01, 6.3960e-06, 2.9479e-01, 3.4694e-02,
        7.9014e-01])
output:  tensor([0.2158, 0.3191, 0.8709, 0.606

loss:  tensor(0.0319, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([3.0481e-03, 7.9784e-01, 8.6674e-01, 9.7055e-05, 2.5345e-01, 2.5073e-01,
        7.9703e-01])
output:  tensor([0.0760, 0.5933, 0.8205, 0.6596, 0.2162, 0.1242, 0.4961],
       device='cuda:0')
loss:  tensor(0.0846, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0012, 0.2747, 0.6684, 0.5462, 0.1601, 0.0429, 0.1582])
output:  tensor([0.1158, 0.4885, 0.7657, 0.6736, 0.1834, 0.1301, 0.4270],
       device='cuda:0')
loss:  tensor(0.0236, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9951, 0.4072, 0.9772, 0.9654, 0.2760, 0.0803, 0.4871])
output:  tensor([0.8048, 0.2899, 0.7089, 0.6835, 0.2056, 0.0777, 0.2923],
       device='cuda:0')
loss:  tensor(0.0349, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([1.0913e-04, 4.6817e-01, 8.3882e-01, 8.3638e-01, 5.5945e-01, 3.7006e-02,
        5.2319e-01])
output:  tensor([0.4062, 0.5293, 0.5809, 0.600

output:  tensor([0.4734, 0.3971, 0.6792, 0.6495, 0.1791, 0.0929, 0.4483],
       device='cuda:0')
loss:  tensor(0.0497, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8577, 0.6681, 0.1063, 0.9353, 0.1061, 0.0358, 0.1868])
output:  tensor([0.4851, 0.5595, 0.4018, 0.6954, 0.1446, 0.1229, 0.3844],
       device='cuda:0')
loss:  tensor(0.0491, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5394, 0.7356, 0.6762, 0.4543, 0.1674, 0.3190, 0.5821])
output:  tensor([0.2068, 0.6372, 0.6976, 0.5986, 0.2045, 0.1262, 0.5840],
       device='cuda:0')
loss:  tensor(0.0257, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4102, 0.7837, 0.4396, 0.8960, 0.1028, 0.0473, 0.8248])
output:  tensor([0.3047, 0.6953, 0.5625, 0.5661, 0.1799, 0.1224, 0.6046],
       device='cuda:0')
loss:  tensor(0.0290, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1298, 0.3431, 0.3190, 0.8828, 0.1416, 0.0352, 0.0600])
output:  tensor([0.8

loss:  tensor(0.0242, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0221, 0.8243, 0.7029, 0.6584, 0.3276, 0.1337, 0.2503])
output:  tensor([0.2271, 0.7069, 0.6265, 0.5542, 0.1847, 0.1202, 0.6468],
       device='cuda:0')
loss:  tensor(0.0357, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5816, 0.6633, 0.7984, 0.6262, 0.1284, 0.0315, 0.7639])
output:  tensor([0.4051, 0.4982, 0.7167, 0.5639, 0.2228, 0.1151, 0.4964],
       device='cuda:0')
loss:  tensor(0.0223, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1631, 0.4010, 0.8177, 0.1933, 0.3913, 0.0274, 0.6191])
output:  tensor([0.6889, 0.3581, 0.6270, 0.6485, 0.1902, 0.0923, 0.4294],
       device='cuda:0')
loss:  tensor(0.0861, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8809, 0.6201, 0.5038, 0.9175, 0.1282, 0.0403, 0.4426])
output:  tensor([0.5113, 0.4475, 0.6321, 0.6865, 0.1836, 0.0884, 0.3943],
       device='cuda:0')
loss:  tensor(0.0348

len labels =  7
len otput[i] =  7
labels:  tensor([0.7614, 0.8095, 0.5574, 0.9691, 0.1107, 0.1187, 0.5549])
output:  tensor([0.8425, 0.4792, 0.1235, 0.7896, 0.1385, 0.1247, 0.2667],
       device='cuda:0')
loss:  tensor(0.0600, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([1.0694e-02, 6.6580e-01, 5.4290e-01, 6.4609e-04, 1.4672e-01, 3.2987e-02,
        6.8748e-01])
output:  tensor([0.6422, 0.4977, 0.3804, 0.6435, 0.1503, 0.0996, 0.4098],
       device='cuda:0')
loss:  tensor(0.1355, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1962, 0.6183, 0.3396, 0.7810, 0.3675, 0.0459, 0.1332])
output:  tensor([0.1783, 0.7358, 0.4849, 0.6393, 0.1624, 0.1380, 0.5313],
       device='cuda:0')
loss:  tensor(0.0378, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9685, 0.4099, 0.3624, 0.4249, 0.1039, 0.0344, 0.8199])
output:  tensor([0.7350, 0.4026, 0.4927, 0.6548, 0.1821, 0.0760, 0.4184],
       device='cuda:0')
loss:  tensor(0.0419, d

len labels =  7
len otput[i] =  7
labels:  tensor([0.0100, 0.2817, 0.4717, 0.0068, 0.0883, 0.0316, 0.2537])
output:  tensor([0.4907, 0.4819, 0.6687, 0.6559, 0.1968, 0.0929, 0.3897],
       device='cuda:0')
loss:  tensor(0.1093, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.6254, 0.6712, 0.9046, 0.0228, 0.0839, 0.0558, 0.5617])
output:  tensor([0.2964, 0.4914, 0.7442, 0.6169, 0.1943, 0.1115, 0.4927],
       device='cuda:0')
loss:  tensor(0.0770, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9011, 0.6848, 0.7061, 0.9546, 0.3488, 0.0636, 0.5617])
output:  tensor([0.8775, 0.3139, 0.4878, 0.7026, 0.1731, 0.0735, 0.3310],
       device='cuda:0')
loss:  tensor(0.0476, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9899, 0.5462, 0.0493, 0.7035, 0.2924, 0.0330, 0.0703])
output:  tensor([0.9248, 0.3526, 0.1241, 0.7798, 0.1394, 0.1127, 0.2405],
       device='cuda:0')
loss:  tensor(0.0160, device='cuda:0')
len labels =  7
len 

loss:  tensor(0.0543, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0009, 0.2604, 0.5849, 0.7538, 0.2393, 0.0316, 0.5293])
output:  tensor([0.6060, 0.4681, 0.5830, 0.5674, 0.2024, 0.0788, 0.5276],
       device='cuda:0')
loss:  tensor(0.0640, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([6.7060e-04, 7.3796e-01, 7.0611e-01, 9.1077e-01, 1.9177e-01, 4.3057e-02,
        6.9674e-01])
output:  tensor([0.2095, 0.6141, 0.7082, 0.5948, 0.2079, 0.1102, 0.5835],
       device='cuda:0')
loss:  tensor(0.0252, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0039, 0.8446, 0.4073, 0.0981, 0.2886, 0.2438, 0.1117])
output:  tensor([0.4348, 0.6926, 0.3344, 0.5640, 0.1992, 0.1844, 0.5055],
       device='cuda:0')
loss:  tensor(0.0854, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0539, 0.2960, 0.5857, 0.9206, 0.4677, 0.0277, 0.5880])
output:  tensor([0.3327, 0.4752, 0.7221, 0.6721, 0.1790, 0.0884, 0.4374],
       d

output:  tensor([0.5267, 0.5739, 0.6514, 0.5236, 0.2188, 0.1200, 0.5741],
       device='cuda:0')
loss:  tensor(0.0541, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([7.4685e-01, 7.0196e-01, 2.9687e-01, 6.3174e-06, 1.8637e-01, 3.3149e-01,
        7.0064e-01])
output:  tensor([0.5325, 0.5706, 0.4150, 0.5964, 0.1843, 0.1333, 0.5016],
       device='cuda:0')
loss:  tensor(0.0731, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2858, 0.5419, 0.8731, 0.8771, 0.2251, 0.0594, 0.7730])
output:  tensor([0.5461, 0.4947, 0.5338, 0.6668, 0.1651, 0.0860, 0.4878],
       device='cuda:0')
loss:  tensor(0.0450, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.3701, 0.4050, 0.2495, 0.9644, 0.1057, 0.0297, 0.2240])
output:  tensor([0.6253, 0.5754, 0.3504, 0.6961, 0.1545, 0.1220, 0.4175],
       device='cuda:0')
loss:  tensor(0.0321, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.7707, 0.6905, 0.6550, 0.9414, 0.1517, 0

loss:  tensor(0.0326, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0452, 0.3768, 0.6890, 0.3595, 0.0981, 0.0414, 0.6721])
output:  tensor([0.2925, 0.3827, 0.8080, 0.6196, 0.1893, 0.1068, 0.4118],
       device='cuda:0')
loss:  tensor(0.0319, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1994, 0.7842, 0.5948, 0.7913, 0.1519, 0.0928, 0.5550])
output:  tensor([0.3975, 0.6506, 0.4948, 0.6182, 0.1747, 0.1040, 0.5281],
       device='cuda:0')
loss:  tensor(0.0141, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1257, 0.2759, 0.0118, 0.7990, 0.2643, 0.0371, 0.3171])
output:  tensor([0.9010, 0.4192, 0.4121, 0.6459, 0.1681, 0.0692, 0.4277],
       device='cuda:0')
loss:  tensor(0.1183, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9836, 0.6027, 0.0070, 0.7917, 0.0800, 0.4211, 0.7700])
output:  tensor([0.9151, 0.3940, 0.1726, 0.6953, 0.1781, 0.1144, 0.2610],
       device='cuda:0')
loss:  tensor(0.0640

loss:  tensor(0.0197, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.6742, 0.2100, 0.6490, 0.8322, 0.1559, 0.1369, 0.4674])
output:  tensor([0.6385, 0.4432, 0.4536, 0.7069, 0.1427, 0.0751, 0.3880],
       device='cuda:0')
loss:  tensor(0.0171, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0021, 0.5677, 0.5687, 0.8773, 0.0860, 0.0345, 0.8136])
output:  tensor([0.4834, 0.4391, 0.6282, 0.6019, 0.1873, 0.1020, 0.4612],
       device='cuda:0')
loss:  tensor(0.0667, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9939, 0.5902, 0.0248, 0.6939, 0.3749, 0.5052, 0.6810])
output:  tensor([0.9435, 0.3795, 0.1217, 0.7720, 0.1426, 0.1092, 0.2312],
       device='cuda:0')
loss:  tensor(0.0679, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0666, 0.8015, 0.6515, 0.9154, 0.0661, 0.1840, 0.3138])
output:  tensor([0.3962, 0.6490, 0.5286, 0.5921, 0.1819, 0.1427, 0.5527],
       device='cuda:0')
loss:  tensor(0.0462

output:  tensor([0.7266, 0.2875, 0.6967, 0.6089, 0.1926, 0.0691, 0.3938],
       device='cuda:0')
loss:  tensor(0.1027, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9382, 0.3401, 0.0408, 0.2142, 0.0868, 0.0427, 0.1230])
output:  tensor([0.8948, 0.3754, 0.1095, 0.7755, 0.1550, 0.1399, 0.2424],
       device='cuda:0')
loss:  tensor(0.0502, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.3914, 0.4865, 0.3149, 0.0092, 0.1748, 0.0321, 0.2116])
output:  tensor([0.8039, 0.3650, 0.4747, 0.6864, 0.1734, 0.0808, 0.3551],
       device='cuda:0')
loss:  tensor(0.0989, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2758, 0.4165, 0.6460, 0.6330, 0.0730, 0.0895, 0.6880])
output:  tensor([0.6391, 0.3745, 0.6813, 0.6184, 0.2048, 0.0890, 0.4338],
       device='cuda:0')
loss:  tensor(0.0310, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0026, 0.3957, 0.6879, 0.9562, 0.0979, 0.0350, 0.2939])
output:  tensor([0.0

output:  tensor([0.5467, 0.6151, 0.4972, 0.4806, 0.2234, 0.1400, 0.6161],
       device='cuda:0')
loss:  tensor(0.0700, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8943, 0.3132, 0.6572, 0.9364, 0.0921, 0.0388, 0.7028])
output:  tensor([0.6857, 0.3636, 0.7022, 0.6283, 0.1846, 0.0694, 0.4262],
       device='cuda:0')
loss:  tensor(0.0327, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0708, 0.4767, 0.3539, 0.9593, 0.1074, 0.0503, 0.3544])
output:  tensor([0.1887, 0.6229, 0.4804, 0.7366, 0.1474, 0.1003, 0.3916],
       device='cuda:0')
loss:  tensor(0.0152, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9891, 0.6640, 0.2414, 0.9265, 0.1165, 0.0397, 0.4067])
output:  tensor([0.7219, 0.5629, 0.4624, 0.6117, 0.1828, 0.0985, 0.4933],
       device='cuda:0')
loss:  tensor(0.0350, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0050, 0.6194, 0.8311, 0.0043, 0.2594, 0.0369, 0.8900])
output:  tensor([0.3

loss:  tensor(0.1107, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.6269, 0.6268, 0.6632, 0.7525, 0.1237, 0.0626, 0.2052])
output:  tensor([0.6475, 0.6103, 0.3691, 0.5002, 0.2216, 0.1651, 0.4866],
       device='cuda:0')
loss:  tensor(0.0357, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8762, 0.5201, 0.3305, 0.0159, 0.0923, 0.0377, 0.8217])
output:  tensor([0.5064, 0.5029, 0.5624, 0.6013, 0.1943, 0.0978, 0.5124],
       device='cuda:0')
loss:  tensor(0.0919, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0028, 0.7535, 0.6111, 0.9194, 0.2260, 0.0655, 0.7172])
output:  tensor([0.0784, 0.7878, 0.5955, 0.6939, 0.1574, 0.1423, 0.5494],
       device='cuda:0')
loss:  tensor(0.0138, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1657, 0.8348, 0.4204, 0.8433, 0.0789, 0.0808, 0.6683])
output:  tensor([0.4318, 0.6271, 0.4713, 0.5868, 0.1604, 0.1193, 0.5380],
       device='cuda:0')
loss:  tensor(0.0296

labels:  tensor([1.7292e-01, 4.2599e-01, 7.7313e-01, 1.8000e-09, 1.4089e-01, 2.9955e-02,
        7.3769e-01])
output:  tensor([0.7085, 0.3607, 0.6112, 0.6555, 0.1928, 0.0828, 0.4239],
       device='cuda:0')
loss:  tensor(0.1216, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.7256, 0.5409, 0.3309, 0.8397, 0.1114, 0.0360, 0.2688])
output:  tensor([0.2379, 0.6365, 0.5968, 0.6309, 0.1594, 0.0975, 0.5150],
       device='cuda:0')
loss:  tensor(0.0612, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8411, 0.5637, 0.1894, 0.0195, 0.1233, 0.0335, 0.2944])
output:  tensor([0.7916, 0.5184, 0.4195, 0.5877, 0.1825, 0.0905, 0.4600],
       device='cuda:0')
loss:  tensor(0.0592, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([2.2122e-01, 2.3950e-01, 6.3366e-01, 7.5734e-06, 2.5708e-01, 3.6349e-02,
        3.9697e-01])
output:  tensor([0.4786, 0.3410, 0.7324, 0.6248, 0.2000, 0.0876, 0.4422],
       device='cuda:0')
loss:  tensor(0.0692,

loss:  tensor(0.0726, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9905, 0.0806, 0.4185, 0.3316, 0.1406, 0.0481, 0.0650])
output:  tensor([0.3395, 0.4868, 0.6330, 0.6327, 0.1894, 0.1264, 0.4500],
       device='cuda:0')
loss:  tensor(0.1260, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8831, 0.2283, 0.9416, 0.8922, 0.1526, 0.0593, 0.3800])
output:  tensor([0.5979, 0.3012, 0.7781, 0.6491, 0.1884, 0.0751, 0.3151],
       device='cuda:0')
loss:  tensor(0.0255, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0267, 0.1226, 0.3121, 0.9764, 0.2541, 0.0452, 0.4110])
output:  tensor([0.4074, 0.3280, 0.7596, 0.6625, 0.1768, 0.0719, 0.3637],
       device='cuda:0')
loss:  tensor(0.0707, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0102, 0.4317, 0.7863, 0.0191, 0.1000, 0.7873, 0.0538])
output:  tensor([0.3254, 0.4641, 0.7669, 0.6601, 0.2125, 0.1248, 0.3598],
       device='cuda:0')
loss:  tensor(0.1510

loss:  tensor(0.0931, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1506, 0.7495, 0.9719, 0.8149, 0.2708, 0.1117, 0.7180])
output:  tensor([0.1992, 0.4189, 0.8289, 0.6602, 0.2094, 0.1182, 0.4125],
       device='cuda:0')
loss:  tensor(0.0362, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0155, 0.7184, 0.7811, 0.8726, 0.2743, 0.0276, 0.3489])
output:  tensor([0.1387, 0.4993, 0.8186, 0.6107, 0.2168, 0.1212, 0.4968],
       device='cuda:0')
loss:  tensor(0.0239, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([9.2916e-01, 1.6154e-01, 5.9011e-05, 9.4965e-01, 9.1073e-02, 1.2253e-01,
        2.9741e-02])
output:  tensor([0.8600, 0.3673, 0.2484, 0.7067, 0.1731, 0.1093, 0.3223],
       device='cuda:0')
loss:  tensor(0.0372, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0095, 0.5467, 0.8068, 0.0023, 0.0945, 0.0503, 0.5302])
output:  tensor([0.2968, 0.3803, 0.7268, 0.7132, 0.1867, 0.1020, 0.3774],
       d

loss:  tensor(0.0469, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([5.7686e-04, 2.1979e-01, 9.5444e-01, 8.0835e-01, 1.3429e-01, 1.0603e-01,
        4.5508e-01])
output:  tensor([0.2745, 0.3991, 0.7573, 0.6647, 0.1904, 0.0993, 0.3924],
       device='cuda:0')
loss:  tensor(0.0248, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0043, 0.3181, 0.9916, 0.0043, 0.2375, 0.1146, 0.0540])
output:  tensor([0.8003, 0.3572, 0.5772, 0.6512, 0.2042, 0.0814, 0.4243],
       device='cuda:0')
loss:  tensor(0.1950, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9943, 0.5707, 0.3721, 0.9727, 0.0922, 0.0425, 0.0376])
output:  tensor([0.6712, 0.3955, 0.4542, 0.7746, 0.1509, 0.0875, 0.3109],
       device='cuda:0')
loss:  tensor(0.0373, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9942, 0.5101, 0.5806, 0.9655, 0.4602, 0.0886, 0.0669])
output:  tensor([0.7361, 0.5192, 0.3166, 0.6499, 0.2232, 0.1850, 0.5081],
       d

output:  tensor([0.6813, 0.4820, 0.4023, 0.6728, 0.1501, 0.0854, 0.4000],
       device='cuda:0')
loss:  tensor(0.0906, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.7084, 0.2704, 0.5700, 0.9699, 0.1133, 0.0349, 0.0851])
output:  tensor([0.8676, 0.2851, 0.4839, 0.7576, 0.1543, 0.0615, 0.2882],
       device='cuda:0')
loss:  tensor(0.0174, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4617, 0.2916, 0.2516, 0.8804, 0.1324, 0.0376, 0.1216])
output:  tensor([0.2807, 0.5213, 0.5677, 0.7141, 0.1689, 0.0933, 0.4272],
       device='cuda:0')
loss:  tensor(0.0444, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1179, 0.8322, 0.4958, 0.8649, 0.1102, 0.0809, 0.5520])
output:  tensor([0.3240, 0.5700, 0.5813, 0.6809, 0.1744, 0.1150, 0.4923],
       device='cuda:0')
loss:  tensor(0.0230, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0562, 0.4203, 0.8553, 0.9123, 0.0555, 0.0317, 0.1735])
output:  tensor([0.3

labels:  tensor([0.6158, 0.7963, 0.3255, 0.8875, 0.1153, 0.0637, 0.6403])
output:  tensor([0.4380, 0.6626, 0.4779, 0.6050, 0.1829, 0.1254, 0.5142],
       device='cuda:0')
loss:  tensor(0.0253, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.5349, 0.4626, 0.3844, 0.8934, 0.1016, 0.0441, 0.1365])
output:  tensor([0.4427, 0.5168, 0.5724, 0.6952, 0.1537, 0.0798, 0.4362],
       device='cuda:0')
loss:  tensor(0.0257, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.4207, 0.3419, 0.5297, 0.9385, 0.1996, 0.0424, 0.1174])
output:  tensor([0.3132, 0.3912, 0.7509, 0.6418, 0.1984, 0.0974, 0.4193],
       device='cuda:0')
loss:  tensor(0.0350, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.0075, 0.7594, 0.5163, 0.9230, 0.2038, 0.1063, 0.4220])
output:  tensor([0.4898, 0.6471, 0.5054, 0.5144, 0.1841, 0.1328, 0.6130],
       device='cuda:0')
loss:  tensor(0.0643, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9

loss:  tensor(0.0302, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.8345, 0.5693, 0.2948, 0.0454, 0.7521, 0.0667, 0.8006])
output:  tensor([0.5740, 0.5657, 0.4696, 0.5617, 0.1868, 0.1220, 0.5222],
       device='cuda:0')
loss:  tensor(0.1093, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.9599, 0.2913, 0.8010, 0.9641, 0.1143, 0.0701, 0.0429])
output:  tensor([0.7075, 0.3579, 0.5563, 0.6521, 0.1775, 0.0961, 0.3619],
       device='cuda:0')
loss:  tensor(0.0474, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.2102, 0.7061, 0.5114, 0.8537, 0.2784, 0.0329, 0.3049])
output:  tensor([0.3444, 0.6177, 0.4029, 0.7576, 0.1432, 0.1061, 0.3620],
       device='cuda:0')
loss:  tensor(0.0105, device='cuda:0')
len labels =  7
len otput[i] =  7
labels:  tensor([0.1685, 0.4961, 0.3394, 0.7696, 0.2494, 0.0476, 0.5292])
output:  tensor([0.5359, 0.4795, 0.5380, 0.6478, 0.1848, 0.1081, 0.4336],
       device='cuda:0')
loss:  tensor(0.0295

In [330]:
modelSGDLR = MultilabelClassifier()
modelSGDLR.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [331]:
start = time.time()
epochs = 5
training2(modelSGDLR, train_dl, 5, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 6.507816326618195
average loss: 5.369981145858764
average loss: 4.785802811384201
average loss: 4.425922434031963
acousticness loss: 0.52
danceability loss: 0.45
energy loss: 0.45
instrumentalness loss: 0.44
liveness loss: 0.46
speechiness loss: 0.46
valence loss: 0.42
Average loss: 3.20
Epoch 1
average loss: 3.307794749736786
average loss: 3.2608783155679704
average loss: 3.2362954556941985
average loss: 3.204338525235653
acousticness loss: 0.48
danceability loss: 0.42
energy loss: 0.43
instrumentalness loss: 0.42
liveness loss: 0.44
speechiness loss: 0.41
valence loss: 0.40
Average loss: 3.00
Epoch 2
average loss: 3.1190743505954743
average loss: 3.038457578420639
average loss: 3.035413517554601
average loss: 3.0184851229190826
acousticness loss: 0.46
danceability loss: 0.40
energy loss: 0.41
instrumentalness loss: 0.40
liveness loss: 0.42
speechiness loss: 0.41
valence loss: 0.39
Average loss: 2.89
Epoch 3
average loss: 2.974479979276657
average loss: 2.9347055

In [306]:
modelMoreLR = MultilabelClassifier()
modelMoreLR.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [307]:
start = time.time()
epochs = 5
training2(modelMoreLR, train_dl, 3, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 6.47125391960144
average loss: 5.629506242275238
average loss: 4.840286892652512
average loss: 4.399884702265263
acousticness loss: 0.48
danceability loss: 0.40
energy loss: 0.42
instrumentalness loss: 0.42
liveness loss: 0.42
speechiness loss: 0.40
valence loss: 0.39
Average loss: 2.93
Epoch 1
average loss: 2.934494560956955
average loss: 2.8896665394306185
average loss: 2.8390700856844586
average loss: 2.8003051400184633
acousticness loss: 0.42
danceability loss: 0.38
energy loss: 0.38
instrumentalness loss: 0.38
liveness loss: 0.38
speechiness loss: 0.37
valence loss: 0.36
Average loss: 2.67
Epoch 2
average loss: 2.697541856765747
average loss: 2.681709146499634
average loss: 2.641622468829155
average loss: 2.6058847688138487
acousticness loss: 0.38
danceability loss: 0.34
energy loss: 0.36
instrumentalness loss: 0.35
liveness loss: 0.34
speechiness loss: 0.34
valence loss: 0.33
Average loss: 2.43
Finished Training
Train time 4125.0666790008545 seconds, 825.013

In [423]:
batch_size = 32# choose an appropriate batch size
new_loader = DataLoader(toBeLabeledData, batch_size=batch_size)

# Evaluate the model on the new set of spectrograms
modelSGDLR.eval()
with torch.no_grad():
    for batch in val_dl:
        inputs = batch[0].to(device)
        outputs = modelSGDLR(inputs)
        for i in range(batch_size):
            print("guess:",outputs[i])
            print("label:",extractLabels(batch[1],i))
#         print(outputs)
#         print(batch[1])

guess: tensor([0.0509, 0.7688, 0.5805, 0.9085, 0.1136, 0.1225, 0.3105],
       device='cuda:0')
label: tensor([0.7451, 0.6919, 0.4925, 0.8498, 0.1258, 0.0856, 0.8678])
guess: tensor([0.5035, 0.4902, 0.6354, 0.5709, 0.0849, 0.0563, 0.1436],
       device='cuda:0')
label: tensor([0.0021, 0.4327, 0.8246, 0.1568, 0.7232, 0.0538, 0.2460])
guess: tensor([0.9827, 0.3927, 0.1470, 0.9902, 0.0477, 0.0231, 0.0760],
       device='cuda:0')
label: tensor([0.7301, 0.4057, 0.3299, 0.9117, 0.2025, 0.0616, 0.0422])
guess: tensor([0.7216, 0.6330, 0.5254, 0.8690, 0.0827, 0.0497, 0.4367],
       device='cuda:0')
label: tensor([0.0118, 0.3882, 0.9073, 0.9266, 0.2879, 0.0380, 0.9061])
guess: tensor([0.8735, 0.6319, 0.7375, 0.5179, 0.1815, 0.2672, 0.5769],
       device='cuda:0')
label: tensor([0.0145, 0.6382, 0.7145, 0.3847, 0.1367, 0.0292, 0.5496])
guess: tensor([0.9833, 0.4327, 0.0486, 0.8703, 0.0850, 0.0976, 0.1762],
       device='cuda:0')
label: tensor([0.9956, 0.4119, 0.1035, 0.9656, 0.0770, 0.0439, 0

KeyboardInterrupt: 

In [None]:
[0.5089, 0.4934, 0.4833, 0.5658, 0.3435, 0.3011, 0.4650]]
[0.5280, 0.5032, 0.5000, 0.4981, 0.4837, 0.4912, 0.5217]]

In [290]:
modelMoreLayers = MultilabelClassifier()
modelMoreLayers.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [291]:
start = time.time()
epochs = 5
training2(modelMoreLayers, train_dl, 3, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 6.897604477405548
average loss: 6.305478936433792
average loss: 5.6685918986797335
average loss: 5.094630074501038
acousticness loss: 0.48
danceability loss: 0.42
energy loss: 0.43
instrumentalness loss: 0.42
liveness loss: 0.45
speechiness loss: 0.43
valence loss: 0.42
Average loss: 3.04
Epoch 1
average loss: 3.17913413643837
average loss: 3.1228254795074464
average loss: 3.067784917354584
average loss: 3.034180146455765
acousticness loss: 0.45
danceability loss: 0.38
energy loss: 0.41
instrumentalness loss: 0.39
liveness loss: 0.41
speechiness loss: 0.39
valence loss: 0.38
Average loss: 2.80
Epoch 2
average loss: 2.856695681810379
average loss: 2.8568764239549638


KeyboardInterrupt: 

In [278]:
model = MultilabelClassifier()
model.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [279]:
import time

In [280]:
start = time.time()
epochs = 5
training2(model, train_dl, 3, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 0.11293088812381029
average loss: 0.10601233821362258
average loss: 0.1004203526303172
average loss: 0.09295552968978882
acousticness loss: 0.57
danceability loss: 0.50
energy loss: 0.51
instrumentalness loss: 0.50
liveness loss: 0.51
speechiness loss: 0.51
valence loss: 0.48
Average loss: 3.57
Epoch 1
average loss: 0.052786702662706374
average loss: 0.05130268591456115
average loss: 0.050571791796634596
average loss: 0.049697867361828685
acousticness loss: 0.45
danceability loss: 0.40
energy loss: 0.41
instrumentalness loss: 0.40
liveness loss: 0.42
speechiness loss: 0.41
valence loss: 0.39
Average loss: 2.89
Epoch 2
average loss: 0.04661548780277371
average loss: 0.04642805797047913
average loss: 0.04622978841265043
average loss: 0.04630036554299295
acousticness loss: 0.45
danceability loss: 0.39
energy loss: 0.40
instrumentalness loss: 0.40
liveness loss: 0.41
speechiness loss: 0.40
valence loss: 0.37
Average loss: 2.82
Finished Training
Train time 4051.7000648

In [None]:
#training(model, device, 0.001, 2, train_dl)

In [164]:
model2 = MultilabelClassifier()
model2.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [165]:
start = time.time()
epochs = 5
training2(model2, train_dl, 3,batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 0.108964903652668
average loss: 0.1027186818420887
average loss: 0.09726419902096192
average loss: 0.09044033079408109
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.06
Epoch 1
average loss: 0.05588214611634612
average loss: 0.05390318823046982
average loss: 0.05264179725199938
average loss: 0.051857108669355514
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.05
Epoch 2
average loss: 0.04965486526489258
average loss: 0.049170148512348535
average loss: 0.04884137135619918
average loss: 0.048574045207351445
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.05
Finished Training
Train time 1123.020496368

In [247]:
torch.save(model2.state_dict(), "./mse3.pt")

In [168]:
start = time.time()
epochs = 5
training2(model2, train_dl, 5,batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 0.04863426648080349
average loss: 0.047295431001111866
average loss: 0.047831756776819626
average loss: 0.047983313398435715
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.05
Epoch 1
average loss: 0.04723782381042838
average loss: 0.04730176976881921
average loss: 0.04728270306562384
average loss: 0.04722315892577171
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.05
Epoch 2
average loss: 0.04691378735005856
average loss: 0.046353789325803516
average loss: 0.04597317818552256
average loss: 0.04586833214852959
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.04
Epoch 3
average loss: 0.045190654881298

In [242]:
start = time.time()
epochs = 5
training2(model2, train_dl, 2, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 0.04370927382260561
average loss: 0.04437635503709316
average loss: 0.04464876937369506
average loss: 0.044752048491500315
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.04
Epoch 1
average loss: 0.04484733622521162
average loss: 0.04456243747845292
average loss: 0.04428432133669655
average loss: 0.04419455979950726
acousticness loss: 0.01
danceability loss: 0.01
energy loss: 0.01
instrumentalness loss: 0.01
liveness loss: 0.01
speechiness loss: 0.01
valence loss: 0.01
Average loss: 0.04
Finished Training
Train time 1559.198213815689 seconds, 311.8396427631378 per epoch


In [298]:
with torch.no_grad():
    d = (val_ds[3][0]).unsqueeze(0)
    print(d.shape)
    output = modelMoreLayers(d.to(device))
    print(output)
        
    print(val_ds[2][0])


torch.Size([1, 2, 64, 1719])
tensor([[0.5244, 0.4885, 0.5382, 0.6526, 0.1808, 0.0815, 0.4356]],
       device='cuda:0')
tensor([[[  7.8497,   6.9969,   8.9079,  ...,   8.7589,  10.1137,  11.3289],
         [ 16.9362,  19.2113,  22.9567,  ...,  20.8911,  24.2855,  27.8140],
         [ 24.0604,  24.9446,  26.5424,  ...,  24.8856,  25.9694,  31.5921],
         ...,
         [-32.9033, -32.9033, -32.9033,  ..., -32.9033, -32.9033, -32.9033],
         [-32.9033, -32.9033, -32.9033,  ..., -32.9033, -32.9033, -32.9033],
         [-32.9033, -32.9033, -32.9033,  ..., -32.9033, -32.9033, -32.9033]],

        [[ 25.4756,  10.3245,   3.9496,  ...,  12.5629,  11.1997,  10.2107],
         [ 31.3277,  25.7910,  18.2814,  ...,  24.8400,  23.8563,  25.8615],
         [ 27.9848,  31.1613,  25.7062,  ...,  25.9389,  21.5055,  29.4315],
         ...,
         [-32.9033, -32.9033, -32.9033,  ..., -32.9033, -32.9033, -32.9033],
         [-32.9033, -32.9033, -32.9033,  ..., -32.9033, -32.9033, -32.9033],
   

In [149]:
model3 = MultilabelClassifier()
model3.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [150]:
start = time.time()
epochs = 5
training2(model3, train_dl, 5, batch_size)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0
average loss: 5.657956218719482
average loss: 5.646765744686126
average loss: 5.619418315092722
average loss: 5.60176653265953
acousticness loss: 0.85
danceability loss: 0.76
energy loss: 0.76
instrumentalness loss: 0.76
liveness loss: 0.76
speechiness loss: 0.75
valence loss: 0.76
Average loss: 5.39
Epoch 1
average loss: 5.464208805561066
average loss: 5.41899082660675
average loss: 5.390682844320933
average loss: 5.361174091696739
acousticness loss: 0.81
danceability loss: 0.73
energy loss: 0.73
instrumentalness loss: 0.73
liveness loss: 0.73
speechiness loss: 0.72
valence loss: 0.73
Average loss: 5.19
Epoch 2
average loss: 5.266837430000305
average loss: 5.265875309705734
average loss: 5.2740598320961
average loss: 5.274406424164772
acousticness loss: 0.81
danceability loss: 0.73
energy loss: 0.72
instrumentalness loss: 0.73
liveness loss: 0.73
speechiness loss: 0.72
valence loss: 0.73
Average loss: 5.17
Epoch 3
average loss: 5.227116477489472
average loss: 5.242999559640884

In [163]:
torch.save(model3.state_dict(), "./crossEnt")

In [161]:
with torch.no_grad():
    for i in val_dl:
        data = i[]
    d = (val_ds[0][0]).unsqueeze(0)
    print(d.shape)
    output = model3(d.to(device))
    print(output)
    print(val_ds[0][1])

            

torch.Size([1, 2, 64, 344])
tensor([[0.8132, 0.7911, 0.8265, 0.9452, 0.0252, 0.0106, 0.6579]],
       device='cuda:0')
{'acousticness': 0.552558797, 'danceability': 0.692302488, 'energy': 0.770812011, 'instrumentalness': 0.734063882, 'liveness': 0.110137088, 'speechiness': 0.076767169, 'tempo': 91.759, 'valence': 0.429265827}


In [191]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [192]:
model_ft = models.resnet50(pretrained=True)
# set_parameter_requires_grad(model_ft, False)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 8)



In [193]:
start = time.time()
epochs = 5
training2(model_ft, train_dl, 3)
finish = time.time()
print(f"Train time {finish-start} seconds, {(finish-start)/epochs} per epoch")

Epoch 0


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[8, 2, 64, 344] to have 3 channels, but got 2 channels instead

In [96]:
from operator import add
res_list = list(map(add, [3,1], [4,6]))
res_list

[7, 7]

In [509]:
allSongs = pd.read_csv("./allSongs.csv", low_memory=False)

In [510]:
allSongs["fileName"][52221]

'079999.wav'

In [511]:
allSongs.drop(allSongs.index[52222:allSongs.shape[0]], inplace=True)


In [512]:
allSongs

Unnamed: 0,folder,fileName,song_name,artist
0,/000,000002.wav,Food,AWOL
1,/000,000003.wav,Electric Ave,AWOL
2,/000,000005.wav,This World,AWOL
3,/000,000010.wav,Freeway,Kurt Vile
4,/000,000020.wav,Spiritual Level,Nicky Cook
...,...,...,...,...
52217,/079,079994.wav,Old Man's Beard,Hudson
52218,/079,079995.wav,Two Sun Mountain,Hudson
52219,/079,079996.wav,Cloches des vaches,MZ-N710
52220,/079,079998.wav,The Cobbler,Hudson


In [513]:
toBeLabeledData = SoundDS_NewData(allSongs, ".")

In [514]:
toBeLabeledData[0][1]

'Food by AWOL'

In [515]:
toBeLabeledData.getName(1)

('Electric Ave', 'AWOL')

In [421]:
MSEModel = MultilabelClassifier()
MSEModel.load_state_dict(torch.load("./mse3.pt"))
MSEModel.to(device)

MultilabelClassifier(
  (conv1): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  (relu1): ReLU()
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu2): ReLU()
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu3): ReLU()
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (relu4): ReLU()
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Sequential(
    (0): Conv2d(2, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stri

In [422]:
batch_size = 32# choose an appropriate batch size
new_loader = DataLoader(toBeLabeledData, batch_size=batch_size)

# Evaluate the model on the new set of spectrograms
MSEModel.eval()
with torch.no_grad():
    for batch in val_dl:
        inputs = batch[0].to(device)
        outputs = MSEModel(inputs)
        for i in range(batch_size):
            print("guess:",outputs[i])
            print("label:",extractLabels(batch[1],i))
#         print(outputs)
#         print(batch[1])

guess: tensor([0.6899, 0.3764, 0.6601, 0.9584, 0.1365, 0.1130, 0.3446],
       device='cuda:0')
label: tensor([0.7451, 0.6919, 0.4925, 0.8498, 0.1258, 0.0856, 0.8678])
guess: tensor([0.4816, 0.2864, 0.8356, 0.8800, 0.2008, 0.1435, 0.0919],
       device='cuda:0')
label: tensor([0.0021, 0.4327, 0.8246, 0.1568, 0.7232, 0.0538, 0.2460])
guess: tensor([0.9915, 0.1593, 0.1571, 0.9995, 0.1017, 0.0154, 0.1612],
       device='cuda:0')
label: tensor([0.7301, 0.4057, 0.3299, 0.9117, 0.2025, 0.0616, 0.0422])
guess: tensor([0.9522, 0.2175, 0.7185, 0.9933, 0.1340, 0.0638, 0.3708],
       device='cuda:0')
label: tensor([0.0118, 0.3882, 0.9073, 0.9266, 0.2879, 0.0380, 0.9061])
guess: tensor([0.9492, 0.3140, 0.7084, 0.9678, 0.1161, 0.1088, 0.3806],
       device='cuda:0')
label: tensor([0.0145, 0.6382, 0.7145, 0.3847, 0.1367, 0.0292, 0.5496])
guess: tensor([0.9804, 0.2335, 0.0836, 0.9977, 0.0461, 0.0131, 0.2133],
       device='cuda:0')
label: tensor([0.9956, 0.4119, 0.1035, 0.9656, 0.0770, 0.0439, 0

KeyboardInterrupt: 

In [477]:
torch.save(modelSGDLR.state_dict(), "./SGD.pt")

In [516]:
batch_size = 32# choose an appropriate batch size
new_loader = DataLoader(toBeLabeledData, batch_size=batch_size)
allResults = []
# Evaluate the model on the new set of spectrograms
modelSGDLR.eval()
asd = 0
with torch.no_grad():

    for batch in new_loader:

        inputs = batch[0].to(device)
        outputs = modelSGDLR(inputs)
        for i in range(batch_size):
            songInfo = [batch[1][i]]
            for label in outputs[i].tolist():
                songInfo.append(label)
            allResults.append(songInfo)

            asd+=1
        print(len(allResults))

#             print("guess:",outputs[i])
#             print("label:",extractLabels(batch[1],i))
#         print(outputs)
#         print(batch[1])
    

32
64
96
128
160
192
224
256
288
320
352
384
416
448
480
512
544
576
608
640
672
704
736
768
800
832
864
896
928
960
992
1024
1056
1088
1120
1152
1184
1216
1248
1280
1312
1344
1376
1408
1440
1472
1504
1536
1568
1600
1632
1664
1696
1728
1760
1792
1824
1856
1888
1920
1952
1984
2016
2048
2080
2112
2144
2176
2208
2240
2272
2304
2336
2368
2400
2432
2464
2496
2528
2560
2592
2624
2656
2688
2720
2752
2784
2816
2848
2880
2912
2944
2976
3008
3040
3072
3104
3136
3168
3200
3232
3264
3296
3328
3360
3392
3424
3456
3488
3520
3552
3584
3616
3648
3680
3712
3744
3776
3808
3840
3872
3904
3936
3968
4000
4032
4064
4096
4128
4160
4192
4224
4256
4288
4320
4352
4384
4416
4448
4480
4512
4544
4576
4608
4640
4672
4704
4736
4768
4800
4832
4864
4896
4928
4960
4992
5024
5056
5088
5120
5152
5184
5216
5248
5280
5312
5344
5376
5408
5440
5472
5504
5536
5568
5600
5632
5664
5696
5728
5760
5792
5824
5856
5888
5920
5952
5984
6016
6048
6080
6112
6144
6176
6208
6240
6272
6304
6336
6368
6400
6432
6464
6496
6528
6560
6592
6624

45600
45632
45664
45696
45728
45760
45792
45824
45856
45888
45920
45952
45984
46016
46048
46080
46112
46144
46176
46208
46240
46272
46304
46336
46368
46400
46432
46464
46496
46528
46560
46592
46624
46656
46688
46720
46752
46784
46816
46848
46880
46912
46944
46976
47008
47040
47072
47104
47136
47168
47200
47232
47264
47296
47328
47360
47392
47424
47456
47488
47520
47552
47584
47616
47648
47680
47712
47744
47776
47808
47840
47872
47904
47936
47968
48000
48032
48064
48096
48128
48160
48192
48224
48256
48288
48320
48352
48384
48416
48448
48480
48512
48544
48576
48608
48640
48672
48704
48736
48768
48800
48832
48864
48896
48928
48960
48992
49024
49056
49088
49120
49152
49184
49216
49248
49280
49312
49344
49376
49408
49440
49472
49504
49536
49568
49600
49632
49664
49696
49728
49760
49792
49824
49856
49888
49920
49952
49984
50016
50048
50080
50112
50144
50176
50208
50240
50272
50304
50336
50368
50400
50432
50464
50496
50528
50560
50592
50624
50656
50688
50720
50752
50784
50816
50848
50880
5091

IndexError: tuple index out of range

In [517]:
new_df = pd.DataFrame(columns=["song_artist_name","acousticness", 'danceability',
                               "energy", "instrumentalness", "liveness", "speechiness",
                               "valence"], data=allResults)
new_df.to_csv("./allSongLabels.csv", index=False)

In [475]:
len(results)

32

In [519]:
songDataset = pd.read_csv("./allSongsLabelwClusters.csv", low_memory=False)

In [520]:
len(songDataset)

52222

In [705]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch
from torchvision import transforms, datasets

In [785]:
from torch.utils.data import DataLoader, Dataset, random_split
import torchaudio
from torchvision import transforms, utils
# ----------------------------
# Sound Dataset
# ----------------------------
class SongLabelSet(Dataset):
  def __init__(self, df):
    self.df = df

  # ----------------------------
  # Number of items in dataset
  # ----------------------------
  def __len__(self):
    return len(self.df)    
    
  # ----------------------------
  # Get i'th item in dataset
  # ----------------------------
  def __getitem__(self, idx):
    values = [
        self.df["acousticness"][idx],
        self.df["danceability"][idx],
        self.df["energy"][idx],
        self.df["instrumentalness"][idx],
        self.df["liveness"][idx],
        self.df["speechiness"][idx],
        self.df["valence"][idx],
    ]
    values = torch.tensor(values, dtype=torch.float32)
    label = self.df["cluster_id"][idx]
    label = torch.tensor(self.df["cluster_id"][idx])
    return values, label


In [786]:
songData = SongLabelSet(songDataset)

In [787]:
songData[1]

(tensor([0.7395, 0.6931, 0.5902, 0.3392, 0.1644, 0.1984, 0.6502]),
 tensor(18., dtype=torch.float64))

In [788]:
num_items = len(songDataset)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(songData, [num_train, num_val])
num_items
len(val_ds)

10444

In [789]:
batch_size = 64

In [790]:
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=batch_size, shuffle=False)

In [791]:
import torch
import torch.nn as nn

class LabelToClusterModel(nn.Module):
    def __init__(self):
        super(LabelToClusterModel, self).__init__()
        self.fc1 = nn.Linear(7, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 256)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(256, 512)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(512, 500)
        self.softmax = nn.Softmax(dim=1)
        
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.zeros_(self.fc1.bias)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.zeros_(self.fc2.bias)
        nn.init.xavier_uniform_(self.fc3.weight)
        nn.init.zeros_(self.fc3.bias)
        nn.init.xavier_uniform_(self.fc4.weight)
        nn.init.zeros_(self.fc4.bias)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
        x = self.softmax(x)
        return x

In [792]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [799]:
model = LabelToClusterModel()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()
print(next(model.parameters()).device)

cuda:0


In [806]:
def trainLabel(model, optimizer, criterion, trainloader, testloader, num_epochs):
    model.train()
    lossList = []
    accuracyList = []
#     print(device)
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        val_loss = 0.0
        val_acc = 0.0
        for data in trainloader:
            
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            labels = labels.type(torch.LongTensor)

            optimizer.zero_grad()

            outputs = model(inputs)
            predicted_class = torch.argmax(outputs, dim=1)
            print(predicted_class)
#             break
            loss = criterion(predicted_class.to(device), labels.to(device))
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_acc += (predicted == labels.to(device)).sum().item()
            

 # Validation loop
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                labels = labels.type(torch.LongTensor)

                outputs = model(inputs.to(device))
                loss = criterion(outputs.to(device), labels.to(device))
                
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_acc += (predicted == labels.to(device)).sum().item()
        
        # Print epoch statistics
        train_loss /= len(trainloader.dataset)
        train_acc /= len(trainloader.dataset)
        val_loss /= len(testloader.dataset)
        val_acc /= len(testloader.dataset)
        print(f'Epoch {epoch+1}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}')

    print("Finished training")
    return lossList, accuracyList

In [807]:
lossList, accuracyList = trainLabel(model, optimizer, criterion, train_dl, val_dl, 50)

tensor([ 61,  21, 381, 330, 377, 330, 377, 474, 474, 381,  21, 377, 445, 377,
        474,  21, 330, 377, 377, 474, 377, 381, 474, 330, 377, 377, 474, 377,
        377, 377, 381, 381, 381, 474, 377, 377, 474, 377, 474, 377, 377, 330,
        377, 474, 381, 474, 377, 381,  21,  61, 474,  21, 377, 474, 393, 474,
        377, 330, 474, 377, 377, 381, 377, 381], device='cuda:0')


RuntimeError: Expected floating point type for target with class probabilities, got Long

In [808]:
# Define transform to normalize data
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Download and load training data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

# Download and load test data
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

NameError: name 'torchvision' is not defined