In [1]:
from model import DFL_VGG16,DFL_RESNET,DFL_EfficientNet,MangoNet,MangoRegressionNet
from dataset import MangoDataset
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix
from util import mixup_data, mixup_criterion, plot_confusion_matrix
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
import torch
import pandas as pd
from tqdm import tqdm
import torch.nn.functional as F
%matplotlib inline

## load data

In [3]:
BATCH_SIZE=8
trainset = MangoDataset(root = './data/', train = "train")
valset = MangoDataset(root = './data/', train = "val")
testset = MangoDataset(root = './data/', train = "test")
# create train/val loaders
train_loader = DataLoader(dataset=trainset,
                          batch_size=BATCH_SIZE, 
                          shuffle=True,
                          num_workers=1)

val_loader = DataLoader(dataset=valset,
                        batch_size=BATCH_SIZE, 
                        shuffle=False,
                        num_workers=1)

test_loader = DataLoader(dataset=testset,
                        batch_size=BATCH_SIZE, 
                        shuffle=False,
                        num_workers=1)

train_n = len(trainset)
val_n = len(valset)


  label = np.where(label=="A", 0, label)
  label = np.where(label=="B", 1, label)
  numerical_lbls = np.where(label=="C", 2, label)


In [4]:
EPOCH = 50
DEVICE = "cuda:1"
model_path = "model/"

## DFL 

In [None]:

# model = DFL_EfficientNet().to(DEVICE)
# model = DFL_RESNET().to(DEVICE)
model = DFL_VGG16().to(DEVICE)
model.load_state_dict(torch.load(model_path+"DFL_VGG16"))
ce = torch.nn.NLLLoss()
# opt = torch.optim.Adam([{'params': model.parameters(), 'lr': 0.0001}]) ## for efficientnet
opt = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay = 0.000005)
for i in range(EPOCH):
    model.train()
    for data, label in tqdm(train_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1, out2, out3 = model(data)
        out1, out2, out3 = F.log_softmax(out1,1), F.log_softmax(out2,1), F.log_softmax(out3,1)
        loss = ce(out1,label)+ce(out2,label)+0.1*ce(out3,label)
        loss.backward()
        opt.step()
        opt.zero_grad()
    print(loss)

    model.eval()
    correct = 0
    for data, label in tqdm(val_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1, out2, out3 = model(data)
        out = out1+out2+0.1*out3
        pred = torch.max(out,axis=1)[1]
        correct += torch.sum((label==pred).int()).detach().item()
    print("valacc:",correct/val_n)
    model.eval()
    correct = 0
    for data, label in tqdm(train_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1, out2, out3 = model(data)
        out = out1+out2+0.1*out3
        pred = torch.max(out,axis=1)[1]
        correct += torch.sum((label==pred).int()).detach().item()
    print("trainacc:",correct/train_n)
#     torch.save(model.state_dict(), "DFL_VGG16_1")

## EfficientNet + Mixup

In [None]:
model = MangoNet().to(DEVICE)

ce = torch.nn.NLLLoss()
opt = torch.optim.Adam([{'params': model.parameters(), 'lr': 0.0001}])
# opt = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay = 0.000005)
for i in range(EPOCH):
    model.train()
    for data, label in tqdm(train_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1 = model(data)
        out1 = F.log_softmax(out1,1)
        loss = ce(out1,label)
        mixed_x, y_a, y_b, lam = mixup_data(data, label)
        pred = model(mixed_x)
        mixloss = mixup_criterion(ce, pred, y_a, y_b, lam)
        closs = loss+mixloss
        closs.backward()
        opt.step()
        opt.zero_grad()
    print(closs)
    model.eval()
    correct = 0
    for data, label in tqdm(val_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1 = model(data)

        pred = torch.max(out1,axis=1)[1]
        correct += torch.sum((label==pred).int()).detach().item()
    print("valacc:",correct/val_n)
    model.eval()
    correct = 0
    for data, label in tqdm(train_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1 = model(data)
        pred = torch.max(out1,axis=1)[1]
        correct += torch.sum((label==pred).int()).detach().item()
    print("trainacc:",correct/train_n)
    torch.save(model.state_dict(), model_path+"model")
#     

## DFL Feature Visualization

In [None]:
model = DFL_VGG16().to(DEVICE)
model.load_state_dict(torch.load(model_path+"DFL_VGG16"))
model.eval()
correct = 0
preds,labels,val_label,val_pred=[],[],[],[]
test_feature_a,train_feature_a,val_feature_a=[],[],[]
for data in tqdm(test_loader):
    data = data.to(DEVICE)
    out1,out2,out3,feature_a,feature_b = model(data)
    test_feature_a.append(feature_b.data.cpu())
    out = out1+out2+0.1*out3
    pred = torch.max(out,axis=1)[1]
    preds.append(pred)
for data,label in tqdm(val_loader):
    data,label = data.to(DEVICE),label.to(DEVICE)
    out1,out2,out3,feature_a,feature_b = model(data)
    val_feature_a.append(feature_b.data.cpu())
    val_label.append(label.cpu())
for data,label in tqdm(train_loader):
    data,label = data.to(DEVICE),label.to(DEVICE)
    out1,out2,out3,feature_a,feature_b = model(data)
    train_feature_a.append(feature_b.data.cpu())
    labels.append(label.cpu())
test_feature_a = np.asarray(torch.cat(test_feature_a).cpu().numpy())
train_feature_a = np.asarray(torch.cat(train_feature_a).cpu().numpy())
val_feature_a = np.asarray(torch.cat(val_feature_a).cpu().numpy())
labels = np.asarray(torch.cat(labels).cpu().numpy())
val_label = np.asarray(torch.cat(val_label).cpu().numpy())
n=300
a = np.concatenate((train_feature_a[:n].reshape(n,-1),test_feature_a[:n].reshape(n,-1)),axis=0)
a = np.concatenate((a,val_feature_a[:n].reshape(n,-1)),axis=0)
X_embedded = TSNE(n_components=2).fit_transform(a)
plt.scatter(X_embedded[:n,0],X_embedded[:n,1],c=labels[:n],label="train",marker=".",s=50)
plt.scatter(X_embedded[n:2*n,0],X_embedded[n:2*n,1],c="black",label="test",marker=".",s=50)
plt.scatter(X_embedded[2*n:,0],X_embedded[2*n:,1],c=val_label[:n],label="val",marker=".",s=50)
plt.legend(loc='upper right')
plt.show()

## EfficientNet Feature Visualization

In [None]:

preds,labels,val_label,val_pred=[],[],[],[]
test_feature_a,train_feature_a,val_feature_a=[],[],[]
model = MangoNet().to(DEVICE)
model.load_state_dict(torch.load(model_path+"EfficientNet"))
for data in tqdm(test_loader):
    data = data.to(DEVICE)
    feature_b = model.backbone.extract_features(data)
    test_feature_a.append(feature_b.data.cpu())
for data,label in tqdm(val_loader):
    data,label = data.to(DEVICE),label.to(DEVICE)
    feature_b = model.backbone.extract_features(data)
    val_feature_a.append(feature_b.data.cpu())
    val_label.append(label.cpu())
for data,label in tqdm(train_loader):
    data,label = data.to(DEVICE),label.to(DEVICE)
    feature_b = model.backbone.extract_features(data)
    train_feature_a.append(feature_b.data.cpu())
    labels.append(label.cpu())
test_feature_a = np.asarray(torch.cat(test_feature_a).cpu().numpy())
train_feature_a = np.asarray(torch.cat(train_feature_a).cpu().numpy())
val_feature_a = np.asarray(torch.cat(val_feature_a).cpu().numpy())
labels = np.asarray(torch.cat(labels).cpu().numpy())
val_label = np.asarray(torch.cat(val_label).cpu().numpy())

train_feature_a = np.mean(train_feature_a,axis=(2,3))
test_feature_a = np.mean(test_feature_a,axis=(2,3))
val_feature_a = np.mean(val_feature_a,axis=(2,3))
n=300
a = np.concatenate((train_feature_a[:n].reshape(n,-1),test_feature_a[:n].reshape(n,-1)),axis=0)
a = np.concatenate((a,val_feature_a[:n].reshape(n,-1)),axis=0)
X_embedded = TSNE(n_components=2).fit_transform(a)
plt.scatter(X_embedded[:n,0],X_embedded[:n,1],c=labels[:n],label="train",marker=".",s=50)
plt.scatter(X_embedded[n:2*n,0],X_embedded[n:2*n,1],c="black",label="test",marker=".",s=50)
plt.scatter(X_embedded[2*n:,0],X_embedded[2*n:,1],c=val_label[:n],label="val",marker=".",s=50)
plt.legend(loc='upper right')
# plt.savefig(method+"scatter"+str(step)+".png")
plt.show()

## EfficientNet Testing

In [None]:
DEVICE = "cuda:1"
model = MangoNet().to(DEVICE)
model.load_state_dict(torch.load(model_path+"EfficientNet"))
model.eval()
correct = 0
preds=[]
labels=[]
for data,label in tqdm(val_loader):
    data,label = data.to(DEVICE),label.to(DEVICE)
    out = model(data)
#     out = out1+out2+0.1*out3
    pred = torch.max(out,axis=1)[1]
    preds.append(pred)
    labels.append(label)
preds = np.asarray(torch.cat(preds).cpu().numpy())
labels = np.asarray(torch.cat(labels).cpu().numpy())

## Confusion Matrix

In [None]:
model = DFL_VGG16().to(DEVICE)
model.load_state_dict(torch.load(model_path+"DFL_VGG16"))
model.eval()
correct = 0
preds=[]
labels=[]
for data,label in tqdm(val_loader):
    data,label = data.to(DEVICE),label.to(DEVICE)
    out1,out2,out3,feature_a,feature_b = model(data)
    out = out1+out2+0.1*out3
    pred = torch.max(out,axis=1)[1]
    preds.append(pred)
    labels.append(label)
preds = np.asarray(torch.cat(preds).cpu().numpy())
labels = np.asarray(torch.cat(labels).cpu().numpy())
cnf_matrix = confusion_matrix(labels, preds)
plot_confusion_matrix(cnf_matrix,classes=[a for a in range(3)],normalize=True)
plt.show()

DEVICE = "cuda:0"
model = MangoNet().to(DEVICE)
model.load_state_dict(torch.load(model_path+"EfficientNet"))
model.eval()
correct = 0
preds,labels=[],[]
for data,label in tqdm(val_loader):
    data = data.to(DEVICE)
    out = model(data)
#     out = out1+out2+0.1*out3
    pred = torch.max(out,axis=1)[1]
    labels.append(label)
    preds.append(pred)
preds = np.asarray(torch.cat(preds).cpu().numpy())
labels = np.asarray(torch.cat(labels).cpu().numpy())
cnf_matrix = confusion_matrix(labels, preds)
plot_confusion_matrix(cnf_matrix,classes=[a for a in range(3)],normalize=True)
plt.show()

## Submit Result

In [None]:

DEVICE = "cuda:1"
model = MangoNet().to(DEVICE)
model.load_state_dict(torch.load(model_path+"EfficientNet"))
model.eval()
preds=[]
for data in tqdm(test_loader):
    data = data.to(DEVICE)
    out = model(data)
#     out = out1+out2+0.1*out3
    pred = torch.max(out,axis=1)[1]
    preds.append(pred)
preds = np.asarray(torch.cat(preds).cpu().numpy())
preds = np.where(preds==0, "A", preds)
preds = np.where(preds=="1", "B", preds)
preds = np.where(preds=="2", "C", preds)
df = pd.read_csv("test_example.csv")
df["label"]=pd.Series(preds)
df = df.set_index("image_id")
df.to_csv("result_DFL.csv")

## Efficient Regression

In [9]:

EPOCH = 50
DEVICE = "cuda:1"
model = MangoRegressionNet().to(DEVICE)

ce = torch.nn.NLLLoss()
mse = torch.nn.MSELoss()
opt = torch.optim.Adam([{'params': model.parameters(), 'lr': 0.0001}])
# opt = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay = 0.000005)
for i in range(EPOCH):
#     model.train()
#     for data, label in tqdm(train_loader):
#         data, label = data.to(DEVICE), label.to(DEVICE).float()/2
#         out1 = model(data)
#         out1 = F.sigmoid(out1)
#         loss = mse(out1,label)
# #         mixed_x, y_a, y_b, lam = mixup_data(data, label)
# #         pred = model(mixed_x)
# #         mixloss = mixup_criterion(ce, pred, y_a, y_b, lam)
#         closs = loss#+mixloss
#         closs.backward()
#         opt.step()
#         opt.zero_grad()
#     print(closs)
    model.eval()
    correct = 0
    for data, label in tqdm(val_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1 = model(data)
        out1 = F.sigmoid(out1)
        print(out1)
        out1[out1< 0.33]=0
        out1[out1>0.66]=2
        out1[(0.33< out1) & (out1 < 0.66)]=1
        
        print(label)
        print(out1)
        correct += torch.sum((label==out1.reshape(-1)).int()).detach().item()
    print("valacc:",correct/val_n)
    model.eval()
    correct = 0
    for data, label in tqdm(train_loader):
        data, label = data.to(DEVICE), label.to(DEVICE)
        out1 = model(data)
        out1 = F.sigmoid(out1)
        out1 = torch.where(out1 < 0.33, torch.tensor(0.), out1)
        out1 = torch.where((0.33< out1) & (out1 < 0.66), torch.tensor(1.), out1)
        pred = torch.where(out1 > 0.66, torch.tensor(2.), out1)
        correct += torch.sum((label==pred).int()).detach().item()
    print("trainacc:",correct/train_n)
    torch.save(model.state_dict(), model_path+"regressionmodel")

  0%|          | 0/100 [00:00<?, ?it/s]

Loaded pretrained weights for efficientnet-b7


  2%|▏         | 2/100 [00:00<00:22,  4.39it/s]

tensor([[0.5178],
        [0.5179],
        [0.5422],
        [0.5392],
        [0.5222],
        [0.4643],
        [0.5047],
        [0.5571]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 1, 1, 2, 1, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5461],
        [0.5279],
        [0.5919],
        [0.5314],
        [0.5306],
        [0.5181],
        [0.5011],
        [0.5429]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 0, 1, 2, 1, 2, 1], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


  4%|▍         | 4/100 [00:00<00:16,  5.91it/s]

tensor([[0.5297],
        [0.5613],
        [0.5587],
        [0.5584],
        [0.5991],
        [0.5169],
        [0.5435],
        [0.5063]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 0, 1, 0, 0, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5340],
        [0.5240],
        [0.5159],
        [0.5253],
        [0.5550],
        [0.5218],
        [0.5331],
        [0.5252]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 2, 2, 1, 1, 1, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


  6%|▌         | 6/100 [00:00<00:12,  7.34it/s]

tensor([[0.5568],
        [0.5288],
        [0.5215],
        [0.5287],
        [0.5064],
        [0.5255],
        [0.5471],
        [0.5317]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([2, 2, 0, 0, 0, 0, 0, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5477],
        [0.5842],
        [0.5397],
        [0.5335],
        [0.5146],
        [0.5328],
        [0.5289],
        [0.5519]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 2, 0, 2, 2, 2, 1, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


  8%|▊         | 8/100 [00:01<00:11,  7.72it/s]

tensor([[0.5651],
        [0.5392],
        [0.4926],
        [0.5333],
        [0.5268],
        [0.5103],
        [0.5037],
        [0.5252]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([2, 0, 1, 1, 1, 2, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5043],
        [0.5566],
        [0.5564],
        [0.5379],
        [0.5706],
        [0.5313],
        [0.5425],
        [0.5369]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 2, 0, 1, 1, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 10%|█         | 10/100 [00:01<00:11,  7.85it/s]

tensor([[0.5075],
        [0.5312],
        [0.5352],
        [0.5189],
        [0.5544],
        [0.5268],
        [0.5269],
        [0.4963]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 2, 2, 0, 2, 1, 2, 1], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5465],
        [0.4923],
        [0.5409],
        [0.5157],
        [0.5084],
        [0.4799],
        [0.5633],
        [0.5566]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 2, 2, 2, 1, 0, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 12%|█▏        | 12/100 [00:01<00:10,  8.02it/s]

tensor([[0.5215],
        [0.5143],
        [0.5207],
        [0.5263],
        [0.5097],
        [0.5327],
        [0.5469],
        [0.5141]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([0, 2, 1, 1, 2, 2, 2, 2], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5449],
        [0.5403],
        [0.5605],
        [0.5290],
        [0.5311],
        [0.5223],
        [0.5319],
        [0.5191]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([0, 1, 2, 2, 1, 2, 2, 2], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 14%|█▍        | 14/100 [00:01<00:10,  8.45it/s]

tensor([[0.5479],
        [0.5242],
        [0.5247],
        [0.5091],
        [0.5237],
        [0.5170],
        [0.5186],
        [0.5558]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 2, 2, 2, 1, 1, 2], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5308],
        [0.5447],
        [0.5515],
        [0.5111],
        [0.5058],
        [0.5582],
        [0.5184],
        [0.5207]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 1, 1, 1, 1, 2, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 16%|█▌        | 16/100 [00:02<00:10,  8.09it/s]

tensor([[0.5885],
        [0.5334],
        [0.5366],
        [0.5195],
        [0.5156],
        [0.5232],
        [0.5097],
        [0.5050]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 0, 2, 2, 0, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5146],
        [0.4905],
        [0.5231],
        [0.5446],
        [0.5242],
        [0.5362],
        [0.5561],
        [0.5386]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 0, 2, 1, 2, 1, 0, 1], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 18%|█▊        | 18/100 [00:02<00:09,  8.28it/s]

tensor([[0.5824],
        [0.5238],
        [0.5482],
        [0.5315],
        [0.5049],
        [0.4984],
        [0.5180],
        [0.5163]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([2, 1, 0, 0, 0, 1, 0, 2], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5347],
        [0.5730],
        [0.5080],
        [0.5143],
        [0.5573],
        [0.5621],
        [0.5231],
        [0.5577]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 1, 0, 2, 0, 1, 2, 0], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 20%|██        | 20/100 [00:02<00:09,  8.17it/s]

tensor([[0.4941],
        [0.5589],
        [0.5081],
        [0.5678],
        [0.5064],
        [0.4925],
        [0.4821],
        [0.4952]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([0, 0, 2, 1, 1, 1, 1, 1], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)
tensor([[0.5235],
        [0.5015],
        [0.5269],
        [0.5316],
        [0.5149],
        [0.5334],
        [0.5532],
        [0.5131]], device='cuda:1', grad_fn=<SigmoidBackward>)
tensor([1, 1, 1, 1, 1, 0, 2, 1], device='cuda:1')
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:1', grad_fn=<IndexPutBackward>)


 20%|██        | 20/100 [00:02<00:10,  7.48it/s]


KeyboardInterrupt: 