In [1]:
from subprocess import check_output
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from tqdm import tqdm

import torch.backends.cudnn as cudnn
import random

## Claim: the data preprocessing part is from https://www.kaggle.com/code/pavansanagapati/a-simple-cnn-model-beginner-guide/notebook

In [2]:
# This code is from https://www.kaggle.com/code/pankajj/fashion-mnist-with-pytorch-93-accuracy

class Data(Dataset):
    
    def __init__(self, data, transform = None):
        self.fashion_MNIST = list(data.values)
        self.transform = transform
        
        label = []
        image = []
        
        for i in self.fashion_MNIST:
             # first column is of labels.
            label.append(i[0])
            image.append(i[1:])
        self.labels = np.asarray(label)
        # Dimension of Images = 28 * 28 * 1. where height = width = 28 and color_channels = 1.
        self.images = np.asarray(image).reshape(-1, 28, 28, 1).astype('float32')

    def __getitem__(self, index):
        label = self.labels[index]
        image = self.images[index]
        
        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.images)

In [3]:
train_csv = pd.read_csv("fashion-mnist_train.csv")
test_csv = pd.read_csv("fashion-mnist_test.csv")

train_data = Data(train_csv, transform=transforms.Compose([transforms.ToTensor()]))
test_data = Data(test_csv, transform=transforms.Compose([transforms.ToTensor()]))

batch_size = 256
train_dataloader = DataLoader(train_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [4]:
# From now on, we start to build our model
# We are also really big sad that we can't use maxpool inside sequential

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # image shape is 28 * 28 * 1, where 1 is one color channel
        # 28 * 28 is the image size
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        # output shape = (28 - 3 + 1) * (28 - 3 + 1) * 3 = 26 * 26 * 32
        # maxpooling
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # output shape = 13 * 13 * 32
        # Note the volumn depth is not changed

        self.dropout = nn.Dropout(p=0.25)
        # output shape = 13 * 13 * 32


        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        # output shape = (13 - 3 + 1) * (13 - 3 + 1) * 64 = 11 * 11 * 64

        # maxpooling
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # output shape = 5 * 5 * 64

        self.dropout = nn.Dropout(p=0.25)


        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        # output shape = (5 - 3 + 1) * (5 - 3 + 1) * 128 = 3 * 3 * 128

        self.dropout = nn.Dropout(p=0.4)
        # output shape = 3 * 3 * 128
        
        # fully connected layer
        self.fc1 = nn.Linear(3*3*128, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        # first conv
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.dropout(x)
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = F.relu(self.conv3(x))
        x = self.dropout(x)
        # flatten all dimensions except batch
        x = torch.flatten(x, 1)

        # fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.softmax(self.fc2(x))
        return x

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ConvNet().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

In [6]:
model.train()
epoches = 40
lost = []
for epoch in tqdm(range(epoches)):
    train_loss, correct = 0, 0
    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)
        
        pred = model(X)

        loss = loss_fn(pred, y.long())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # record loss
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    size = len(train_dataloader.dataset)
    train_loss /= len(train_dataloader)
    correct /= size
    print(f" Train accuracy: {(100*correct):>0.1f}%, Avg loss: {train_loss:>8f}")
    lost.append(train_loss)


  2%|▎         | 1/40 [00:21<14:16, 21.97s/it]

 Train accuracy: 33.6%, Avg loss: 2.122391


  5%|▌         | 2/40 [00:43<13:39, 21.56s/it]

 Train accuracy: 47.8%, Avg loss: 1.982331


  8%|▊         | 3/40 [01:04<13:10, 21.38s/it]

 Train accuracy: 50.3%, Avg loss: 1.957960


 10%|█         | 4/40 [01:25<12:46, 21.30s/it]

 Train accuracy: 51.2%, Avg loss: 1.948322


 12%|█▎        | 5/40 [01:46<12:25, 21.31s/it]

 Train accuracy: 52.1%, Avg loss: 1.938890


 15%|█▌        | 6/40 [02:08<12:02, 21.26s/it]

 Train accuracy: 52.7%, Avg loss: 1.933203


 18%|█▊        | 7/40 [02:29<11:40, 21.23s/it]

 Train accuracy: 53.6%, Avg loss: 1.924011


 20%|██        | 8/40 [02:50<11:19, 21.23s/it]

 Train accuracy: 57.0%, Avg loss: 1.889691


 22%|██▎       | 9/40 [03:12<11:03, 21.41s/it]

 Train accuracy: 63.1%, Avg loss: 1.829083


 25%|██▌       | 10/40 [03:33<10:41, 21.39s/it]

 Train accuracy: 66.3%, Avg loss: 1.796857


 28%|██▊       | 11/40 [03:54<10:18, 21.32s/it]

 Train accuracy: 67.3%, Avg loss: 1.787794


 30%|███       | 12/40 [04:15<09:54, 21.25s/it]

 Train accuracy: 68.0%, Avg loss: 1.779859


 32%|███▎      | 13/40 [04:36<09:32, 21.20s/it]

 Train accuracy: 68.6%, Avg loss: 1.774698


 35%|███▌      | 14/40 [04:58<09:10, 21.17s/it]

 Train accuracy: 69.0%, Avg loss: 1.770450


 38%|███▊      | 15/40 [05:19<08:48, 21.13s/it]

 Train accuracy: 69.3%, Avg loss: 1.767382


 40%|████      | 16/40 [05:40<08:26, 21.12s/it]

 Train accuracy: 69.6%, Avg loss: 1.764388


 42%|████▎     | 17/40 [06:01<08:05, 21.11s/it]

 Train accuracy: 69.9%, Avg loss: 1.761256


 45%|████▌     | 18/40 [06:22<07:44, 21.10s/it]

 Train accuracy: 70.1%, Avg loss: 1.759150


 48%|████▊     | 19/40 [06:43<07:23, 21.10s/it]

 Train accuracy: 75.4%, Avg loss: 1.706653


 50%|█████     | 20/40 [07:04<07:01, 21.10s/it]

 Train accuracy: 79.1%, Avg loss: 1.669451


 52%|█████▎    | 21/40 [07:25<06:40, 21.09s/it]

 Train accuracy: 79.8%, Avg loss: 1.662969


 55%|█████▌    | 22/40 [07:46<06:20, 21.11s/it]

 Train accuracy: 80.0%, Avg loss: 1.660579


 57%|█████▊    | 23/40 [12:22<27:39, 97.60s/it]

 Train accuracy: 80.1%, Avg loss: 1.659277


 60%|██████    | 24/40 [12:43<19:53, 74.57s/it]

 Train accuracy: 81.0%, Avg loss: 1.651135


 62%|██████▎   | 25/40 [13:04<14:37, 58.49s/it]

 Train accuracy: 81.9%, Avg loss: 1.641968


 65%|██████▌   | 26/40 [28:31<1:14:28, 319.15s/it]

 Train accuracy: 82.3%, Avg loss: 1.637709


 68%|██████▊   | 27/40 [30:36<56:29, 260.76s/it]  

 Train accuracy: 82.6%, Avg loss: 1.635471


 70%|███████   | 28/40 [30:57<37:46, 188.86s/it]

 Train accuracy: 83.0%, Avg loss: 1.631147


 72%|███████▎  | 29/40 [31:49<27:05, 147.81s/it]

 Train accuracy: 83.1%, Avg loss: 1.629643


 75%|███████▌  | 30/40 [32:10<18:18, 109.82s/it]

 Train accuracy: 83.4%, Avg loss: 1.627199


 78%|███████▊  | 31/40 [32:32<12:31, 83.51s/it] 

 Train accuracy: 83.7%, Avg loss: 1.624756


 80%|████████  | 32/40 [32:55<08:41, 65.20s/it]

 Train accuracy: 83.9%, Avg loss: 1.622292


 82%|████████▎ | 33/40 [33:16<06:04, 52.08s/it]

 Train accuracy: 84.2%, Avg loss: 1.619566


 85%|████████▌ | 34/40 [33:38<04:17, 42.88s/it]

 Train accuracy: 84.2%, Avg loss: 1.618852


 88%|████████▊ | 35/40 [33:59<03:01, 36.36s/it]

 Train accuracy: 84.3%, Avg loss: 1.617388


 90%|█████████ | 36/40 [34:20<02:06, 31.74s/it]

 Train accuracy: 84.4%, Avg loss: 1.616688


 92%|█████████▎| 37/40 [34:41<01:25, 28.51s/it]

 Train accuracy: 84.7%, Avg loss: 1.614149


 95%|█████████▌| 38/40 [35:02<00:52, 26.29s/it]

 Train accuracy: 84.6%, Avg loss: 1.614141


 98%|█████████▊| 39/40 [35:24<00:24, 24.94s/it]

 Train accuracy: 84.8%, Avg loss: 1.613106


100%|██████████| 40/40 [35:45<00:00, 53.65s/it]

 Train accuracy: 85.0%, Avg loss: 1.611226





In [7]:
# testing set

model.eval()
correct = 0
# Turn off gradient descent
with torch.no_grad():
    for X, y in tqdm(test_dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        print("y_true: ", y, "y_pred: ", pred)
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
size = len(test_dataloader.dataset)
correct = correct / size
print(f" Test accuracy: {(100*correct):>0.1f}%")

 10%|█         | 4/40 [00:00<00:00, 38.47it/s]

y_true:  tensor([0, 1, 2, 2, 3, 2, 8, 6, 5, 0, 3, 4, 4, 6, 8, 5, 6, 3, 6, 4, 4, 4, 2, 1,
        5, 7, 8, 4, 4, 1, 5, 7, 7, 8, 1, 0, 9, 8, 0, 8, 2, 0, 4, 6, 2, 0, 3, 3,
        2, 3, 2, 2, 9, 3, 0, 9, 9, 4, 6, 0, 4, 5, 4, 6, 1, 1, 0, 9, 5, 2, 7, 3,
        4, 6, 5, 7, 1, 6, 1, 4, 9, 8, 1, 2, 4, 8, 9, 4, 1, 6, 3, 4, 2, 2, 2, 6,
        4, 7, 7, 3, 9, 3, 9, 0, 8, 2, 3, 8, 2, 7, 5, 5, 3, 2, 7, 5, 0, 2, 7, 1,
        0, 5, 4, 4, 7, 0, 8, 5, 0, 3, 1, 7, 9, 4, 9, 6, 4, 4, 2, 4, 3, 3, 3, 2,
        2, 6, 0, 0, 1, 3, 4, 3, 3, 1, 9, 3, 3, 3, 9, 5, 6, 7, 7, 3, 2, 4, 0, 8,
        7, 2, 2, 8, 9, 0, 2, 4, 4, 5, 7, 9, 9, 1, 3, 9, 1, 5, 5, 6, 0, 7, 4, 9,
        1, 6, 0, 0, 0, 4, 0, 9, 0, 4, 2, 5, 5, 8, 6, 2, 1, 9, 0, 4, 7, 1, 9, 5,
        9, 0, 2, 8, 5, 7, 7, 3, 2, 4, 5, 7, 8, 1, 9, 5, 6, 2, 9, 7, 4, 0, 9, 2,
        1, 5, 7, 7, 0, 2, 4, 5, 3, 3, 8, 1, 6, 2, 4, 8]) y_pred:  tensor([[1.0000e+00, 2.1949e-38, 1.0824e-25,  ..., 0.0000e+00, 2.5395e-33,
         2.3822e-44],
        [2.4177e-27, 1.0000e

 22%|██▎       | 9/40 [00:00<00:00, 41.37it/s]

y_true:  tensor([4, 6, 9, 9, 7, 5, 6, 1, 3, 1, 3, 2, 5, 7, 1, 4, 1, 5, 4, 4, 4, 7, 4, 7,
        1, 5, 6, 4, 2, 6, 9, 7, 0, 7, 9, 3, 2, 1, 7, 0, 9, 6, 8, 8, 5, 4, 8, 2,
        1, 7, 7, 8, 6, 7, 5, 0, 3, 5, 6, 7, 0, 0, 7, 4, 4, 6, 8, 0, 1, 6, 5, 3,
        3, 0, 4, 8, 3, 9, 8, 9, 2, 7, 9, 5, 0, 4, 9, 6, 2, 8, 4, 1, 4, 5, 8, 1,
        7, 1, 1, 6, 8, 8, 3, 9, 4, 5, 4, 6, 7, 3, 3, 6, 2, 4, 2, 6, 2, 1, 6, 8,
        1, 2, 3, 0, 1, 4, 2, 0, 3, 5, 6, 6, 4, 2, 4, 3, 1, 1, 7, 5, 9, 6, 4, 7,
        3, 3, 3, 3, 7, 0, 1, 7, 7, 0, 5, 0, 7, 8, 2, 4, 7, 4, 6, 4, 9, 1, 4, 4,
        6, 2, 4, 0, 2, 1, 9, 0, 3, 8, 6, 1, 3, 4, 8, 9, 6, 9, 6, 2, 5, 9, 7, 4,
        5, 6, 2, 6, 6, 8, 8, 5, 6, 8, 0, 0, 7, 9, 5, 9, 0, 5, 3, 3, 9, 6, 2, 5,
        8, 6, 7, 7, 0, 9, 9, 8, 6, 7, 0, 6, 2, 5, 0, 8, 4, 3, 9, 3, 0, 3, 2, 1,
        1, 8, 9, 0, 0, 7, 4, 1, 6, 2, 6, 2, 4, 1, 1, 4]) y_pred:  tensor([[3.1787e-12, 1.3790e-10, 9.9903e-01,  ..., 5.5328e-14, 5.6327e-11,
         2.8533e-12],
        [3.0861e-17, 8.9137e

 35%|███▌      | 14/40 [00:00<00:00, 42.80it/s]

y_true:  tensor([7, 9, 8, 1, 3, 6, 4, 7, 1, 6, 9, 4, 9, 3, 8, 1, 9, 5, 7, 8, 6, 4, 5, 2,
        4, 3, 3, 3, 9, 7, 6, 6, 6, 5, 3, 8, 3, 3, 2, 8, 8, 4, 8, 8, 6, 7, 6, 2,
        7, 5, 3, 4, 5, 1, 5, 4, 7, 3, 3, 6, 4, 0, 3, 4, 5, 1, 1, 9, 0, 4, 9, 6,
        3, 8, 5, 8, 3, 0, 3, 4, 2, 4, 2, 4, 1, 7, 5, 3, 9, 6, 0, 0, 3, 4, 0, 8,
        2, 3, 9, 6, 9, 7, 1, 9, 9, 8, 8, 9, 5, 2, 1, 0, 1, 6, 9, 4, 0, 2, 1, 5,
        0, 0, 8, 7, 8, 6, 9, 5, 2, 1, 4, 1, 1, 6, 2, 0, 1, 1, 6, 7, 0, 3, 9, 9,
        3, 8, 1, 2, 8, 1, 8, 5, 9, 8, 4, 1, 3, 4, 5, 8, 8, 6, 5, 0, 9, 1, 7, 1,
        2, 2, 0, 9, 3, 1, 8, 3, 1, 6, 7, 1, 1, 5, 5, 6, 6, 4, 6, 9, 0, 8, 6, 5,
        2, 9, 1, 8, 8, 3, 5, 5, 0, 8, 8, 5, 3, 0, 7, 5, 8, 0, 5, 3, 6, 4, 2, 8,
        3, 0, 2, 2, 3, 4, 5, 9, 9, 4, 9, 8, 4, 0, 9, 4, 8, 9, 1, 6, 9, 5, 2, 7,
        7, 3, 3, 6, 1, 2, 2, 3, 2, 0, 0, 6, 9, 5, 9, 9]) y_pred:  tensor([[7.0903e-31, 2.4009e-29, 1.2428e-30,  ..., 1.0000e+00, 9.4467e-30,
         6.3221e-20],
        [0.0000e+00, 7.0065e

 60%|██████    | 24/40 [00:00<00:00, 42.98it/s]

y_true:  tensor([0, 2, 7, 9, 0, 6, 1, 9, 4, 6, 1, 7, 1, 3, 7, 2, 7, 0, 5, 9, 2, 0, 6, 4,
        5, 4, 2, 9, 8, 6, 3, 7, 8, 4, 1, 0, 4, 0, 9, 0, 5, 2, 8, 0, 1, 5, 8, 1,
        1, 9, 2, 5, 3, 9, 9, 9, 4, 6, 4, 4, 8, 6, 0, 7, 4, 4, 5, 8, 7, 9, 8, 4,
        0, 2, 9, 1, 9, 3, 1, 0, 9, 0, 3, 3, 4, 2, 7, 5, 3, 3, 1, 7, 4, 7, 8, 8,
        0, 6, 5, 8, 8, 6, 8, 8, 5, 4, 0, 9, 5, 2, 5, 8, 4, 2, 7, 1, 1, 3, 1, 6,
        0, 0, 8, 6, 1, 3, 8, 1, 4, 9, 0, 1, 8, 8, 0, 1, 3, 8, 4, 7, 0, 5, 1, 7,
        4, 2, 3, 1, 2, 0, 6, 3, 5, 3, 7, 7, 8, 4, 0, 6, 5, 3, 9, 8, 7, 2, 0, 5,
        1, 5, 7, 5, 7, 9, 5, 0, 5, 0, 9, 4, 1, 6, 4, 0, 1, 4, 4, 4, 0, 9, 2, 2,
        9, 3, 4, 0, 2, 7, 7, 9, 8, 0, 1, 2, 8, 9, 8, 2, 4, 8, 2, 1, 9, 1, 9, 1,
        8, 1, 1, 4, 2, 2, 2, 9, 4, 6, 2, 1, 7, 1, 0, 9, 0, 3, 7, 4, 6, 0, 4, 0,
        3, 9, 3, 9, 0, 1, 7, 8, 7, 8, 6, 6, 1, 1, 4, 7]) y_pred:  tensor([[1.0000e+00, 1.8021e-42, 6.4219e-27,  ..., 0.0000e+00, 1.4536e-36,
         0.0000e+00],
        [2.7754e-06, 1.3581e

 72%|███████▎  | 29/40 [00:00<00:00, 42.91it/s]

y_true:  tensor([6, 1, 7, 8, 7, 1, 2, 7, 3, 4, 9, 0, 4, 6, 5, 6, 8, 1, 0, 8, 2, 1, 8, 2,
        5, 2, 2, 7, 8, 5, 3, 5, 3, 3, 4, 6, 4, 9, 2, 3, 9, 3, 9, 1, 3, 0, 6, 9,
        5, 2, 4, 8, 7, 1, 3, 7, 8, 4, 4, 1, 7, 5, 2, 1, 3, 3, 4, 5, 4, 6, 5, 0,
        7, 9, 2, 7, 7, 7, 2, 8, 5, 9, 3, 8, 9, 8, 9, 5, 6, 2, 3, 6, 7, 4, 6, 7,
        1, 4, 1, 5, 5, 1, 5, 8, 2, 5, 0, 9, 6, 8, 1, 8, 1, 3, 2, 6, 6, 6, 7, 4,
        6, 6, 6, 4, 2, 8, 8, 2, 9, 2, 8, 9, 7, 2, 9, 6, 3, 5, 0, 3, 6, 3, 6, 1,
        2, 5, 2, 7, 4, 6, 7, 7, 6, 5, 1, 7, 5, 4, 2, 2, 8, 8, 5, 3, 1, 0, 0, 3,
        2, 1, 3, 3, 8, 1, 3, 2, 3, 9, 5, 8, 1, 8, 2, 7, 9, 7, 5, 6, 2, 2, 3, 1,
        3, 6, 2, 1, 8, 0, 4, 9, 7, 1, 0, 6, 6, 7, 5, 8, 4, 3, 2, 7, 6, 0, 3, 2,
        9, 7, 1, 2, 3, 7, 9, 9, 2, 4, 7, 4, 6, 8, 9, 5, 0, 4, 7, 2, 9, 5, 5, 0,
        6, 3, 3, 0, 2, 7, 1, 4, 1, 6, 8, 5, 0, 2, 2, 9]) y_pred:  tensor([[1.0000e+00, 2.7988e-37, 1.7322e-25,  ..., 1.4013e-45, 1.9041e-32,
         1.4013e-45],
        [4.8322e-19, 1.0000e

 85%|████████▌ | 34/40 [00:00<00:00, 42.67it/s]

y_true:  tensor([7, 3, 4, 5, 4, 8, 3, 5, 0, 3, 9, 6, 0, 7, 9, 7, 4, 5, 1, 2, 2, 6, 7, 5,
        3, 0, 6, 5, 2, 0, 5, 6, 7, 2, 2, 3, 9, 2, 5, 6, 5, 0, 4, 8, 3, 5, 9, 4,
        3, 2, 7, 4, 1, 4, 0, 1, 9, 6, 3, 2, 8, 1, 9, 2, 1, 2, 9, 5, 0, 3, 3, 8,
        3, 2, 9, 9, 8, 4, 9, 3, 6, 1, 1, 3, 9, 6, 7, 8, 1, 5, 9, 5, 8, 6, 4, 0,
        8, 0, 5, 4, 1, 1, 7, 8, 0, 8, 9, 4, 4, 3, 9, 6, 9, 2, 4, 5, 1, 9, 0, 1,
        9, 0, 0, 0, 9, 1, 7, 2, 3, 5, 8, 6, 6, 8, 4, 8, 4, 3, 6, 3, 8, 3, 2, 8,
        8, 7, 5, 8, 0, 4, 8, 5, 7, 0, 8, 7, 9, 6, 1, 5, 7, 9, 5, 0, 0, 4, 4, 1,
        9, 5, 7, 0, 1, 3, 8, 4, 5, 1, 5, 9, 7, 8, 5, 8, 8, 6, 0, 4, 5, 2, 9, 9,
        0, 4, 1, 3, 2, 3, 4, 4, 2, 5, 4, 2, 1, 8, 1, 5, 9, 2, 8, 0, 7, 0, 6, 0,
        3, 6, 9, 4, 0, 1, 1, 0, 1, 2, 2, 8, 9, 0, 5, 5, 0, 4, 8, 7, 1, 4, 5, 8,
        6, 7, 2, 2, 6, 2, 4, 6, 4, 3, 8, 4, 5, 4, 2, 8]) y_pred:  tensor([[7.4632e-23, 1.0533e-22, 6.0749e-24,  ..., 1.0000e+00, 4.2999e-22,
         2.9426e-18],
        [1.8018e-33, 1.1620e

100%|██████████| 40/40 [00:00<00:00, 42.94it/s]

y_true:  tensor([4, 9, 4, 5, 1, 4, 6, 2, 1, 3, 3, 4, 0, 5, 4, 9, 1, 8, 5, 0, 2, 4, 8, 3,
        9, 3, 0, 5, 6, 2, 5, 8, 5, 1, 7, 2, 7, 1, 7, 1, 9, 2, 5, 1, 2, 4, 9, 0,
        1, 2, 8, 7, 6, 1, 2, 0, 8, 9, 1, 9, 0, 1, 0, 1, 4, 6, 7, 8, 8, 9, 0, 1,
        8, 5, 4, 9, 9, 2, 2, 4, 5, 7, 7, 9, 9, 7, 7, 3, 9, 0, 7, 6, 1, 5, 5, 8,
        5, 7, 5, 6, 5, 5, 5, 1, 5, 9, 4, 8, 5, 6, 3, 2, 6, 6, 7, 4, 2, 1, 0, 8,
        8, 1, 7, 9, 2, 1, 4, 2, 6, 8, 2, 3, 5, 5, 0, 5, 1, 5, 0, 9, 2, 6, 1, 0,
        9, 6, 2, 2, 2, 4, 3, 1, 7, 9, 4, 6, 4, 2, 7, 1, 1, 2, 7, 4, 6, 8, 7, 2,
        2, 6, 0, 7, 5, 0, 0, 2, 2, 5, 8, 6, 0, 8, 1, 6, 9, 4, 1, 4, 3, 6, 7, 2,
        8, 6, 6, 6, 1, 1, 7, 5, 6, 2, 5, 9, 0, 9, 8, 7, 6, 6, 6, 1, 1, 1, 8, 1,
        4, 8, 9, 7, 2, 7, 8, 0, 1, 8, 6, 0, 6, 8, 3, 8, 5, 5, 8, 7, 8, 2, 8, 6,
        5, 1, 5, 2, 3, 4, 8, 3, 6, 1, 8, 2, 7, 4, 7, 8]) y_pred:  tensor([[6.2517e-23, 4.5286e-16, 2.0721e-17,  ..., 1.1229e-21, 4.7293e-13,
         4.2895e-20],
        [6.3966e-28, 2.7315e


