## Imports

In [1]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px # this is another plotting library for interactive plot

from sklearn.model_selection import train_test_split
from sklearn import metrics, manifold # we will use the metrics and manifold learning modules from scikit-learn
from pathlib import Path # to interact with file paths
from PIL import Image # to interact with images
from tqdm import tqdm # progress bar
from pprint import pprint # pretty print (useful for a more readable print of objects like lists or dictionaries)
from IPython.display import clear_output # to clear the output of the notebook

import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import os
import shutil


# Report which backend PyTorch would pick (CUDA when available, CPU otherwise) ...
print(torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
# ... but keep `device` on the CPU for the cells that follow; the training cell
# selects the device again before the model is trained.
device = torch.device("cpu")

cuda


In [2]:
# CONTROL: knobs shared by the cells below
num_channels = 1          # channels fed to the network (1 = grayscale)
IMG_SIZE = (480, 640)     # source frame size; index 0 drives the row bounds of ROI
SIZE = (32, 32)           # size the images are resized to before entering the network
# Crop window stored as [a, b, c, d] ==> img[a:b, c:d]: rows span the middle half
# of IMG_SIZE[0]; columns span a band of width IMG_SIZE[0]/2 centred in IMG_SIZE[1].
ROI = [
    int(IMG_SIZE[0] / 4),                                     # a: first row
    int(IMG_SIZE[0] * 3 / 4),                                 # b: last row (exclusive)
    int((IMG_SIZE[1] - IMG_SIZE[0] / 2) / 2),                 # c: first column
    int(IMG_SIZE[1] - (IMG_SIZE[1] - IMG_SIZE[0] / 2) / 2),   # d: last column (exclusive)
]
model_name = 'models/pedestrian_classifier_small.pt'     # PyTorch state-dict checkpoint
onnx_path = "models/pedestrian_classifier_small.onnx"    # exported ONNX model
max_load = 10_000         # upper bound on the number of samples loaded into memory

In [3]:
#load examples
examples_folder = 'pedestrian_roadblock_imgs'
classes = ['pedestrian','roadblock', 'free_road']


def _label_from_name(name):
    """Class index encoded in an example file name (index into `classes`).

    Files starting with 'pedestrian' map to 0, files starting with 'roadblock'
    map to 1, and every other file is treated as free road (2).
    """
    if name.startswith('pedestrian'):
        return 0
    if name.startswith('roadblock'):
        return 1
    return 2


# os.listdir returns entries in arbitrary order, while later cells address the
# examples by their position in `names` / `example_imgs`. Sort by
# (class index, file name) so that the position is reproducible across runs and
# machines and, with one example per class, matches the order of `classes`.
names = sorted(
    (f for f in os.listdir(examples_folder) if f.endswith('.png')),
    key=lambda f: (_label_from_name(f), f),
)
example_imgs = [cv.imread(os.path.join(examples_folder, name), cv.IMREAD_GRAYSCALE) for name in names]
# cv.imread reports failure by returning None instead of raising
unreadable = [name for name, example in zip(names, example_imgs) if example is None]
if unreadable:
    raise IOError(f'could not read example images in {examples_folder}: {unreadable}')
example_labels = [_label_from_name(name) for name in names]
tot_examples = len(example_imgs)
tot_labels = len(classes)

#show images
cv.namedWindow('example', cv.WINDOW_NORMAL)
for i in range(tot_examples):
    cv.imshow('example', example_imgs[i])
    cv.waitKey(1)
cv.destroyAllWindows()

## Define the Classifier Network and Run a Smoke Test

In [4]:
#Model
class FrontalClassifier(nn.Module):
    """Small CNN that maps an image batch to `out_dim` raw class scores.

    No activation follows the last linear layer, so the outputs are logits.
    The spatial sizes noted beside each layer assume a 32x32 input; with that
    input the convolutional stack ends at 128 x 1 x 1, which is the 128
    features the first linear layer expects.

    Args:
        out_dim: number of scores produced per image.
        channels: number of channels of the input images.
    """
    def __init__(self, out_dim=4, channels=1):
        super().__init__()
        ### Convolutional feature extractor (sizes for a 32x32 input)
        feature_layers = [
            nn.Conv2d(channels, 32, kernel_size=5, stride=1),  # 32 -> 28
            nn.ReLU(True),
            nn.Conv2d(32, 32, kernel_size=5, stride=2),        # 28 -> 12
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=1),             # 12 -> 11
            nn.BatchNorm2d(32),
            nn.Dropout(0.5),
            nn.Conv2d(32, 64, kernel_size=3, stride=2),        # 11 -> 5
            nn.ReLU(True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2),       # 5 -> 2
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),             # 2 -> 1
            nn.Dropout(0.5),
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.flat = nn.Flatten()
        ### Fully connected head
        head_layers = [
            nn.Linear(in_features=128, out_features=256),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(in_features=256, out_features=out_dim),
        ]
        self.lin = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the (batch, out_dim) scores for a (batch, channels, H, W) input."""
        return self.lin(self.flat(self.conv(x)))

# Build the network with one output per class, then place it on the selected device
classifier = FrontalClassifier(channels=num_channels, out_dim=tot_labels)
classifier = classifier.to(device)

In [5]:
#smoke test: push one image through the freshly built network to check the shapes
test_img_path = 'tests/test_img.jpg'
img = cv.imread(test_img_path)
if img is None:
    # cv.imread reports failure by returning None instead of raising
    raise FileNotFoundError(f'could not read test image: {test_img_path}')
img = cv.resize(img, SIZE)
if num_channels == 1:
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # put back the channel axis dropped by the grayscale conversion
    img = np.expand_dims(img, axis=2)
#convert to a float tensor and move the channel axis first (HWC -> CHW)
img = torch.from_numpy(img).float()
img = img.permute(2,0,1)
#add the batch dimension
img = img.unsqueeze(0).to(device)
print(img.shape)

classifier.eval()

# Inference
with torch.no_grad():
    output = classifier(img)
    print(f'out shape: {output.shape}')

torch.Size([1, 1, 32, 32])
out shape: torch.Size([1, 3])


## Loading images and Labels

In [6]:
import cv2 as cv
import numpy as np
from numpy.random import randint

def _rand_int(low, high):
    """Draw one integer from [low, high) and return it as a built-in int.

    Scalars handed to OpenCV (colours, thicknesses, kernel sizes, points) are
    converted so the calls do not depend on how the bindings treat NumPy
    integer scalars.
    """
    return int(randint(low, high))


def _add_random_noise(img, max_std):
    """Subtract, then add, independent uniform uint8 noise (saturating arithmetic).

    The noise amplitude is drawn from [1, max_std) on every call; each noise
    image holds values in [0, amplitude).
    """
    amplitude = _rand_int(1, max_std)
    noisem = randint(0, amplitude, img.shape, dtype=np.uint8)
    img = cv.subtract(img, noisem)
    noisep = randint(0, amplitude, img.shape, dtype=np.uint8)
    return cv.add(img, noisep)


def load_and_augment_img(i, folder='training_imgs', example_index=0, example_imgs=example_imgs):
    """Load training image `i`, clutter it and paste a randomly augmented example on top.

    Steps: read `folder/img_{i+1}.png`, convert to grayscale, crop to ROI and
    draw random polylines and circles. The example `example_imgs[example_index]`
    is resized (longest side 240 px), randomly mirrored, blurred, noised,
    dilated, rotated by an angle in [-10, 10) degrees, perspective-warped,
    rescaled to 75-95 % of the largest size that fits the crop, contrast
    adjusted and pasted at a random position. Finally the whole image gets
    noise, a 5x5 blur, a random brightness shift, a colour inversion with
    probability 0.4 and is resized to SIZE.

    Zero-valued pixels of the example are treated as transparent background.

    Args:
        i: zero-based index of the background image (file name uses i+1).
        folder: directory holding the `img_<n>.png` background images.
        example_index: position in `example_imgs` of the example to paste.
        example_imgs: list of grayscale example images; the default is the list
            that exists when this function is defined.

    Returns:
        The augmented uint8 grayscale image, resized with cv.resize(img, SIZE).

    Raises:
        FileNotFoundError: if the background image cannot be read.
    """
    img_path = os.path.join(folder, f'img_{i+1}.png')
    img = cv.imread(img_path)
    if img is None:
        # cv.imread reports failure by returning None instead of raising
        raise FileNotFoundError(f'could not read training image: {img_path}')
    #convert to gray
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    #crop in the example ROI
    img = img[ROI[0]:ROI[1], ROI[2]:ROI[3]]

    #add random shapes to the image
    num_vertices = 5
    num_shapes = 3
    for _ in range(num_shapes):
        #add polygon
        ptsx = randint(0,img.shape[1],(num_vertices,))
        ptsy = randint(0,img.shape[0],(num_vertices,))
        #cv.polylines needs int32 points; randint's default dtype depends on the platform
        pts = np.stack((ptsx,ptsy), axis=1).astype(np.int32)
        img = cv.polylines(img, [pts], isClosed=False, color=_rand_int(0,255), thickness=_rand_int(2,10))
        center = (_rand_int(0,img.shape[1]), _rand_int(0,img.shape[0]))
        img = cv.circle(img, center, _rand_int(5,60), _rand_int(0,255), _rand_int(1,8))

    ## EXAMPLE AUGMENTATION
    #load example and bring its longest side to 240 px
    example = example_imgs[example_index]
    resize_ratio = 240./max(example.shape[0], example.shape[1])
    example = cv.resize(example, (int(example.shape[1]*resize_ratio), int(example.shape[0]*resize_ratio)))
    if np.random.uniform() < 0.5:
        #horizontal flip
        example = cv.flip(example, 1)

    #remember the background (zero) pixels before blur/noise/dilation smear them
    background = example == 0
    #blur the example
    example = cv.blur(example, (_rand_int(3,9),_rand_int(3,9)))

    #add noise to the example
    example = _add_random_noise(example, 100)

    #dilate
    kernel = np.ones((_rand_int(3,7),_rand_int(3,7)), np.uint8)
    example = cv.dilate(example, kernel, iterations=1)

    #restore the background to zero so it stays transparent when pasted
    example[background] = 0

    #random rotation around the centre
    angle = _rand_int(-10,10)
    M = cv.getRotationMatrix2D((example.shape[1]/2, example.shape[0]/2), angle, 1)
    example = cv.warpAffine(example, M, (example.shape[1], example.shape[0]))

    #perspective transform: every corner is pushed by a random offset
    perspective_deformation = 30
    pts1 = np.float32([[0,0],[example.shape[1],0],[example.shape[1],example.shape[0]],[0,example.shape[0]]])
    pts2 = pts1 + np.float32(randint(0,perspective_deformation,size=pts1.shape))
    new_size_x = int(np.max(pts2[:,0]) - np.min(pts2[:,0]))
    new_size_y = int(np.max(pts2[:,1]) - np.min(pts2[:,1]))
    M = cv.getPerspectiveTransform(pts1,pts2)
    example = cv.warpPerspective(example,M,(new_size_x,new_size_y))

    #resize example keeping proportions, to 75-95 % of the largest size fitting the crop
    img_example_ratio = min(img.shape[0]/example.shape[0], img.shape[1]/example.shape[1])
    scale_factor = np.random.uniform(.75, .95) * img_example_ratio
    example = cv.resize(example, (0,0), fx=scale_factor, fy=scale_factor)
    #canvas matching the img shape
    example_canvas = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)

    #get a random position for the example
    example_y = randint(0, img.shape[0] - example.shape[0])
    example_x = randint(0, img.shape[1] - example.shape[1])
    #paste example on canvas
    example_canvas[example_y:example_y+example.shape[0], example_x:example_x+example.shape[1]] = example

    #pixels covered by the example, taken before the contrast change alters the zeros
    example_pixels = example_canvas > 0
    #change contrast around mid-gray (const < 1 reduces it, const > 1 increases it)
    const = np.random.uniform(0.4,1.5)
    example_canvas = 127*(1-const) + example_canvas*const
    #clip values
    example_canvas = np.clip(example_canvas, 0, 255).astype(np.uint8)

    #paste canvas on img
    img = np.where(example_pixels, example_canvas, img)

    #add noise
    img = _add_random_noise(img, 80)

    #blur
    img = cv.blur(img, (5,5))

    #add random brightness
    max_brightness = 50
    brightness = _rand_int(-max_brightness, max_brightness)
    if brightness > 0:
        img = cv.add(img, brightness)
    elif brightness < 0:
        img = cv.subtract(img, -brightness)

    # invert color
    if np.random.uniform(0, 1) > 0.6:
        img = cv.bitwise_not(img)

    # resize
    img = cv.resize(img, SIZE)

    return img

# Preview: stream augmented samples to a window, cycling through the examples.
# Press 'q' or Esc to stop early.
cv.namedWindow('img', cv.WINDOW_NORMAL)

example_index = 0
try:
    for i in range(500):
        img = load_and_augment_img(i, example_index=example_index)
        cv.imshow('img', img)
        key = cv.waitKey(1)
        if key in (ord('q'), 27):
            break
        example_index = (example_index + 1) % tot_labels
finally:
    # close the preview even when loading an image fails
    cv.destroyAllWindows()

In [7]:
class CsvDataset(Dataset):
    """In-memory dataset of augmented images labelled with the pasted example's index.

    All samples are generated once in the constructor with
    `load_and_augment_img` and kept in a uint8 tensor, so `__getitem__` only
    slices and converts. `folder/regression_labels.csv` is used solely to find
    out how many samples are available. The example pasted on sample `i`, and
    therefore its label, cycles through `range(tot_labels)` (module-level
    variable).

    The first 500 generated samples are shown in a fullscreen OpenCV window
    while loading.

    Args:
        folder: directory with `regression_labels.csv` and the `img_<n>.png` files.
        example_imgs: example images forwarded to `load_and_augment_img`.
        transform: stored on the instance.
            NOTE(review): it is never applied in `__getitem__` - confirm whether it should be.
        max_load: maximum number of samples to generate.
        channels: size of the channel axis of the stored images.
    """
    def __init__(self, folder, example_imgs=example_imgs, transform=None, max_load=1000, channels=3):
        self.transform = transform
        self.folder = folder
        self.data = []
        self.channels = channels

        # The CSV is only needed for its line count, so close it right after reading
        with open(folder+'/regression_labels.csv', 'r') as f:
            lines = f.read().split('\n')
        lines = lines[0:-1] #remove footer (presumably the empty string after the trailing newline)
        max_load = min(max_load, len(lines))
        self.all_imgs = torch.zeros((max_load, SIZE[1], SIZE[0], channels), dtype=torch.uint8)

        max_show = 500  # number of samples previewed on screen
        cv.namedWindow('img', cv.WINDOW_NORMAL)
        cv.setWindowProperty('img', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)
        preview_open = True

        example_index = 0
        try:
            for i in tqdm(range(max_load)):
                # read from the folder given to the constructor, not the function's default
                img = load_and_augment_img(i, folder=folder, example_index=example_index, example_imgs=example_imgs)

                if i < max_show:
                    cv.imshow('img', img)
                    cv.waitKey(1)
                    if i == (max_show-1):
                        cv.destroyAllWindows()
                        preview_open = False

                #add a channel dimension to the image
                img = img[:, :,np.newaxis]
                self.all_imgs[i] = torch.from_numpy(img)

                #label (it is just the index of the example)
                self.data.append(example_index)
                example_index = (example_index + 1) % tot_labels
        finally:
            # fewer than max_show samples, or an error: the preview is still open
            if preview_open:
                cv.destroyAllWindows()

    def __len__(self):
        # The length of the dataset is simply the length of the self.data list
        return len(self.data)

    def __getitem__(self, idx):
        """Return (image as float tensor in channel-first order, integer label)."""
        img = self.all_imgs[idx]
        img = img.permute(2, 0, 1).float()
        value = self.data[idx]
        return img, value

In [8]:
#build the whole augmented training set in memory: slow up front, but training is faster afterwards
train_dataset = CsvDataset(folder='training_imgs', channels=num_channels, max_load=max_load)
cv.destroyAllWindows()

100%|██████████| 10000/10000 [01:33<00:00, 106.92it/s]


In [9]:
#data loader: reshuffled mini-batches of 100 samples from the in-memory dataset
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=100)

In [10]:
#test dataloader: pull one batch and print the shapes of its image and label tensors
sample = next(iter(train_dataloader))
print(sample[0].shape, sample[1].shape, sep='\n')
# cv.namedWindow('img', cv.WINDOW_NORMAL)
# for i in range(100):
#     img = sample[0][i].permute(1, 2, 0).numpy().astype(np.uint8)
#     label = sample[1][i]
#     # print(img.shape)
#     cv.imshow(names[label], img)
#     k = cv.waitKey(0)
#     cv.destroyAllWindows()
#     if k == ord('q') or k == 27:
#         break

torch.Size([100, 1, 32, 32])
torch.Size([100])


## Training

In [11]:
# Training function
def train_epoch(model, dataloader, loss_fn, optimizer, device=device):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Args:
        model: network to train; it is switched to training mode. Its output
            is expected to have the class scores on axis 1.
        dataloader: iterable yielding (input, label) batches, where `label`
            holds integer class indices.
        loss_fn: callable taking (output, one-hot float target) and returning
            a scalar loss tensor.
        optimizer: optimizer holding the parameters of `model`.
        device: device the batches are moved to. The default is bound once,
            when this function is defined, so pass the device explicitly if it
            is changed afterwards.

    Returns:
        The mean of the per-batch losses (NumPy scalar).
    """
    # Set the model to training mode
    model.train()
    losses = []

    # Loop over the training batches
    for (inputs, label) in tqdm(dataloader):
        # Move the batch to the selected device
        inputs, label = inputs.to(device), label.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Compute the output
        output = model(inputs)

        # One-hot targets as wide as the model output, so no global class count is needed
        target = nn.functional.one_hot(label.long(), num_classes=output.shape[1]).float()

        #Loss
        loss = loss_fn(output, target)

        # Compute the gradients
        loss.backward()
        # Update the weights
        optimizer.step()

        #batch loss
        losses.append(loss.detach().cpu().numpy())

    # Return the average training loss
    loss = np.mean(losses)
    return loss

In [12]:
# TRAINING
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
classifier.to(device)
#parameters
lr = 0.001
epochs = 1
optimizer = torch.optim.Adam(classifier.parameters(), lr=lr, weight_decay=3e-5) #3e-5
loss_fn = torch.nn.CrossEntropyLoss()
# torch.save does not create missing directories: make sure the target exists
# before spending time on training
Path(model_name).parent.mkdir(parents=True, exist_ok=True)
for epoch in range(epochs):
    loss = train_epoch(classifier, train_dataloader, loss_fn, optimizer, device)
    # drop the progress bar of the finished epoch before printing the summary
    clear_output(wait=True)
    print(f"Epoch {epoch+1}/{epochs}")
    print(f"loss: {loss}")
    # checkpoint the weights after every epoch
    torch.save(classifier.state_dict(), model_name)

Epoch 1/1
loss: 0.576439380645752


In [13]:
#save the model so that opencv can load it
import torch
import torch.onnx
import torchvision
import torchvision.models as models
import sys

# The export runs on the CPU; map_location lets a checkpoint written from a
# CUDA session load on a machine without a GPU.
device = torch.device('cpu')
classifier.load_state_dict(torch.load(model_name, map_location=device))
classifier.to(device)

# set the model to inference mode
classifier.eval()

# Create some sample input in the shape this model expects
# This is needed because the conversion runs one forward pass through the network
dummy_input = torch.randn(1, num_channels, SIZE[1], SIZE[0])
# make sure the output directory exists before exporting
Path(onnx_path).parent.mkdir(parents=True, exist_ok=True)
torch.onnx.export(classifier, dummy_input, onnx_path, verbose=True)

# discard the verbose export log
clear_output(wait=False)

In [14]:
#test with opencv: run the exported ONNX model through cv.dnn on one training batch

#The Magic:
lk =  cv.dnn.readNetFromONNX(onnx_path)
print(onnx_path)

avg_col = (0,0,0) if num_channels == 3 else 0

# one batch from the training loader; iterate over however many samples it holds
sample = next(iter(train_dataloader))
batch_size = len(sample[0])
images = [sample[0][i].permute(1, 2, 0).numpy().astype(np.uint8) for i in range(batch_size)]
labels = [int(sample[1][i]) for i in range(batch_size)]

assert SIZE == (32, 32)

cv.namedWindow('img', cv.WINDOW_NORMAL)
try:
    for i in tqdm(range(batch_size)):
        image = images[i]
        image = cv.resize(image, SIZE)
        #add noise
        std = 80
        std = randint(1, std)
        noisem = randint(0, std, image.shape, dtype=np.uint8)
        image = cv.subtract(image, noisem)
        noisep = randint(0, std, image.shape, dtype=np.uint8)
        image = cv.add(image, noisep)
        image = cv.blur(image, (3,3))
        blob = cv.dnn.blobFromImage(image, 1.0, SIZE, avg_col, swapRB=True, crop=False)
        lk.setInput(blob)
        preds = lk.forward()
        preds = preds[0]
        print(f'preds: {preds}')
        preds = preds.argmax()
        # labels are example indices, so `names` (the example file names) is the lookup table
        #put predicted label
        cv.putText(image, names[preds], (5, 10), cv.FONT_HERSHEY_SIMPLEX, 0.3, 0, 1)
        #put true label
        cv.putText(image, names[labels[i]], (5, 20), cv.FONT_HERSHEY_SIMPLEX, 0.3, 0, 1)
        cv.imshow("img", image)
        # wait for a key press per image; 'q' or Esc stops the review
        k = cv.waitKey(0)
        if k == ord('q') or k == 27:
            break
finally:
    cv.destroyAllWindows()

# class index predicted for the last image that was shown
print(f"Predictions: {preds}")
print(f"Predictions shape: {preds.shape}")

models/pedestrian_classifier_small.onnx


  0%|          | 0/100 [00:00<?, ?it/s]

preds: [ -0.73728687  16.127693   -18.813717  ]


  1%|          | 1/100 [00:02<04:42,  2.85s/it]

preds: [ 0.31071287  4.08805    -5.1593347 ]


  2%|▏         | 2/100 [00:03<02:19,  1.42s/it]

preds: [ 0.3980618  3.746822  -4.9570856]


  3%|▎         | 3/100 [00:03<01:32,  1.05it/s]

preds: [ -0.39833432   9.148591   -10.418233  ]


  4%|▍         | 4/100 [00:04<01:13,  1.31it/s]

preds: [  1.1312968  10.200394  -13.341937 ]


  5%|▌         | 5/100 [00:04<01:03,  1.51it/s]

preds: [ 0.7235406  7.239696  -9.677542 ]


  6%|▌         | 6/100 [00:05<00:57,  1.63it/s]

preds: [ -0.6212154  11.732239  -12.972123 ]


  7%|▋         | 7/100 [00:05<00:54,  1.70it/s]

preds: [ 0.2804069  7.496891  -9.387354 ]


  9%|▉         | 9/100 [00:06<00:38,  2.35it/s]

preds: [ -1.8555509  18.21523   -20.06373  ]


 10%|█         | 10/100 [00:06<00:32,  2.77it/s]

preds: [ -1.874459  16.459103 -17.096992]
preds: [ 5.458214  -6.355572  -1.0987706]


 11%|█         | 11/100 [00:07<00:36,  2.43it/s]

preds: [ 1.4117874   0.65696704 -2.6261353 ]


 12%|█▏        | 12/100 [00:07<00:39,  2.21it/s]

preds: [ 4.6434746  -5.941686   -0.46274725]


 13%|█▎        | 13/100 [00:08<00:42,  2.03it/s]

preds: [ -0.1306749  12.566153  -15.057537 ]


 14%|█▍        | 14/100 [00:08<00:44,  1.91it/s]

preds: [ 2.0031974 -4.279322   1.5215364]


 15%|█▌        | 15/100 [00:09<00:56,  1.50it/s]

preds: [ 0.01869263  4.7329354  -5.4449687 ]


 16%|█▌        | 16/100 [00:10<01:04,  1.31it/s]

preds: [ 0.18916178  3.9323244  -4.8915963 ]


 17%|█▋        | 17/100 [00:11<00:56,  1.47it/s]

preds: [ -0.3565766   9.06713   -10.486619 ]


 18%|█▊        | 18/100 [00:11<00:51,  1.58it/s]

preds: [ -2.3537576  21.346931  -22.837492 ]


 19%|█▉        | 19/100 [00:12<00:48,  1.68it/s]

preds: [ 0.40724927 -4.239979    3.4637053 ]


 20%|██        | 20/100 [00:12<00:47,  1.70it/s]

preds: [ 0.65423924  2.0089638  -3.186534  ]


 21%|██        | 21/100 [00:13<00:46,  1.70it/s]

preds: [ -1.6031246  16.104263  -17.48974  ]


 22%|██▏       | 22/100 [00:13<00:42,  1.84it/s]

preds: [ 3.3196917 -5.517394   0.8022279]


 23%|██▎       | 23/100 [00:14<00:42,  1.82it/s]

preds: [ 2.010482   -2.0907686  -0.71604085]


 24%|██▍       | 24/100 [00:14<00:40,  1.86it/s]

preds: [ 2.742787  -8.065161   3.7410886]


 26%|██▌       | 26/100 [00:15<00:27,  2.68it/s]

preds: [ -1.2064304  18.261969  -20.73418  ]
preds: [ 0.67057693 -4.6458693   3.59709   ]


 28%|██▊       | 28/100 [00:15<00:20,  3.50it/s]

preds: [ 3.3368814 -1.123427  -3.2434156]


 29%|██▉       | 29/100 [00:15<00:18,  3.78it/s]

preds: [ 4.3585916 -6.833519   0.6875418]
preds: [ 0.7102581  0.9531143 -1.9959917]


 30%|███       | 30/100 [00:16<00:23,  3.04it/s]

preds: [ -0.22829854  13.24263    -15.976512  ]


 32%|███▏      | 32/100 [00:17<00:20,  3.28it/s]

preds: [ 1.2023199 -6.5594196  4.4936223]
preds: [ -0.88628   15.305637 -17.364729]


 33%|███▎      | 33/100 [00:17<00:20,  3.21it/s]

preds: [ 5.1317844 -5.776065  -1.2257028]


 34%|███▍      | 34/100 [00:17<00:19,  3.34it/s]

preds: [ 1.9619266 -1.2709882 -1.438403 ]


 35%|███▌      | 35/100 [00:17<00:18,  3.61it/s]

preds: [ 5.0312877 -3.9410217 -2.87509  ]


 36%|███▌      | 36/100 [00:18<00:16,  3.83it/s]

preds: [ 2.4220166  -4.527466    0.97134197]


 37%|███▋      | 37/100 [00:18<00:17,  3.53it/s]

preds: [ -2.0804355  22.40704   -24.695654 ]


 38%|███▊      | 38/100 [00:18<00:16,  3.66it/s]

preds: [ 0.28084692 -4.1957383   3.5304706 ]


 40%|████      | 40/100 [00:19<00:14,  4.20it/s]

preds: [ -0.8658131  14.786174  -16.816664 ]


 41%|████      | 41/100 [00:19<00:13,  4.42it/s]

preds: [ 1.288798 -6.686943  4.483503]
preds: [ 0.53266054  3.9168434  -5.1586533 ]


 42%|████▏     | 42/100 [00:19<00:15,  3.65it/s]

preds: [ 0.38160944  3.8491364  -5.0349455 ]


 43%|████▎     | 43/100 [00:20<00:18,  3.02it/s]

preds: [ 2.2057195 -7.2906294  3.7905917]


 44%|████▍     | 44/100 [00:20<00:20,  2.74it/s]

preds: [ -1.4550737  15.366939  -16.92294  ]


 45%|████▌     | 45/100 [00:21<00:22,  2.43it/s]

preds: [ -1.3947803  13.127969  -14.260843 ]


 46%|████▌     | 46/100 [00:21<00:22,  2.35it/s]

preds: [ -0.42924935  11.928717   -14.278844  ]


 47%|████▋     | 47/100 [00:21<00:22,  2.39it/s]

preds: [ 0.55028826  1.9768213  -2.8125806 ]


 48%|████▊     | 48/100 [00:22<00:24,  2.11it/s]

preds: [ 0.47401297  1.8027357  -2.6341093 ]


 49%|████▉     | 49/100 [00:23<00:23,  2.13it/s]

preds: [ 0.57706326 -1.2674538   0.5608396 ]


 50%|█████     | 50/100 [00:23<00:23,  2.09it/s]

preds: [ -0.59004337  14.169845   -16.455698  ]


 51%|█████     | 51/100 [00:24<00:25,  1.93it/s]

preds: [ 1.134756  -8.212007   5.9962587]


 52%|█████▏    | 52/100 [00:24<00:26,  1.80it/s]

preds: [ 2.1549017  6.275324  -9.778039 ]


 53%|█████▎    | 53/100 [00:25<00:24,  1.92it/s]

preds: [ -1.6806527  20.863087  -23.280863 ]


 54%|█████▍    | 54/100 [00:25<00:22,  2.04it/s]

preds: [ 0.735347   -0.5480509  -0.32499397]


 54%|█████▍    | 54/100 [00:35<00:30,  1.50it/s]

Predictions: 0
Predictions shape: ()





In [15]:
# #get image and label
# cv.namedWindow('img')
# dataloader = DataLoader(train_dataset, batch_size=10000, shuffle=False)
# for i, (imgs, labels) in enumerate(tqdm(dataloader)):
#     #convert img to numpy
#     imgs = imgs.cpu().numpy()
#     for i in range(imgs.shape[0]):
#         img = imgs[i][0]
#         #convert to uint8d
#         img = img.astype(np.uint8)
#         cv.imshow("img", img)
#         cv.waitKey(1)

# cv.destroyAllWindows()