In [1]:
import torch

from torchvision.transforms import ToTensor, Compose, Normalize

from data.dataset import get_data

from model.model import YOLONet
from model.train import train
from model.loss import DetectionLoss

# Run configuration: the values a reader is most likely to tune.
BATCH_SIZE = 8
DATASET = "../datasets/Human-Fall-Detection"

# When True, the data-loading call receives end=BATCH_SIZE instead of end=None.
SUBSET = False

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Preprocessing pipeline: convert to a tensor, then normalise with a single
# mean/std pair.
transform_data = Compose(
    [
        ToTensor(),
        Normalize(mean=(0.4379,), std=(0.3040,)),
    ]
)

# `end` is BATCH_SIZE in SUBSET mode and None otherwise.
data = get_data(
    DATASET,
    transform_data,
    device=device,
    batch_size=BATCH_SIZE,
    end=(BATCH_SIZE if SUBSET else None),
)

# Relies on data["dataloaders"] holding exactly three values, in
# train / val / test order — presumably guaranteed by get_data; verify there.
train_loader, val_loader, test_loader = data["dataloaders"].values()

In [3]:
# Build the network and its loss with `device` as the default device for
# tensors created inside this block.
with torch.device(device):
  model = YOLONet(3, ch=(64,32,16))
  # map_location remaps the checkpoint's tensors onto `device`, so a file saved
  # on a GPU still loads on a CPU-only machine. weights_only=True restricts
  # unpickling to tensors and plain containers rather than arbitrary objects.
  model.load_state_dict(
    torch.load('model_softmax_v2.pt', map_location=device, weights_only=True)
  )
  criterion = DetectionLoss(model.head)

# Continue training from the loaded checkpoint. The literal 10 is presumably
# the number of epochs (the training log shows passes numbered 1-10) — confirm
# against model.train.train.
train(model, train_loader, val_loader, test_loader, 10, criterion)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
[1, 330] train loss: 2.6603, 139.1799, 2.8511: 100%|██████████| 337/337 [05:13<00:00,  1.08it/s]
[1, 110] valid loss: 2.7599, 143.5593, 2.9507: 100%|██████████| 113/113 [03:35<00:00,  1.90s/it]


[1] valid loss: 2.7314, 143.0701, 2.9240


[2, 330] train loss: 2.7353, 135.6245, 2.9472: 100%|██████████| 337/337 [42:46<00:00,  7.62s/it]
[2, 110] valid loss: 2.8066, 149.4768, 3.0401: 100%|██████████| 113/113 [03:19<00:00,  1.77s/it]


[2] valid loss: 2.7768, 148.4603, 3.0082


[3, 330] train loss: 2.8135, 128.2020, 2.8844: 100%|██████████| 337/337 [29:39<00:00,  5.28s/it]
[3, 110] valid loss: 2.7246, 148.2778, 2.9073: 100%|██████████| 113/113 [03:19<00:00,  1.76s/it]


[3] valid loss: 2.7073, 146.4245, 2.8816


[4, 330] train loss: 2.6357, 105.9392, 2.7356: 100%|██████████| 337/337 [37:59<00:00,  6.76s/it]
[4, 110] valid loss: 2.5865, 139.2719, 2.7614: 100%|██████████| 113/113 [03:32<00:00,  1.88s/it]


[4] valid loss: 2.5547, 137.6243, 2.7297


[5, 330] train loss: 2.5054, 109.3245, 2.6339: 100%|██████████| 337/337 [33:29<00:00,  5.96s/it]
[5, 110] valid loss: 2.5126, 137.4000, 2.6396: 100%|██████████| 113/113 [03:19<00:00,  1.77s/it]


[5] valid loss: 2.4855, 135.5972, 2.6105


[6, 330] train loss: 2.4713, 118.0051, 2.5512: 100%|██████████| 337/337 [33:37<00:00,  5.99s/it]
[6, 110] valid loss: 2.4366, 131.9383, 2.5650: 100%|██████████| 113/113 [03:30<00:00,  1.86s/it]


[6] valid loss: 2.4119, 131.0715, 2.5403


[7, 330] train loss: 2.3747, 128.0989, 2.4035: 100%|██████████| 337/337 [30:50<00:00,  5.49s/it]
[7, 110] valid loss: 2.3540, 133.2831, 2.4908: 100%|██████████| 113/113 [03:02<00:00,  1.61s/it]


[7] valid loss: 2.3326, 132.2498, 2.4709


[8, 330] train loss: 2.2641, 112.4141, 2.4322: 100%|██████████| 337/337 [23:51<00:00,  4.25s/it]
[8, 110] valid loss: 2.3503, 129.5199, 2.4798: 100%|██████████| 113/113 [03:00<00:00,  1.59s/it]


[8] valid loss: 2.3304, 127.3902, 2.4558


[9, 330] train loss: 2.2825, 102.2409, 2.4253: 100%|██████████| 337/337 [19:36<00:00,  3.49s/it]
[9, 110] valid loss: 2.3240, 127.0210, 2.4566: 100%|██████████| 113/113 [03:16<00:00,  1.74s/it]


[9] valid loss: 2.3033, 125.9188, 2.4336


[10, 330] train loss: 2.1344, 98.2313, 2.3436: 100%|██████████| 337/337 [14:58<00:00,  2.67s/it] 
[10, 110] valid loss: 2.2890, 125.4112, 2.4283: 100%|██████████| 113/113 [02:40<00:00,  1.42s/it]


[10] valid loss: 2.2730, 124.8885, 2.4106


[10, 110] test loss: 2.3277, 97.5627, 2.4328: 100%|██████████| 113/113 [02:45<00:00,  1.47s/it]

Final test loss: tensor([ 2.3079, 95.9329,  2.4111], device='cuda:0')
Finished Training





In [4]:
# Put the model in evaluation mode before running it on one test batch.
model.eval()
image, label = next(iter(test_loader))
print(image.shape)
# Inference only: turn autograd off so the forward pass does not record a
# graph (and the result carries no grad_fn).
with torch.no_grad():
  output = model(image)
output.shape

torch.Size([8, 3, 640, 640])
torch.Size([8, 4, 2100]) torch.Size([8, 3, 2100]) torch.Size([8, 7, 2100])


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


torch.Size([8, 7, 2100])

In [5]:
# Cut dim 1 of the model output into its first 4 and its last 3 entries.
boxes, clses = torch.split(output, [4, 3], dim=1)
(boxes.shape, clses.shape)

(torch.Size([8, 4, 2100]), torch.Size([8, 3, 2100]))

In [6]:
# `conf` is the largest *log*-softmax value along dim 1 (so it is <= 0, not a
# probability); `cls_ids` is the index along dim 1 where that maximum occurs.
conf, cls_ids = torch.log_softmax(clses, dim=1).max(dim=1)

In [12]:
# Exponentiate the log-softmax over dim 1, giving values that sum to 1 along
# that dimension.
torch.exp(torch.log_softmax(clses, dim=1))

tensor([[[0.5308, 0.3990, 0.3713,  ..., 0.2079, 0.1830, 0.1774],
         [0.3052, 0.3039, 0.2427,  ..., 0.4329, 0.4094, 0.4607],
         [0.1641, 0.2971, 0.3861,  ..., 0.3592, 0.4076, 0.3619]],

        [[0.6736, 0.6001, 0.5446,  ..., 0.3976, 0.3322, 0.2089],
         [0.2365, 0.2926, 0.2720,  ..., 0.2430, 0.3046, 0.3571],
         [0.0899, 0.1073, 0.1833,  ..., 0.3594, 0.3631, 0.4340]],

        [[0.5556, 0.5198, 0.5598,  ..., 0.2366, 0.2324, 0.1575],
         [0.3098, 0.3181, 0.2846,  ..., 0.3536, 0.3494, 0.3737],
         [0.1346, 0.1621, 0.1555,  ..., 0.4097, 0.4182, 0.4687]],

        ...,

        [[0.6948, 0.6742, 0.6969,  ..., 0.2171, 0.2109, 0.1795],
         [0.2285, 0.2525, 0.2241,  ..., 0.3283, 0.3176, 0.3685],
         [0.0768, 0.0733, 0.0791,  ..., 0.4546, 0.4716, 0.4520]],

        [[0.5282, 0.4670, 0.4694,  ..., 0.4771, 0.3539, 0.2313],
         [0.3432, 0.3797, 0.3734,  ..., 0.2233, 0.2882, 0.3500],
         [0.1286, 0.1533, 0.1573,  ..., 0.2996, 0.3579, 0.4187]],

 

In [74]:
# Softmax along dim 1, then look at index 0 of dim 0 and index 0 of dim 2.
# Uses the public torch.softmax rather than reaching through
# torch.functional.F, which is only an import alias inside that module.
torch.softmax(clses, dim=1)[0, :, 0]

tensor([0.3333, 0.3333, 0.3333], device='cuda:0', grad_fn=<SelectBackward0>)

In [18]:
# Sample a (1, 4, 3) tensor of uniform random values in [0, 1).
t = torch.rand((1, 4, 3))
t

tensor([[[0.9796, 0.8334, 0.0744],
         [0.2290, 0.4009, 0.7931],
         [0.7164, 0.3633, 0.4729],
         [0.4335, 0.0868, 0.1258]]])

In [27]:
# Softmax over the last axis (dim 2) of the toy tensor, so each row sums to 1.
# Uses the public torch.softmax rather than reaching through
# torch.functional.F, which is only an import alias inside that module.
torch.softmax(t, dim=2)

tensor([[[0.4408, 0.3809, 0.1783],
         [0.2535, 0.3010, 0.4455],
         [0.4022, 0.2825, 0.3153],
         [0.4095, 0.2895, 0.3010]]])

In [16]:
# Largest single value anywhere in `clses` (before any softmax is applied).
torch.max(clses)

tensor(0.0427, device='cuda:0', grad_fn=<MaxBackward1>)

In [4]:
# Write the current weights to disk. Note this is the same file name the
# model-setup cell loads from, so the previous checkpoint is overwritten.
torch.save(obj=model.state_dict(), f='model_softmax_v2.pt')

In [5]:
# Take the first batch yielded by a fresh iterator over test_loader and
# unpack it into its two components.
first_batch = next(iter(test_loader))
image, label = first_batch

In [4]:
from tqdm import tqdm

# Walk the whole test loader and show each batch's tensor shapes in the
# progress-bar description. This only exercises the data pipeline: the model
# is never called here. (The statements that used to follow an unconditional
# `continue` were unreachable and have been removed, along with the unused
# enumerate index.)
pbar = tqdm(test_loader)
for image, label in pbar:
  pbar.set_description_str(f"{image.shape} {label.shape}")

torch.Size([7, 3, 640, 640]) torch.Size([10, 6]): 100%|██████████| 113/113 [00:07<00:00, 14.80it/s]
