In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from torchvision.transforms import ToTensor, Compose, Normalize

from data.dataset import YOLOv8Dataset as Dataset, collate_fn

In [2]:
# Convert images to tensors, then normalise with the given mean / std.
# NOTE(review): a single mean/std pair is supplied although the batches printed
# further down are 3-channel ([8, 3, 640, 640]) — presumably the one value is
# meant to apply to every channel; confirm.
transform_data = Compose([ToTensor(), Normalize((0.4379,), (0.3040,))])

DATASET = "../datasets/Human-Fall-Detection"
BATCH_SIZE = 8

train_dataset = Dataset(DATASET + '/train', transform=transform_data)
val_dataset = Dataset(DATASET + '/valid', transform=transform_data)
test_dataset = Dataset(DATASET + '/test', transform=transform_data)

# Reduced versions of each split. They are not wired into the loaders below,
# which iterate over the full datasets.
train_subset = Dataset(train_dataset, end=64)
val_subset = Dataset(val_dataset, end=8)
test_subset = Dataset(test_dataset, end=8)

# Only the training data is shuffled. Validation and test batches are served in
# a fixed order so that the periodic "[epoch, step] valid loss" read-outs refer
# to the same batches in every epoch and can be compared across epochs.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)


In [3]:
# Slice the first five entries of the training set for manual inspection.
samples = train_dataset[:5]

In [4]:
# Size of the first axis of the second element of sample 4.
# NOTE(review): presumably the number of labelled boxes in that image — confirm
# against YOLOv8Dataset.__getitem__.
train_dataset[4][1].shape[0]


5

In [5]:
# Fetch a single collated batch from the training loader.
train_sample = next(iter(train_loader))

In [6]:
# Shapes of the first two elements of the collated batch.
# NOTE(review): presumably (images, targets) with one target row per box and
# six columns matching the (image, class, x, y, w, h) layout used by
# build_targets — confirm against collate_fn.
train_sample[0].shape, train_sample[1].shape

(torch.Size([8, 3, 640, 640]), torch.Size([18, 6]))

In [7]:
import torch
from model.model import YOLONet

from model.train import train
from model.loss import DetectionLoss

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the network and its loss inside the device context so that tensors
# created during construction default to `device`.
with torch.device(device):
    model = YOLONet(ch=(64,32,32))
    criterion = DetectionLoss(model.head)



In [8]:
# Take one batch and show the device of its images next to the device of the
# model's first parameter.
images, labels = next(iter(train_loader))
display(images.device, next(model.parameters()).device)

# Walk over all parameters and print a device every time it differs from the
# device of the previous parameter (so a model on one device prints once).
prev = None
for p in model.parameters():
    if p.device == prev:
        continue
    prev = p.device
    print(prev)


device(type='cuda', index=0)

device(type='cuda', index=0)

cuda:0


In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Start from a freshly constructed model and loss, created inside the device
# context so that tensors built during construction default to `device`.
with torch.device(device):
    model = YOLONet(ch=(64,32,32))
    criterion = DetectionLoss(model.head)

# NOTE(review): 100 is presumably the number of epochs — confirm against the
# signature of model.train.train.
train(model, train_loader, val_loader, test_loader, 100, criterion)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  3%|▎         | 10/337 [02:30<1:36:37, 17.73s/it]

[1, 10] train loss: tensor([3.2190, 5.5430, 4.1840], device='cuda:0')


  6%|▌         | 20/337 [05:01<1:33:44, 17.74s/it]

[1, 20] train loss: tensor([3.3231, 4.7452, 4.0227], device='cuda:0')


  9%|▉         | 30/337 [07:31<1:30:32, 17.69s/it]

[1, 30] train loss: tensor([3.1793, 4.3101, 3.8435], device='cuda:0')


 12%|█▏        | 40/337 [10:01<1:27:37, 17.70s/it]

[1, 40] train loss: tensor([3.1656, 4.0105, 3.7336], device='cuda:0')


 15%|█▍        | 50/337 [12:32<1:24:45, 17.72s/it]

[1, 50] train loss: tensor([3.2188, 3.6826, 3.6633], device='cuda:0')


 18%|█▊        | 60/337 [15:02<1:21:49, 17.72s/it]

[1, 60] train loss: tensor([3.0722, 3.7976, 3.6497], device='cuda:0')


 21%|██        | 70/337 [17:33<1:18:53, 17.73s/it]

[1, 70] train loss: tensor([3.1534, 3.5726, 3.5799], device='cuda:0')


 24%|██▎       | 80/337 [20:04<1:15:55, 17.72s/it]

[1, 80] train loss: tensor([3.2127, 3.6757, 3.5607], device='cuda:0')


 27%|██▋       | 90/337 [22:34<1:12:57, 17.72s/it]

[1, 90] train loss: tensor([3.0202, 3.3783, 3.4730], device='cuda:0')


 30%|██▉       | 100/337 [25:05<1:10:12, 17.78s/it]

[1, 100] train loss: tensor([3.0320, 3.5280, 3.4185], device='cuda:0')


 33%|███▎      | 110/337 [27:36<1:07:01, 17.72s/it]

[1, 110] train loss: tensor([3.0318, 3.4190, 3.4230], device='cuda:0')


 36%|███▌      | 120/337 [30:06<1:04:15, 17.77s/it]

[1, 120] train loss: tensor([3.0972, 3.2804, 3.3236], device='cuda:0')


 39%|███▊      | 130/337 [32:37<1:00:54, 17.65s/it]

[1, 130] train loss: tensor([3.0693, 3.3184, 3.3720], device='cuda:0')


 42%|████▏     | 140/337 [35:07<58:03, 17.68s/it]  

[1, 140] train loss: tensor([2.9374, 3.3524, 3.3503], device='cuda:0')


 45%|████▍     | 150/337 [37:37<55:05, 17.68s/it]

[1, 150] train loss: tensor([2.9669, 3.3083, 3.3534], device='cuda:0')


 47%|████▋     | 160/337 [40:08<52:13, 17.70s/it]

[1, 160] train loss: tensor([3.0688, 3.2111, 3.3916], device='cuda:0')


 50%|█████     | 170/337 [42:38<49:15, 17.70s/it]

[1, 170] train loss: tensor([3.0027, 3.3387, 3.3013], device='cuda:0')


 53%|█████▎    | 180/337 [45:09<46:20, 17.71s/it]

[1, 180] train loss: tensor([2.9039, 3.3110, 3.2543], device='cuda:0')


 56%|█████▋    | 190/337 [47:39<43:18, 17.68s/it]

[1, 190] train loss: tensor([3.0724, 3.3859, 3.3743], device='cuda:0')


 59%|█████▉    | 200/337 [50:10<40:34, 17.77s/it]

[1, 200] train loss: tensor([2.8270, 3.3586, 3.3192], device='cuda:0')


 62%|██████▏   | 210/337 [52:41<37:32, 17.74s/it]

[1, 210] train loss: tensor([2.7939, 3.2810, 3.3045], device='cuda:0')


 65%|██████▌   | 220/337 [55:11<34:30, 17.69s/it]

[1, 220] train loss: tensor([2.9244, 3.3915, 3.2989], device='cuda:0')


 68%|██████▊   | 230/337 [57:41<31:30, 17.67s/it]

[1, 230] train loss: tensor([2.9689, 3.4324, 3.2833], device='cuda:0')


 71%|███████   | 240/337 [1:00:12<28:34, 17.68s/it]

[1, 240] train loss: tensor([2.8796, 3.4388, 3.2457], device='cuda:0')


 74%|███████▍  | 250/337 [1:02:42<25:37, 17.67s/it]

[1, 250] train loss: tensor([2.8413, 3.2625, 3.2568], device='cuda:0')


 77%|███████▋  | 260/337 [1:05:12<22:44, 17.72s/it]

[1, 260] train loss: tensor([2.8306, 3.2954, 3.1979], device='cuda:0')


 80%|████████  | 270/337 [1:07:43<19:43, 17.67s/it]

[1, 270] train loss: tensor([2.8133, 3.2016, 3.1781], device='cuda:0')


 83%|████████▎ | 280/337 [1:10:13<16:48, 17.70s/it]

[1, 280] train loss: tensor([2.7694, 3.2345, 3.1361], device='cuda:0')


 86%|████████▌ | 290/337 [1:12:44<13:51, 17.69s/it]

[1, 290] train loss: tensor([2.9062, 3.2301, 3.1834], device='cuda:0')


 89%|████████▉ | 300/337 [1:15:14<10:54, 17.70s/it]

[1, 300] train loss: tensor([2.8286, 3.3075, 3.2029], device='cuda:0')


 92%|█████████▏| 310/337 [1:17:44<07:57, 17.68s/it]

[1, 310] train loss: tensor([2.7859, 3.3588, 3.1750], device='cuda:0')


 95%|█████████▍| 320/337 [1:20:15<05:01, 17.76s/it]

[1, 320] train loss: tensor([2.8405, 3.2704, 3.1275], device='cuda:0')


 98%|█████████▊| 330/337 [1:22:46<02:04, 17.73s/it]

[1, 330] train loss: tensor([2.8470, 3.3334, 3.2390], device='cuda:0')


100%|██████████| 337/337 [1:24:16<00:00, 15.01s/it]
  9%|▉         | 10/113 [01:19<14:01,  8.17s/it]

[1, 10] valid loss: tensor([2.9286, 3.3355, 3.2683], device='cuda:0')


 18%|█▊        | 20/113 [02:41<12:30,  8.07s/it]

[1, 20] valid loss: tensor([2.9825, 3.5148, 3.3065], device='cuda:0')


 27%|██▋       | 30/113 [04:04<11:27,  8.28s/it]

[1, 30] valid loss: tensor([3.2174, 3.3138, 3.2333], device='cuda:0')


 35%|███▌      | 40/113 [05:27<10:05,  8.30s/it]

[1, 40] valid loss: tensor([3.5216, 3.2766, 3.2433], device='cuda:0')


 44%|████▍     | 50/113 [06:51<08:47,  8.37s/it]

[1, 50] valid loss: tensor([2.5998, 3.4210, 3.2696], device='cuda:0')


 53%|█████▎    | 60/113 [08:14<07:23,  8.37s/it]

[1, 60] valid loss: tensor([3.6361, 3.5335, 3.3704], device='cuda:0')


 62%|██████▏   | 70/113 [09:37<05:57,  8.32s/it]

[1, 70] valid loss: tensor([2.6152, 3.4354, 3.3727], device='cuda:0')


 71%|███████   | 80/113 [11:00<04:35,  8.34s/it]

[1, 80] valid loss: tensor([2.3721, 3.6373, 3.3467], device='cuda:0')


 80%|███████▉  | 90/113 [12:24<03:10,  8.30s/it]

[1, 90] valid loss: tensor([2.7839, 3.7056, 3.2289], device='cuda:0')


 88%|████████▊ | 100/113 [13:47<01:48,  8.36s/it]

[1, 100] valid loss: tensor([2.8369, 3.6784, 3.3326], device='cuda:0')


 97%|█████████▋| 110/113 [15:11<00:25,  8.37s/it]

[1, 110] valid loss: tensor([2.8070, 3.6566, 3.3406], device='cuda:0')


100%|██████████| 113/113 [15:36<00:00,  8.29s/it]
  3%|▎         | 10/337 [03:42<2:23:24, 26.31s/it]

[2, 10] train loss: tensor([2.7993, 3.3382, 3.1695], device='cuda:0')


  6%|▌         | 20/337 [07:24<2:19:00, 26.31s/it]

[2, 20] train loss: tensor([2.8861, 3.3255, 3.2283], device='cuda:0')


  9%|▉         | 30/337 [11:06<2:14:40, 26.32s/it]

[2, 30] train loss: tensor([2.9446, 3.3441, 3.1648], device='cuda:0')


 12%|█▏        | 40/337 [14:48<2:10:21, 26.34s/it]

[2, 40] train loss: tensor([2.9039, 3.2040, 3.1432], device='cuda:0')


 15%|█▍        | 50/337 [18:30<2:05:41, 26.28s/it]

[2, 50] train loss: tensor([2.8425, 3.3305, 3.1250], device='cuda:0')


 18%|█▊        | 60/337 [22:13<2:01:27, 26.31s/it]

[2, 60] train loss: tensor([2.8703, 3.2372, 3.1227], device='cuda:0')


 21%|██        | 70/337 [25:55<1:56:54, 26.27s/it]

[2, 70] train loss: tensor([2.7386, 3.2758, 3.0798], device='cuda:0')


 24%|██▎       | 80/337 [29:37<1:52:43, 26.32s/it]

[2, 80] train loss: tensor([2.6873, 3.2972, 3.1011], device='cuda:0')


 27%|██▋       | 90/337 [33:19<1:48:16, 26.30s/it]

[2, 90] train loss: tensor([2.8372, 3.2981, 3.1308], device='cuda:0')


 30%|██▉       | 100/337 [37:01<1:44:03, 26.34s/it]

[2, 100] train loss: tensor([2.8488, 3.2921, 3.1380], device='cuda:0')


 33%|███▎      | 110/337 [40:43<1:39:23, 26.27s/it]

[2, 110] train loss: tensor([2.6453, 3.2323, 3.1038], device='cuda:0')


 36%|███▌      | 120/337 [44:25<1:35:09, 26.31s/it]

[2, 120] train loss: tensor([2.8001, 3.2119, 3.0579], device='cuda:0')


 39%|███▊      | 130/337 [48:07<1:30:42, 26.29s/it]

[2, 130] train loss: tensor([2.6993, 3.2664, 3.1154], device='cuda:0')


 42%|████▏     | 140/337 [51:50<1:26:22, 26.31s/it]

[2, 140] train loss: tensor([2.8388, 3.3247, 3.1305], device='cuda:0')


 45%|████▍     | 150/337 [55:32<1:21:58, 26.30s/it]

[2, 150] train loss: tensor([2.7989, 3.1801, 3.0941], device='cuda:0')


 47%|████▋     | 160/337 [59:14<1:17:38, 26.32s/it]

[2, 160] train loss: tensor([2.8681, 3.2275, 3.0933], device='cuda:0')


 50%|█████     | 170/337 [1:02:56<1:13:11, 26.30s/it]

[2, 170] train loss: tensor([2.8542, 3.2393, 3.0553], device='cuda:0')


 53%|█████▎    | 180/337 [1:06:39<1:08:53, 26.33s/it]

[2, 180] train loss: tensor([2.8016, 3.1497, 3.0528], device='cuda:0')


 56%|█████▋    | 190/337 [1:10:21<1:04:27, 26.31s/it]

[2, 190] train loss: tensor([2.8448, 3.2589, 3.0726], device='cuda:0')


 59%|█████▉    | 200/337 [1:14:03<1:00:06, 26.33s/it]

[2, 200] train loss: tensor([2.7046, 3.1955, 2.9832], device='cuda:0')


 62%|██████▏   | 210/337 [1:17:45<55:44, 26.34s/it]  

[2, 210] train loss: tensor([2.7712, 3.3008, 3.0801], device='cuda:0')


 65%|██████▌   | 220/337 [1:21:28<51:21, 26.34s/it]

[2, 220] train loss: tensor([2.7582, 3.1001, 3.0506], device='cuda:0')


 68%|██████▊   | 230/337 [1:25:10<46:54, 26.31s/it]

[2, 230] train loss: tensor([2.6660, 3.0927, 3.0452], device='cuda:0')


 71%|███████   | 240/337 [1:28:52<42:31, 26.31s/it]

[2, 240] train loss: tensor([2.5502, 3.1363, 2.9717], device='cuda:0')


 74%|███████▍  | 250/337 [1:32:34<38:08, 26.31s/it]

[2, 250] train loss: tensor([2.6850, 3.2151, 2.9847], device='cuda:0')


 77%|███████▋  | 260/337 [1:36:16<33:47, 26.33s/it]

[2, 260] train loss: tensor([2.6797, 3.2259, 2.9670], device='cuda:0')


 80%|████████  | 270/337 [1:39:59<29:24, 26.34s/it]

[2, 270] train loss: tensor([2.6541, 3.2269, 3.0225], device='cuda:0')


 83%|████████▎ | 280/337 [1:43:41<24:58, 26.30s/it]

[2, 280] train loss: tensor([2.7711, 3.2733, 3.0073], device='cuda:0')


 86%|████████▌ | 290/337 [1:47:23<20:37, 26.32s/it]

[2, 290] train loss: tensor([2.7050, 3.2233, 3.0225], device='cuda:0')


 89%|████████▉ | 300/337 [1:51:05<16:13, 26.32s/it]

[2, 300] train loss: tensor([2.7377, 3.1451, 3.0158], device='cuda:0')


 92%|█████████▏| 310/337 [1:54:48<11:50, 26.32s/it]

[2, 310] train loss: tensor([2.5796, 3.1345, 2.9570], device='cuda:0')


 95%|█████████▍| 320/337 [1:58:30<07:27, 26.34s/it]

[2, 320] train loss: tensor([2.6255, 3.2065, 2.9763], device='cuda:0')


 98%|█████████▊| 330/337 [2:02:12<03:04, 26.31s/it]

[2, 330] train loss: tensor([2.8031, 3.2366, 2.9868], device='cuda:0')


100%|██████████| 337/337 [2:04:26<00:00, 22.16s/it]
  9%|▉         | 10/113 [01:24<14:12,  8.28s/it]

[2, 10] valid loss: tensor([2.7501, 3.5515, 3.1577], device='cuda:0')


 18%|█▊        | 20/113 [02:46<12:39,  8.17s/it]

[2, 20] valid loss: tensor([2.9378, 3.1924, 3.0705], device='cuda:0')


 27%|██▋       | 30/113 [04:07<11:11,  8.09s/it]

[2, 30] valid loss: tensor([2.6419, 3.0456, 2.8998], device='cuda:0')


 35%|███▌      | 40/113 [05:30<09:53,  8.13s/it]

[2, 40] valid loss: tensor([2.4163, 3.2648, 2.7638], device='cuda:0')


 44%|████▍     | 50/113 [06:51<08:33,  8.14s/it]

[2, 50] valid loss: tensor([2.6196, 3.3396, 3.0097], device='cuda:0')


 53%|█████▎    | 60/113 [08:12<07:13,  8.17s/it]

[2, 60] valid loss: tensor([2.4729, 3.4650, 3.0708], device='cuda:0')


 62%|██████▏   | 70/113 [09:35<05:54,  8.25s/it]

[2, 70] valid loss: tensor([2.7994, 3.1808, 3.1501], device='cuda:0')


 71%|███████   | 80/113 [10:59<04:36,  8.39s/it]

[2, 80] valid loss: tensor([2.5830, 3.0346, 2.9765], device='cuda:0')


 80%|███████▉  | 90/113 [12:23<03:13,  8.41s/it]

[2, 90] valid loss: tensor([2.7042, 3.0426, 2.8057], device='cuda:0')


 88%|████████▊ | 100/113 [13:46<01:47,  8.25s/it]

[2, 100] valid loss: tensor([2.8907, 3.2242, 3.2159], device='cuda:0')


 97%|█████████▋| 110/113 [15:09<00:24,  8.30s/it]

[2, 110] valid loss: tensor([2.8794, 3.6849, 3.1676], device='cuda:0')


100%|██████████| 113/113 [15:34<00:00,  8.27s/it]
  3%|▎         | 10/337 [03:37<2:20:12, 25.73s/it]

[3, 10] train loss: tensor([2.7042, 3.2033, 2.9566], device='cuda:0')


  6%|▌         | 20/337 [07:14<2:15:57, 25.73s/it]

[3, 20] train loss: tensor([2.6921, 3.0889, 2.9486], device='cuda:0')


  9%|▉         | 30/337 [10:52<2:11:35, 25.72s/it]

[3, 30] train loss: tensor([2.6512, 3.1188, 2.9954], device='cuda:0')


 12%|█▏        | 40/337 [14:29<2:07:19, 25.72s/it]

[3, 40] train loss: tensor([2.6898, 3.0632, 2.9746], device='cuda:0')


 15%|█▍        | 50/337 [18:07<2:03:00, 25.72s/it]

[3, 50] train loss: tensor([2.5890, 3.1683, 2.9156], device='cuda:0')


 18%|█▊        | 60/337 [21:44<1:58:45, 25.72s/it]

[3, 60] train loss: tensor([2.6495, 3.2024, 2.9301], device='cuda:0')


 21%|██        | 70/337 [25:21<1:54:23, 25.71s/it]

[3, 70] train loss: tensor([2.6484, 3.0783, 2.9905], device='cuda:0')


 24%|██▎       | 80/337 [28:59<1:50:24, 25.78s/it]

[3, 80] train loss: tensor([2.6544, 3.1348, 2.9958], device='cuda:0')


 27%|██▋       | 90/337 [32:37<1:45:55, 25.73s/it]

[3, 90] train loss: tensor([2.6217, 3.1326, 2.9122], device='cuda:0')


 30%|██▉       | 100/337 [36:14<1:41:43, 25.75s/it]

[3, 100] train loss: tensor([2.6033, 3.0766, 2.9150], device='cuda:0')


 33%|███▎      | 110/337 [39:52<1:37:21, 25.73s/it]

[3, 110] train loss: tensor([2.5468, 3.1597, 2.9035], device='cuda:0')


 36%|███▌      | 120/337 [43:29<1:33:09, 25.76s/it]

[3, 120] train loss: tensor([2.6076, 3.0544, 2.9760], device='cuda:0')


 39%|███▊      | 130/337 [47:07<1:28:48, 25.74s/it]

[3, 130] train loss: tensor([2.6214, 3.0956, 2.8905], device='cuda:0')


 42%|████▏     | 140/337 [50:44<1:24:24, 25.71s/it]

[3, 140] train loss: tensor([2.5675, 3.1719, 2.9721], device='cuda:0')


 45%|████▍     | 150/337 [54:22<1:20:13, 25.74s/it]

[3, 150] train loss: tensor([2.6686, 3.0336, 2.8460], device='cuda:0')


 47%|████▋     | 160/337 [57:59<1:15:52, 25.72s/it]

[3, 160] train loss: tensor([2.6285, 3.1439, 2.9339], device='cuda:0')


 50%|█████     | 170/337 [1:01:37<1:11:40, 25.75s/it]

[3, 170] train loss: tensor([2.5885, 3.0464, 2.8897], device='cuda:0')


 53%|█████▎    | 180/337 [1:05:14<1:07:23, 25.76s/it]

[3, 180] train loss: tensor([2.5647, 3.0737, 2.9172], device='cuda:0')


 56%|█████▋    | 190/337 [1:08:52<1:03:04, 25.75s/it]

[3, 190] train loss: tensor([2.5868, 3.0309, 2.8842], device='cuda:0')


 59%|█████▉    | 200/337 [1:12:29<58:46, 25.74s/it]  

[3, 200] train loss: tensor([2.6905, 3.1111, 2.8989], device='cuda:0')


 62%|██████▏   | 210/337 [1:16:07<54:29, 25.74s/it]

[3, 210] train loss: tensor([2.4929, 3.0561, 2.8774], device='cuda:0')


 65%|██████▌   | 220/337 [1:19:44<50:10, 25.73s/it]

[3, 220] train loss: tensor([2.5238, 3.1616, 2.8997], device='cuda:0')


 68%|██████▊   | 230/337 [1:23:22<45:52, 25.73s/it]

[3, 230] train loss: tensor([2.5671, 3.1696, 2.8953], device='cuda:0')


 71%|███████   | 240/337 [1:27:10<42:37, 26.36s/it]

[3, 240] train loss: tensor([2.6463, 3.0149, 2.8474], device='cuda:0')


 74%|███████▍  | 250/337 [1:30:48<37:19, 25.74s/it]

[3, 250] train loss: tensor([2.6788, 3.0225, 2.9141], device='cuda:0')


 77%|███████▋  | 260/337 [1:34:25<33:02, 25.74s/it]

[3, 260] train loss: tensor([2.6288, 3.1062, 2.8874], device='cuda:0')


 80%|████████  | 270/337 [1:38:03<28:42, 25.71s/it]

[3, 270] train loss: tensor([2.4942, 3.0649, 2.8425], device='cuda:0')


 83%|████████▎ | 280/337 [1:41:40<24:29, 25.78s/it]

[3, 280] train loss: tensor([2.5665, 3.1355, 2.8540], device='cuda:0')


 86%|████████▌ | 290/337 [1:45:17<20:09, 25.74s/it]

[3, 290] train loss: tensor([2.5902, 3.1421, 2.9004], device='cuda:0')


 89%|████████▉ | 300/337 [1:48:55<15:52, 25.74s/it]

[3, 300] train loss: tensor([2.6297, 3.1365, 2.9062], device='cuda:0')


 92%|█████████▏| 310/337 [1:52:32<11:33, 25.69s/it]

[3, 310] train loss: tensor([2.6582, 3.1290, 2.9593], device='cuda:0')


 95%|█████████▍| 320/337 [1:56:10<07:17, 25.75s/it]

[3, 320] train loss: tensor([2.6487, 3.1488, 2.9490], device='cuda:0')


 98%|█████████▊| 330/337 [1:59:47<03:00, 25.75s/it]

[3, 330] train loss: tensor([2.6370, 2.9949, 2.8556], device='cuda:0')


100%|██████████| 337/337 [2:01:58<00:00, 21.72s/it]
  9%|▉         | 10/113 [01:21<14:02,  8.18s/it]

[3, 10] valid loss: tensor([2.8146, 3.0600, 2.8429], device='cuda:0')


 18%|█▊        | 20/113 [02:44<12:47,  8.25s/it]

[3, 20] valid loss: tensor([2.7058, 3.1977, 2.8261], device='cuda:0')


 27%|██▋       | 30/113 [04:08<11:57,  8.64s/it]

[3, 30] valid loss: tensor([2.6375, 3.1422, 2.8122], device='cuda:0')


 35%|███▌      | 40/113 [05:36<10:49,  8.90s/it]

[3, 40] valid loss: tensor([2.4460, 3.3194, 2.8732], device='cuda:0')


 44%|████▍     | 50/113 [07:06<09:23,  8.94s/it]

[3, 50] valid loss: tensor([2.9068, 3.0312, 2.9953], device='cuda:0')


 53%|█████▎    | 60/113 [08:30<07:16,  8.24s/it]

[3, 60] valid loss: tensor([2.4737, 2.8713, 2.9953], device='cuda:0')


 62%|██████▏   | 70/113 [09:53<05:54,  8.24s/it]

[3, 70] valid loss: tensor([2.2947, 2.9248, 2.8032], device='cuda:0')


 71%|███████   | 80/113 [11:17<04:34,  8.32s/it]

[3, 80] valid loss: tensor([2.4143, 3.0567, 2.8843], device='cuda:0')


 80%|███████▉  | 90/113 [12:40<03:11,  8.33s/it]

[3, 90] valid loss: tensor([2.7724, 4.0607, 2.8915], device='cuda:0')


 88%|████████▊ | 100/113 [14:04<01:48,  8.36s/it]

[3, 100] valid loss: tensor([2.8064, 3.0559, 2.7978], device='cuda:0')


 97%|█████████▋| 110/113 [15:28<00:24,  8.32s/it]

[3, 110] valid loss: tensor([2.7010, 3.0900, 2.9463], device='cuda:0')


100%|██████████| 113/113 [15:53<00:00,  8.44s/it]
  3%|▎         | 10/337 [03:28<2:13:21, 24.47s/it]

[4, 10] train loss: tensor([2.6230, 3.1634, 2.8304], device='cuda:0')


  6%|▌         | 20/337 [06:55<2:08:38, 24.35s/it]

[4, 20] train loss: tensor([2.5546, 2.9926, 2.8035], device='cuda:0')


  9%|▉         | 30/337 [10:23<2:05:14, 24.48s/it]

[4, 30] train loss: tensor([2.5370, 3.0507, 2.8093], device='cuda:0')


 11%|█         | 36/337 [12:49<1:47:13, 21.37s/it]


KeyboardInterrupt: 

In [10]:
# Persist the current weights. The training run above was interrupted by hand
# during its fourth epoch, hence the "e4" in the file name.
torch.save(model.state_dict(), 'model_e4.pt')

In [11]:
from model.loss import DetectionLoss

# Stand-alone loss object built from the model's head, for the experiments below.
# This is the same construction as the `criterion` passed to train() earlier.
loss = DetectionLoss(model.head)

In [41]:
# Number of dummy target rows.
N = 5

# Column 0: image index, set to 4 for every row.
idx = 4 * torch.ones(N, 1)
# Column 1: random stand-in for the class value.
cls = torch.rand(N, 1)
# Columns 2-5: random stand-in for the four box values.
box = torch.rand(N, 4)

# Glue the pieces together side by side -> one (N, 6) tensor.
targets = torch.cat([idx, cls, box], dim=1)
targets.shape

torch.Size([5, 6])

In [48]:
# How many rows each distinct value of column 0 (the image index) owns.
counts = torch.unique(targets[:, 0], return_counts=True)[1].to(torch.int32)

# Zero buffer with one slot per image, wide enough for the largest count.
# NOTE(review): 16 is presumably the batch size — confirm.
out = torch.zeros(16, int(counts.max()), 5)
out.shape

torch.Size([16, 5, 5])

In [43]:
# Cut the rows of `targets` into a chunk of 1 row followed by a chunk of 4 rows.
torch.split(targets, (1, 4), dim=0)

(tensor([[4.0000, 0.4179, 0.4273, 0.6513, 0.5899, 0.6324]]),
 tensor([[4.0000, 0.4382, 0.0670, 0.8358, 0.0052, 0.8021],
         [4.0000, 0.1560, 0.6904, 0.3744, 0.3639, 0.7049],
         [4.0000, 0.8038, 0.7348, 0.8887, 0.0342, 0.7100],
         [4.0000, 0.6188, 0.2406, 0.3678, 0.3155, 0.6877]]))

In [9]:
# Fixed seed so the dummy tensors below come out the same on every run.
torch.manual_seed(0)

# Sizes used to shape the dummy tensors: outputs are (N, s, s, 5*b + c).
N = 10
s = 7
b = 2
c = 20

# Random stand-in for network outputs; show its shape and the first cell.
outputs = torch.rand((N, s, s, 5 * b + c))
print(outputs.shape, outputs[0, 0, 0], sep='\n')

# Random stand-in for labels with 5 values per cell; channel 0 is overwritten
# with a random integer in [0, c).
labels = torch.rand((3, s, s, 5))
labels[..., 0] = torch.randint(0, c, (3, s, s))
print(labels.shape, labels[0, 0, 0], sep='\n')

torch.Size([10, 7, 7, 30])
tensor([0.4963, 0.7682, 0.0885, 0.1320, 0.3074, 0.6341, 0.4901, 0.8964, 0.4556,
        0.6323, 0.3489, 0.4017, 0.0223, 0.1689, 0.2939, 0.5185, 0.6977, 0.8000,
        0.1610, 0.2823, 0.6816, 0.9152, 0.3971, 0.8742, 0.4194, 0.5529, 0.9527,
        0.0362, 0.1852, 0.3734])
torch.Size([3, 7, 7, 5])
tensor([15.0000,  0.5920,  0.3936,  0.0715,  0.5098])


In [None]:
def yolo_loss(outputs, labels, lambda_coord = 5, lambda_noobj = 0.5):
    """Weighted squared error between predicted and labelled box centres.

    Work in progress: only the centre (x, y) term is implemented. Width/height,
    confidence and class terms are still missing, and ``lambda_noobj`` is not
    used yet.

    Args:
        outputs: 4-D prediction tensor. Along the last axis, channels
            1, 6, 11, ... are read as x and channels 2, 7, 12, ... as y.
        labels: 4-D label tensor sliced with the same channel pattern; the
            slices must broadcast against the ``outputs`` slices.
        lambda_coord: weight applied to the coordinate term.
        lambda_noobj: accepted for the planned no-object term; currently unused.

    Returns:
        Scalar tensor ``lambda_coord * sum((x - x_hat)**2 + (y - y_hat)**2)``.
    """
    # NOTE(review): derivation kept from the original draft. For a 30-channel
    # output it yields b == 1, whereas the dummy data in this notebook is built
    # with b == 2 boxes (2*5 + 20 channels). The box count cannot be recovered
    # from the channel count alone — confirm how b should be obtained.
    b = outputs.shape[3] // 5 - 5

    # x lives at offset 1 of every 5-value box group, y at offset 2. The draft
    # added the x term twice and never looked at y.
    x_err = outputs[:, :, :, 1:5*b+1:5] - labels[:, :, :, 1:5*b+1:5]
    y_err = outputs[:, :, :, 2:5*b+2:5] - labels[:, :, :, 2:5*b+2:5]
    xy_loss = torch.sum(x_err**2 + y_err**2)

    # The draft computed the term but returned nothing.
    return lambda_coord * xy_loss

In [14]:
# Shape obtained when the last axis of `labels` is sampled every 5th channel
# starting at channel 1 (the slice pattern used in yolo_loss).
labels[..., 1:5*b+1:5].shape

torch.Size([3, 7, 7, 1])

In [None]:
# https://github.com/eriklindernoren/PyTorch-YOLOv3/blob/master/pytorchyolo/utils/loss.py#L58


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
    """Return the IoU (or GIoU / DIoU / CIoU) of box1 against every box in box2.

    Args:
        box1: box coordinates indexed along the first axis (``box1[0]`` ..
            ``box1[3]``), i.e. shape (4,) or (4, n).
        box2: boxes of shape (n, 4); transposed internally so that it is
            indexed like ``box1``.
        x1y1x2y2: if True the four values are corner coordinates
            (x1, y1, x2, y2); otherwise they are (centre x, centre y, w, h)
            and are converted to corners first.
        GIoU, DIoU, CIoU: select a generalised / distance / complete IoU
            variant. Plain IoU is returned when all three are False. DIoU takes
            precedence over CIoU when both are set.
        eps: small constant added to heights, the union and other denominators
            to avoid division by zero.

    Returns:
        Tensor with the requested overlap measure for each box pair.
    """
    # Imported here because the CIoU branch needs math.pi and the notebook has
    # no module-level `import math`; without it that branch raises NameError.
    import math

    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (clamped so that disjoint boxes give 0, not a negative)
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area; eps goes on the heights only, they are used as divisors below
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        # convex (smallest enclosing box) width
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                # Aspect-ratio consistency term
                v = (4 / math.pi ** 2) * \
                    torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                # alpha is a weighting factor and is kept out of the autograd graph
                with torch.no_grad():
                    alpha = v / ((1 + eps) - iou + v)
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    else:
        return iou  # IoU

def compute_loss(predictions, targets, model):
    """Combine box, objectness and class losses over all prediction layers.

    Args:
        predictions: iterable with one prediction tensor per YOLO layer. Each
            is indexed as [image, anchor, grid_j, grid_i, channel], where
            channels 0-3 are box values, channel 4 is objectness and channels
            5+ are class logits.
        targets: label tensor handed to ``build_targets``; its device is used
            for every tensor created here.
        model: passed through to ``build_targets``.

    Returns:
        ``(loss, parts)`` where ``loss`` is the 1-element weighted sum and
        ``parts`` is the 4-element tensor (box, objectness, class, total).
    """
    dev = targets.device

    # Running totals, kept as 1-element tensors so they can be concatenated
    # into the summary tensor at the end.
    cls_loss = torch.zeros(1, device=dev)
    box_loss = torch.zeros(1, device=dev)
    obj_loss = torch.zeros(1, device=dev)

    # Per-layer class ids, box targets, cell indices and matched anchors.
    cls_targets, box_targets, cell_indices, matched_anchors = build_targets(predictions, targets, model)

    # Binary cross-entropy on logits for class and objectness, both with a
    # positive-class weight of 1.
    class_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0], device=dev))
    objectness_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0], device=dev))

    for layer_no, layer_out in enumerate(predictions):
        img_idx, anchor_idx, row_idx, col_idx = cell_indices[layer_no]

        # Objectness target starts at zero everywhere; cells that own a label
        # are filled in below.
        obj_target = torch.zeros_like(layer_out[..., 0], device=dev)

        matched = img_idx.shape[0]
        if matched > 0:
            # Predictions of exactly those cells/anchors that were assigned a label.
            cell_out = layer_out[img_idx, anchor_idx, row_idx, col_idx]

            # Decode the box: sigmoid for the xy offset inside the cell,
            # exp() scaled by the matched anchor for width and height.
            xy = torch.sigmoid(cell_out[:, :2])
            wh = cell_out[:, 2:4].exp() * matched_anchors[layer_no]
            pred_boxes = torch.cat((xy, wh), 1)

            # CIoU between each decoded box and its label; a perfect match is
            # 1, so the box loss is the mean of (1 - CIoU).
            iou = bbox_iou(pred_boxes.T, box_targets[layer_no], x1y1x2y2=False, CIoU=True)
            box_loss += (1.0 - iou).mean()

            # The (non-negative, detached) overlap becomes the objectness
            # target at the label's position.
            obj_target[img_idx, anchor_idx, row_idx, col_idx] = iou.detach().clamp(0).type(obj_target.dtype)

            # A class loss is only computed when there is more than one class.
            class_logits = cell_out[:, 5:]
            if class_logits.size(1) > 1:
                one_hot = torch.zeros_like(class_logits, device=dev)
                one_hot[range(matched), cls_targets[layer_no]] = 1
                cls_loss += class_criterion(class_logits, one_hot)

        # Objectness is scored over the whole layer, with or without labels.
        obj_loss += objectness_criterion(layer_out[..., 4], obj_target)

    # Fixed weighting of the three components.
    box_loss *= 0.05
    obj_loss *= 1.0
    cls_loss *= 0.5

    total = box_loss + obj_loss + cls_loss
    return total, torch.cat((box_loss, obj_loss, cls_loss, total))


def build_targets(p, targets, model):
    """Assign label boxes to anchors and grid cells for every YOLO layer.

    Args:
        p: sequence of per-layer prediction tensors; only their shapes are
            read (axis 3 is used as grid width, axis 2 as grid height).
        targets: tensor of shape (nt, 6) with rows
            (image, class, x, y, w, h); x, y, w, h are scaled by the grid size
            of each layer.
        model: object exposing ``yolo_layers``; each layer provides
            ``anchors`` and ``stride``.

    Returns:
        Four lists with one entry per layer:
        class ids, boxes as (x offset in cell, y offset in cell, w, h),
        index tuples (image, anchor, grid_j, grid_i), and the matched anchors.
    """
    num_anchors = 3  # TODO: hard-coded number of anchors per layer
    num_targets = targets.shape[0]
    dev = targets.device

    cls_out, box_out, idx_out, anchor_out = [], [], [], []

    # Multipliers for the 7 target columns
    # (image, class, x, y, w, h, anchor id); only columns 2-5 get rescaled.
    scale = torch.ones(7, device=dev)

    # Anchor ids 0..num_anchors-1, one row per anchor, one column per target.
    anchor_ids = torch.arange(num_anchors, device=dev).float().view(num_anchors, 1).repeat(1, num_targets)
    # One copy of all targets per anchor, with the anchor id appended as a
    # 7th column -> shape (num_anchors, nt, 7).
    targets = torch.cat((targets.repeat(num_anchors, 1, 1), anchor_ids[:, :, None]), 2)

    for layer_no, layer in enumerate(model.yolo_layers):
        # Anchors expressed in grid cells of this layer.
        layer_anchors = layer.anchors / layer.stride

        # Bring x, y, w, h into this layer's grid coordinate system.
        scale[2:6] = torch.tensor(p[layer_no].shape)[[3, 2, 3, 2]]
        scaled = targets * scale

        if num_targets:
            # Width/height ratio between label and anchor; keep a pairing only
            # if the worse of both directions stays below 4.  # TODO threshold
            ratio = scaled[:, :, 4:6] / layer_anchors[:, None]
            keep = torch.max(ratio, 1. / ratio).max(2).values < 4
            # Boolean indexing flattens the anchor axis; the anchor id
            # survives in column 6.
            matched = scaled[keep]
        else:
            matched = targets[0]

        # Image index and class id of every kept pairing.
        img_idx, cls_idx = matched[:, :2].long().T

        grid_xy = matched[:, 2:4]
        grid_wh = matched[:, 4:6]

        # Truncate to the integer cell that contains the box centre.
        cell = grid_xy.long()
        cell_x, cell_y = cell.T  # views into `cell`

        anchor_idx = matched[:, 6].long()

        # Clamp the cell indices into the grid. This happens in place and
        # therefore also changes `cell`, which the offset below is taken
        # against — the order of these statements matters.
        row = cell_y.clamp_(0, scale[3].long() - 1)
        col = cell_x.clamp_(0, scale[2].long() - 1)

        idx_out.append((img_idx, anchor_idx, row, col))
        # Box as offset inside the (clamped) cell plus width and height.
        box_out.append(torch.cat((grid_xy - cell, grid_wh), 1))
        anchor_out.append(layer_anchors[anchor_idx])
        cls_out.append(cls_idx)

    return cls_out, box_out, idx_out, anchor_out