In [1]:
from utils.getter import *

In [2]:
from models.retinanet import *
from models.ssd.model import *

In [3]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook can still be run top to bottom on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Preprocessing pipeline handed to both datasets below: Resize to (300, 300),
# then ToTensor, then Normalize with its default arguments.
transforms = Compose([Resize((300, 300)), ToTensor(), Normalize()])

In [5]:
# Dataset locations. `data_path` is not read by this cell; it is kept because
# other cells may still refer to it.
data_path = "datasets/datasets/Garbage Classification"
voc_path = "datasets/datasets/VOC/images"
voc_anno = dict(
    train="datasets/datasets/VOC/annotations/pascal_train2012.json",
    val="datasets/datasets/VOC/annotations/pascal_val2012.json",
)

# Train and validation datasets share the image directory and the
# preprocessing pipeline; only the annotation file differs.
trainset, valset = (
    ObjectDetectionDataset(img_dir=voc_path, ann_path=voc_anno[split], transforms=transforms)
    for split in ("train", "val")
)
NUM_CLASSES = len(trainset.classes)

# Print both dataset summaries, one after the other.
print(trainset, valset, sep="\n")

Custom Dataset for Object Detection
-------------------------------
Number of samples: 5717
Number of classes: 20

Custom Dataset for Object Detection
-------------------------------
Number of samples: 5823
Number of classes: 20



In [None]:
# Display one training sample using a 5x5 figure. No item index is passed, so
# which sample is shown depends on visualize_item's own default.
# NOTE(review): presumably draws the ground-truth boxes too — confirm in the dataset class.
trainset.visualize_item(figsize=(5,5))

In [6]:
# Sanity check: ask the RetinaNet collator's encoder for the anchor boxes it
# builds for a (300, 300) input size and report the shape of the result.
# Note that `_get_anchor_boxes` is a private helper of the encoder.
my_collate = RetinaNetCollator()
t = my_collate.encoder._get_anchor_boxes(
    torch.Tensor([300, 300])
)
print(t.shape)

torch.Size([17451, 4])


In [None]:
# Instantiate the focal loss for 20 classes.
# NOTE(review): the literal 20 presumably mirrors NUM_CLASSES — confirm.
# `criterion` is reassigned by a later cell, so this binding is short-lived.
criterion = FocalLoss(num_classes = 20)

In [None]:
BATCH_SIZE = 4

# Batches are assembled with the dataset's own collate function.
# (Alternative left by the author: RetinaNetCollator().)
my_collate = trainset.collate_fn

# Only the training loader shuffles; validation order stays fixed.
trainloader = data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    collate_fn=my_collate,
    shuffle=True,
)
valloader = data.DataLoader(
    valset,
    batch_size=BATCH_SIZE,
    collate_fn=my_collate,
    shuffle=False,
)

In [None]:
# The loss and the optimizer are passed to the detector as classes, not
# instances. (Alternative loss left by the author: FocalLoss.)
criterion = MultiBoxLoss
optimizer = torch.optim.Adam

model = SSDDetector(
    n_classes=NUM_CLASSES,
    lr=1e-3,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
)

# Restore weights from a previously saved checkpoint file.
load_checkpoint(model, 'weights/2020-08-24_09-39-07/SSD300-30.pth')

In [None]:
# Wire the detector and both loaders into a Trainer. The scheduler is a StepLR
# on the model's own optimizer (step_size=5, gamma=0.5); a checkpoint is
# configured with save_per_epoch=1 and evaluation with evaluate_per_epoch=15.
trainer = Trainer(
    model,
    trainloader,
    valloader,
    clip_grad=1.0,
    checkpoint=Checkpoint(save_per_epoch=1),
    scheduler=StepLR(model.optimizer, step_size=5, gamma=0.5),
    evaluate_per_epoch=15,
)

print(trainer)

In [None]:
# Run inference over the validation loader and show the shape of the first
# localisation / classification prediction.
loc_preds, cls_preds = trainer.inference_batch(valloader)
for first_pred in (loc_preds[0], cls_preds[0]):
    print(first_pred.shape)

# Turn raw predictions into detections.
# (Alternative left by the author:
#  my_collate.encoder.decode(loc_preds[0].cuda(), cls_preds[0].cuda(), 300))
results = model.model.detect(
    loc_preds, cls_preds, min_score=0.01, max_overlap=0.45, top_k=200
)
print(results)

In [None]:
# `results` unpacks into (boxes, labels, scores); keep only element 0 of each.
boxes, labels, scores = (component[0] for component in results)

In [None]:
# Select the highest-scoring entry of `scores` and the matching box and label.
_, idx = scores.max(dim=0)

# Move everything to the CPU before converting: Tensor.numpy() raises on CUDA
# tensors, and .cpu() is a no-op for tensors that are already on the CPU.
np_box = boxes[idx].cpu()
np_label = [labels[idx].cpu().numpy().tolist()]
np_score = scores[idx].cpu().numpy()
print(np_score)

In [None]:
# Grab the first image of the first validation batch.
# Index with [0] rather than squeeze(0): squeeze(0) only removes the leading
# axis when it has size 1, so with a batch larger than one it would leave the
# whole batch in `img`. [0] gives the same result for a batch of one.
b = next(iter(valloader))
img = b['imgs'][0]

In [None]:
# Undo the normalisation for the image and the selected box (no label is
# passed), then pull the image and the first box out of the returned mapping.
outs = trainset.transforms.denormalize(img=img, box=[np_box], label=None)
true_img = outs['img']
true_box = [outs['box'][0].numpy().tolist()]

In [None]:
# Show the denormalised image together with the selected box and its label.
trainset.visualize(true_img, true_box, np_label)

In [None]:
# Debug dump of the raw classification predictions (prints the full object).
print(cls_preds)

In [None]:
# Show the distinct values in the second component of `results`
# (the labels, per the unpacking order boxes, labels, scores).
# NOTE(review): assumes results[1] is a tensor, not a list of tensors — confirm.
print(results[1].unique())

In [None]:
# Detections for element 0, moved to the CPU.
box_test, label_test = (results[component][0].cpu() for component in (0, 1))

# First image of the first validation batch.
batch = next(iter(valloader))
img_test = batch['imgs'][0]

# Undo the normalisation, convert to plain Python lists, then print and draw.
s = valset.transforms.denormalize(img=img_test, box=box_test, label=label_test)
img = s['img']
boxes = [s['box'].numpy().tolist()]
label = [s['label'].numpy().tolist()]
print(boxes, label)
valset.visualize(img, boxes, label)

In [None]:
# Manual training loop over the training loader (one pass).
#
# The optimizer instance lives on the model (`model.optimizer`). The global
# `optimizer` is the class torch.optim.Adam, so calling zero_grad()/step() on
# it cannot work.
# NOTE(review): the global `criterion` is also bound to a class (MultiBoxLoss)
# at this point — confirm that calling it with these four arguments is intended.
for idx, batch in enumerate(trainloader):
    model.optimizer.zero_grad()

    imgs = batch['imgs'].to(device)
    # Alternative for per-sample lists, left by the author:
    #   boxes  = [x.to(device) for x in batch['boxes']]
    #   labels = [x.to(device) for x in batch['labels']]
    boxes = batch['boxes'].to(device)
    labels = batch['labels'].to(device)

    loc_preds, cls_preds = model(imgs)
    loss = criterion(loc_preds, cls_preds, boxes, labels)
    loss.backward()
    # Clip the gradient norm to 1.0 before the update.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    model.optimizer.step()

    # Report the loss every 10 iterations.
    if idx % 10 == 0:
        print('[{}/{}] | Loss: {} '.format(idx, len(trainloader), loss.item()))

In [None]:
# Run detection on a single training batch, with gradient tracking disabled.
idx, batch = next(enumerate(trainloader))
with torch.no_grad():
    imgs = batch['imgs'].to(device)
    # Ground truth is moved to the device as per-sample lists; it is not used
    # further in this cell.
    boxes = [x.to(device) for x in batch['boxes']]
    labels = [x.to(device) for x in batch['labels']]

    loc_preds, cls_preds = model(imgs)
    det_boxes, det_labels, det_scores = model.detect_objects(
        loc_preds, cls_preds, min_score=0.01, max_overlap=0.45, top_k=200
    )

In [None]:
# Debug dump of the detection scores produced by the previous cell.
print(det_scores)

In [None]:
# Classification setup: cross-entropy loss instance, the Adam optimizer class,
# and an accuracy metric configured with decimals=3.
# This rebinds `criterion` and `optimizer`, replacing any earlier values.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam
metrics = [AccuracyMetric(decimals=3)]

In [None]:
# Build a ResNet34 classifier sized to the dataset's class list. This rebinds
# `model`, replacing whatever it referred to before.
NUM_CLASSES = len(trainset.classes)
model = ResNet34(
    NUM_CLASSES,
    lr=1e-4,
    criterion=criterion,
    optimizer=optimizer,
    metrics=metrics,
    device=device,
)
# Optional warm start, left disabled by the author:
# load_checkpoint(model, "weights/ResNet34-12.pth")

In [None]:
# Train for 5 epochs, with the checkpoint configured as save_per_epoch=1 and
# evaluation as evaluate_per_epoch=1.
cp = Checkpoint(save_per_epoch=1)
trainer = Trainer(
    model,
    trainloader,
    valloader,
    checkpoint=cp,
    evaluate_per_epoch=1,
)

trainer.fit(num_epochs=5)

In [None]:
# Run inference over the validation loader and dump the raw result.
# Here the return value is kept as a single object (not unpacked).
preds = trainer.inference_batch(valloader)
print(preds)

In [None]:
# Inspect one validation sample: print the class name looked up from the
# prediction at position `idx`, then display that sample.
idx = 35
print(valset.classes[preds[idx]])
valset.visualize_item(idx, figsize=(8, 8))